# Global Code

In [2]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import tensorflow as tf
import librosa as lr
import soundfile as sf
import scipy.signal as sig
from random import random, randint, shuffle
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier


In [3]:
dataset_dir = '/content/drive/MyDrive/iit_our_rec/'
ambient_path = '/content/drive/MyDrive/iit_our_rec/noise_44min.wav'

In [4]:
# List of durations to test
durations = [0.1, 0.2, 0.4, 0.6, 0.8, 1.0]
SR = 44100  # Sample rate

# Mel Spec

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR):
    mel_spec_data = []
    labels = []

    segment_sr_required = duration * SR

    no_of_drone_samples = 0
    no_of_swarm_drone_samples = 0
    no_of_aircraft_samples = 0

    for file_name in os.listdir(main_folder_path):
        if 'drone'.lower() in file_name.lower():
            flag = 0
        elif 'swarm'.lower() in file_name.lower():
            flag = 1
        elif 'aircraft'.lower() in file_name.lower():
            flag = 2
        else:
            continue

        if file_name.endswith('.wav') or file_name.endswith('.WAV') or file_name.endswith('.mp3'):
            file_path = os.path.join(main_folder_path, file_name)

            signal_data, sr = librosa.load(file_path, sr=None, mono=True)
            signal_data = signal_data / np.max(np.abs(signal_data))
            noise_data, sr_noise = librosa.load(file_path_ambient, sr=None, mono=True)
            noise_data = noise_data / np.max(np.abs(noise_data))

            s = len(signal_data)
            n = len(noise_data)

            if n > s:
                noise_data = noise_data[:s]
            elif s > n:
                w = s - n
                noise_data = np.concatenate((noise_data, noise_data[:w]))
            else:
                pass

            N = int(s / segment_sr_required)

            for i in range(N):
                start = int(i * segment_sr_required)
                end = int(start + segment_sr_required)

                segment = signal_data[start:end]
                start_noise = int(i * segment_sr_required)
                end_noise = int(start_noise + segment_sr_required)

                noise = noise_data[start_noise:end_noise]

                rms_signal = np.mean(np.square(segment))
                rms_noise = np.mean(np.square(noise))

                dbset = [-25, -20, -15, -10, -5, 0, 5, 10, 15, 20]

                for j in range(len(dbset)):
                    rms_signal_req_to_increase = rms_noise / (10 ** (-dbset[j] / 10))
                    scaling_factor = np.sqrt(rms_signal_req_to_increase / rms_signal)
                    adjusted_audio_signal = segment * scaling_factor

                    adjusted_audio_signal += noise


                    segment_mel_spec = librosa.feature.melspectrogram(
                        y=adjusted_audio_signal, sr=SR, n_fft=2048, hop_length=512, win_length=1024, n_mels=256
                    )
                    mean_of_mel = np.mean(segment_mel_spec, axis=-1)

                    mel_spec_data.append(mean_of_mel)
                    labels.append(flag)

                    if flag == 0:
                        no_of_drone_samples += 1
                    elif flag == 1:
                        no_of_swarm_drone_samples += 1
                    elif flag == 2:
                        no_of_aircraft_samples += 1

    if len(mel_spec_data) == 0:
        raise ValueError("No audio data available for the given duration.")

    mel_spec_data = tf.keras.preprocessing.sequence.pad_sequences(mel_spec_data)
    labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    mean_mel_spec = np.mean(mel_spec_data)
    std_mel_spec = np.std(mel_spec_data)
    normalized_mel_spec_data = (mel_spec_data - mean_mel_spec) / std_mel_spec

    print("Drone samples:", no_of_drone_samples,
          "\nSwarm Drone samples:", no_of_swarm_drone_samples,
          "\nAircraft samples:", no_of_aircraft_samples)

    return normalized_mel_spec_data, labels


In [None]:
# Initialize a list to store accuracy values for each duration
accuracies = []

for duration in durations:
    # Call mix_audio function to get the data and labels
    normalized_mel_spec_data, labels = mix_audio(dataset_dir, ambient_path, duration, SR)

    # Convert labels to 1D array
    y_labels = np.argmax(labels, axis=1)

    # Split data into train and test sets
    X_train, X_test, y_train_labels, y_test_labels = train_test_split(normalized_mel_spec_data, y_labels, test_size=0.2, random_state=42)

    # Create and train SVM classifier
    svm = SVC(C=10, kernel='rbf', gamma='scale')
    svm.fit(X_train, y_train_labels)

    # Predict and calculate accuracy
    y_pred = svm.predict(X_test)
    accuracy = accuracy_score(y_test_labels, y_pred)

    # Append accuracy to the list
    accuracies.append(accuracy)

# Print accuracy for each duration
for i, duration in enumerate(durations):
    print(f"Duration: {duration} seconds - SVM Accuracy: {accuracies[i]}")

# Mfcc

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR):
    mfcc_data = []
    labels = []

    segment_sr_required = duration * SR

    no_of_drone_samples = 0
    no_of_swarm_drone_samples = 0
    no_of_aircraft_samples = 0

    for file_name in os.listdir(main_folder_path):
        if 'drone'.lower() in file_name.lower():
            flag = 0
        elif 'swarm'.lower() in file_name.lower():
            flag = 1
        elif 'aircraft'.lower() in file_name.lower():
            flag = 2
        else:
            continue

        if file_name.endswith('.wav') or file_name.endswith('.WAV') or file_name.endswith('.mp3'):
            file_path = os.path.join(main_folder_path, file_name)

            signal_data, sr = librosa.load(file_path, sr=None, mono=True)
            signal_data = signal_data / np.max(np.abs(signal_data))
            noise_data, sr_noise = librosa.load(file_path_ambient, sr=None, mono=True)
            noise_data = noise_data / np.max(np.abs(noise_data))

            s = len(signal_data)
            n = len(noise_data)

            if n > s:
                noise_data = noise_data[:s]
            elif s > n:
                w = s - n
                noise_data = np.concatenate((noise_data, noise_data[:w]))
            else:
                pass

            N = int(s / segment_sr_required)

            for i in range(N):
                start = int(i * segment_sr_required)
                end = int(start + segment_sr_required)

                segment = signal_data[start:end]
                start_noise = int(i * segment_sr_required)
                end_noise = int(start_noise + segment_sr_required)

                noise = noise_data[start_noise:end_noise]

                rms_signal = np.mean(np.square(segment))
                rms_noise = np.mean(np.square(noise))

                dbset = [-25, -20, -15, -10, -5, 0, 5, 10, 15, 20]

                for j in range(len(dbset)):
                    rms_signal_req_to_increase = rms_noise / (10 ** (-dbset[j] / 10))
                    scaling_factor = np.sqrt(rms_signal_req_to_increase / rms_signal)
                    adjusted_audio_signal = segment * scaling_factor

                    adjusted_audio_signal += noise


                    # Replace Mel spectrogram calculation with MFCC calculation
                    segment_mfcc = librosa.feature.mfcc(
                        y=adjusted_audio_signal, sr=SR, n_fft=2048, hop_length=512, n_mfcc=13
                    )
                    mean_of_mfcc = np.mean(segment_mfcc, axis=-1)

                    # Append MFCC features instead of Mel spectrogram features
                    mfcc_data.append(mean_of_mfcc)

                    labels.append(flag)

                    if flag == 0:
                        no_of_drone_samples += 1
                    elif flag == 1:
                        no_of_swarm_drone_samples += 1
                    elif flag == 2:
                        no_of_aircraft_samples += 1

    if len(mfcc_data) == 0:
        raise ValueError("No audio data available for the given duration.")

    # Pad MFCC data instead of Mel spectrogram data
    mfcc_data = tf.keras.preprocessing.sequence.pad_sequences(mfcc_data)
    labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    mean_mfcc_spec = np.mean(mfcc_data)
    std_mel_spec = np.std(mfcc_data)
    normalized_mfcc_spec_data = (mfcc_data - mean_mfcc_spec) / std_mel_spec

    print("Drone samples:", no_of_drone_samples,
          "\nSwarm Drone samples:", no_of_swarm_drone_samples,
          "\nAircraft samples:", no_of_aircraft_samples)

    return normalized_mfcc_spec_data, labels


In [None]:
# Initialize a list to store accuracy values for each duration
accuracies = []

for duration in durations:
    # Call mix_audio function to get the data and labels
    normalized_mfcc_data, labels = mix_audio(dataset_dir, ambient_path, duration, SR)

    # Convert labels to 1D array
    y_labels = np.argmax(labels, axis=1)

    # Split data into train and test sets
    X_train, X_test, y_train_labels, y_test_labels = train_test_split(normalized_mfcc_data, y_labels, test_size=0.2, random_state=42)

    # Create and train SVM classifier
    svm = SVC(C=10, kernel='rbf', gamma='scale')
    svm.fit(X_train, y_train_labels)

    # Predict and calculate accuracy
    y_pred = svm.predict(X_test)
    accuracy = accuracy_score(y_test_labels, y_pred)

    # Append accuracy to the list
    accuracies.append(accuracy)

# Print accuracy for each duration
for i, duration in enumerate(durations):
    print(f"Duration: {duration} seconds - SVM Accuracy: {accuracies[i]}")


# psd with log base 10

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR):
    psd_data = []
    labels = []

    segment_sr_required = duration * SR

    no_of_drone_samples = 0
    no_of_swarm_drone_samples = 0
    no_of_aircraft_samples = 0

    for file_name in os.listdir(main_folder_path):
        if 'drone'.lower() in file_name.lower():
            flag = 0
        elif 'swarm'.lower() in file_name.lower():
            flag = 1
        elif 'aircraft'.lower() in file_name.lower():
            flag = 2
        else:
            continue

        if file_name.endswith('.wav') or file_name.endswith('.WAV') or file_name.endswith('.mp3'):
            file_path = os.path.join(main_folder_path, file_name)

            signal_data, sr = librosa.load(file_path, sr=None, mono=True)
            signal_data = signal_data / np.max(np.abs(signal_data))
            noise_data, sr_noise = librosa.load(file_path_ambient, sr=None, mono=True)
            noise_data = noise_data / np.max(np.abs(noise_data))

            s = len(signal_data)
            n = len(noise_data)

            if n > s:
                noise_data = noise_data[:s]
            elif s > n:
                w = s - n
                noise_data = np.concatenate((noise_data, noise_data[:w]))
            else:
                pass

            N = int(s / segment_sr_required)

            for i in range(N):
                start = int(i * segment_sr_required)
                end = int(start + segment_sr_required)

                segment = signal_data[start:end]
                start_noise = int(i * segment_sr_required)
                end_noise = int(start_noise + segment_sr_required)

                noise = noise_data[start_noise:end_noise]

                rms_signal = np.mean(np.square(segment))
                rms_noise = np.mean(np.square(noise))

                dbset = [-25, -20, -15, -10, -5, 0, 5, 10, 15, 20]

                for j in range(len(dbset)):
                    rms_signal_req_to_increase = rms_noise / (10 ** (-dbset[j] / 10))
                    scaling_factor = np.sqrt(rms_signal_req_to_increase / rms_signal)
                    adjusted_audio_signal = segment * scaling_factor

                    adjusted_audio_signal += noise


                    # Replace MFCC calculation with PSD calculation
                    stft = librosa.core.stft(adjusted_audio_signal)
                    psd = np.abs(stft) ** 2
                    log_psd = np.log10(psd + 1e-10)  # Logarithm (base 10) of PSD values

                    # Append log PSD features instead of MFCC features
                    psd_data.append(log_psd)

                    labels.append(flag)

                    if flag == 0:
                        no_of_drone_samples += 1
                    elif flag == 1:
                        no_of_swarm_drone_samples += 1
                    elif flag == 2:
                        no_of_aircraft_samples += 1

    if len(psd_data) == 0:
        raise ValueError("No audio data available for the given duration.")

    # Pad log PSD data instead of MFCC data
    psd_data = tf.keras.preprocessing.sequence.pad_sequences(psd_data)
    labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    mean_psd_spec = np.mean(psd_data)
    std_psd_spec = np.std(psd_data)
    normalized_psd_spec_data = (psd_data - mean_psd_spec) / std_psd_spec

    print("Drone samples:", no_of_drone_samples,
          "\nSwarm Drone samples:", no_of_swarm_drone_samples,
          "\nAircraft samples:", no_of_aircraft_samples)

    return normalized_psd_spec_data, labels


In [None]:
# Initialize a list to store accuracy values for each duration
accuracies = []

for duration in durations:
    # Call mix_audio function to get the data and labels
    normalized_psd_data, labels = mix_audio(dataset_dir, ambient_path, duration, SR)

    # Convert labels to 1D array
    y_labels = np.argmax(labels, axis=1)

    # Split data into train and test sets
    X_train, X_test, y_train_labels, y_test_labels = train_test_split(normalized_psd_data, y_labels, test_size=0.2, random_state=42)

    # Reshape X_train to have two dimensions
    X_train_2d = X_train.reshape(X_train.shape[0], -1)

    # Create and train SVM classifier
    svm = SVC(C=10, kernel='rbf', gamma='scale')
    svm.fit(X_train_2d, y_train_labels)

    # Predict and calculate accuracy
    X_test_2d = X_test.reshape(X_test.shape[0], -1)
    y_pred = svm.predict(X_test_2d)
    accuracy = accuracy_score(y_test_labels, y_pred)

    # Append accuracy to the list
    accuracies.append(accuracy)


# Print accuracy for each duration
for i, duration in enumerate(durations):
    print(f"Duration: {duration} seconds - SVM Accuracy: {accuracies[i]}")

# psd without log base 10

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR):
    psd_data = []
    labels = []

    segment_sr_required = duration * SR

    no_of_drone_samples = 0
    no_of_swarm_drone_samples = 0
    no_of_aircraft_samples = 0

    for file_name in os.listdir(main_folder_path):
        if 'drone'.lower() in file_name.lower():
            flag = 0
        elif 'swarm'.lower() in file_name.lower():
            flag = 1
        elif 'aircraft'.lower() in file_name.lower():
            flag = 2
        else:
            continue

        if file_name.endswith('.wav') or file_name.endswith('.WAV') or file_name.endswith('.mp3'):
            file_path = os.path.join(main_folder_path, file_name)

            signal_data, sr = librosa.load(file_path, sr=None, mono=True)
            signal_data = signal_data / np.max(np.abs(signal_data))
            noise_data, sr_noise = librosa.load(file_path_ambient, sr=None, mono=True)
            noise_data = noise_data / np.max(np.abs(noise_data))

            s = len(signal_data)
            n = len(noise_data)

            if n > s:
                noise_data = noise_data[:s]
            elif s > n:
                w = s - n
                noise_data = np.concatenate((noise_data, noise_data[:w]))
            else:
                pass

            N = int(s / segment_sr_required)

            for i in range(N):
                start = int(i * segment_sr_required)
                end = int(start + segment_sr_required)

                segment = signal_data[start:end]
                start_noise = int(i * segment_sr_required)
                end_noise = int(start_noise + segment_sr_required)

                noise = noise_data[start_noise:end_noise]

                rms_signal = np.mean(np.square(segment))
                rms_noise = np.mean(np.square(noise))

                dbset = [-25, -20, -15, -10, -5, 0, 5, 10, 15, 20]

                for j in range(len(dbset)):
                    rms_signal_req_to_increase = rms_noise / (10 ** (-dbset[j] / 10))
                    scaling_factor = np.sqrt(rms_signal_req_to_increase / rms_signal)
                    adjusted_audio_signal = segment * scaling_factor

                    adjusted_audio_signal += noise


                    # Replace MFCC calculation with PSD calculation
                    stft = librosa.core.stft(adjusted_audio_signal)
                    psd = np.abs(stft) ** 2

                    # Append PSD features instead of MFCC features
                    psd_data.append(psd)

                    labels.append(flag)

                    if flag == 0:
                        no_of_drone_samples += 1
                    elif flag == 1:
                        no_of_swarm_drone_samples += 1
                    elif flag == 2:
                        no_of_aircraft_samples += 1

    if len(psd_data) == 0:
        raise ValueError("No audio data available for the given duration.")

    # Pad PSD data instead of MFCC data
    psd_data = tf.keras.preprocessing.sequence.pad_sequences(psd_data)
    labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    mean_psd_spec = np.mean(psd_data)
    std_psd_spec = np.std(psd_data)
    normalized_psd_spec_data = (psd_data - mean_psd_spec) / std_psd_spec

    print("Drone samples:", no_of_drone_samples,
          "\nSwarm Drone samples:", no_of_swarm_drone_samples,
          "\nAircraft samples:", no_of_aircraft_samples)

    return normalized_psd_spec_data, labels


In [None]:
# Initialize a list to store accuracy values for each duration
accuracies = []

for duration in durations:
    # Call mix_audio function to get the data and labels
    normalized_psd_data, labels = mix_audio(dataset_dir, ambient_path, duration, SR)

    # Convert labels to 1D array
    y_labels = np.argmax(labels, axis=1)

    # Split data into train and test sets
    X_train, X_test, y_train_labels, y_test_labels = train_test_split(normalized_psd_data, y_labels, test_size=0.2, random_state=42)

    # Reshape X_train to have two dimensions
    X_train_2d = X_train.reshape(X_train.shape[0], -1)

    # Create and train SVM classifier
    svm = SVC(C=10, kernel='rbf', gamma='scale')
    svm.fit(X_train_2d, y_train_labels)

    # Predict and calculate accuracy
    X_test_2d = X_test.reshape(X_test.shape[0], -1)
    y_pred = svm.predict(X_test_2d)
    accuracy = accuracy_score(y_test_labels, y_pred)

    # Append accuracy to the list
    accuracies.append(accuracy)


# Print accuracy for each duration
for i, duration in enumerate(durations):
    print(f"Duration: {duration} seconds - SVM Accuracy: {accuracies[i]}")

# Spectral Centroid with log base 10

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR):
    spectral_centroid_data = []
    labels = []

    segment_sr_required = duration * SR

    no_of_drone_samples = 0
    no_of_swarm_drone_samples = 0
    no_of_aircraft_samples = 0

    for file_name in os.listdir(main_folder_path):
        if 'drone'.lower() in file_name.lower():
            flag = 0
        elif 'swarm'.lower() in file_name.lower():
            flag = 1
        elif 'aircraft'.lower() in file_name.lower():
            flag = 2
        else:
            continue

        if file_name.endswith('.wav') or file_name.endswith('.WAV') or file_name.endswith('.mp3'):
            file_path = os.path.join(main_folder_path, file_name)

            signal_data, sr = librosa.load(file_path, sr=None, mono=True)
            signal_data = signal_data / np.max(np.abs(signal_data))
            noise_data, sr_noise = librosa.load(file_path_ambient, sr=None, mono=True)
            noise_data = noise_data / np.max(np.abs(noise_data))

            s = len(signal_data)
            n = len(noise_data)

            if n > s:
                noise_data = noise_data[:s]
            elif s > n:
                w = s - n
                noise_data = np.concatenate((noise_data, noise_data[:w]))
            else:
                pass

            N = int(s / segment_sr_required)

            for i in range(N):
                start = int(i * segment_sr_required)
                end = int(start + segment_sr_required)

                segment = signal_data[start:end]
                start_noise = int(i * segment_sr_required)
                end_noise = int(start_noise + segment_sr_required)

                noise = noise_data[start_noise:end_noise]

                rms_signal = np.mean(np.square(segment))
                rms_noise = np.mean(np.square(noise))

                dbset = [-25, -20, -15, -10, -5, 0, 5, 10, 15, 20]

                for j in range(len(dbset)):
                    rms_signal_req_to_increase = rms_noise / (10 ** (-dbset[j] / 10))
                    scaling_factor = np.sqrt(rms_signal_req_to_increase / rms_signal)
                    adjusted_audio_signal = segment * scaling_factor

                    adjusted_audio_signal += noise


                    # Calculate Spectral Centroid
                    spectral_centroids = librosa.feature.spectral_centroid(y=adjusted_audio_signal, sr=SR)

                    # Take the logarithm (base 10) of the Spectral Centroid values
                    log_spectral_centroids = np.log10(spectral_centroids + 1e-10)

                    # Append log Spectral Centroid features instead of PSD features
                    spectral_centroid_data.append(log_spectral_centroids)

                    labels.append(flag)

                    if flag == 0:
                        no_of_drone_samples += 1
                    elif flag == 1:
                        no_of_swarm_drone_samples += 1
                    elif flag == 2:
                        no_of_aircraft_samples += 1

    if len(spectral_centroid_data) == 0:
        raise ValueError("No audio data available for the given duration.")

    # Pad Spectral Centroid data instead of PSD data
    spectral_centroid_data = tf.keras.preprocessing.sequence.pad_sequences(spectral_centroid_data)

    labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    mean_spec_spec = np.mean(spectral_centroid_data)
    std_spec_spec = np.std(spectral_centroid_data)
    normalized_spec_spec_data = (spectral_centroid_data - mean_spec_spec) / std_spec_spec

    print("Drone samples:", no_of_drone_samples,
          "\nSwarm Drone samples:", no_of_swarm_drone_samples,
          "\nAircraft samples:", no_of_aircraft_samples)

    return normalized_spec_spec_data, labels


In [None]:
# Initialize a list to store accuracy values for each duration
accuracies = []

for duration in durations:
    # Call mix_audio function to get the data and labels
    normalized_spectral_centroid_data, labels = mix_audio(dataset_dir, ambient_path, duration, SR)

    # Convert labels to 1D array
    y_labels = np.argmax(labels, axis=1)

    # Split data into train and test sets
    X_train, X_test, y_train_labels, y_test_labels = train_test_split(normalized_spectral_centroid_data, y_labels, test_size=0.2, random_state=42)

    # Reshape X_train to have two dimensions
    X_train_2d = X_train.reshape(X_train.shape[0], -1)

    # Create and train SVM classifier
    svm = SVC(C=10, kernel='rbf', gamma='scale')
    svm.fit(X_train_2d, y_train_labels)

    # Predict and calculate accuracy
    X_test_2d = X_test.reshape(X_test.shape[0], -1)
    y_pred = svm.predict(X_test_2d)
    accuracy = accuracy_score(y_test_labels, y_pred)

    # Append accuracy to the list
    accuracies.append(accuracy)

# Print accuracy for each duration
for i, duration in enumerate(durations):
    print(f"Duration: {duration} seconds - SVM Accuracy: {accuracies[i]}")

# Spectral centroid without log base 10

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR):
    spectral_centroid_data = []
    labels = []

    segment_sr_required = duration * SR

    no_of_drone_samples = 0
    no_of_swarm_drone_samples = 0
    no_of_aircraft_samples = 0

    for file_name in os.listdir(main_folder_path):
        if 'drone'.lower() in file_name.lower():
            flag = 0
        elif 'swarm'.lower() in file_name.lower():
            flag = 1
        elif 'aircraft'.lower() in file_name.lower():
            flag = 2
        else:
            continue

        if file_name.endswith('.wav') or file_name.endswith('.WAV') or file_name.endswith('.mp3'):
            file_path = os.path.join(main_folder_path, file_name)

            signal_data, sr = librosa.load(file_path, sr=None, mono=True)
            signal_data = signal_data / np.max(np.abs(signal_data))
            noise_data, sr_noise = librosa.load(file_path_ambient, sr=None, mono=True)
            noise_data = noise_data / np.max(np.abs(noise_data))

            s = len(signal_data)
            n = len(noise_data)

            if n > s:
                noise_data = noise_data[:s]
            elif s > n:
                w = s - n
                noise_data = np.concatenate((noise_data, noise_data[:w]))
            else:
                pass

            N = int(s / segment_sr_required)

            for i in range(N):
                start = int(i * segment_sr_required)
                end = int(start + segment_sr_required)

                segment = signal_data[start:end]
                start_noise = int(i * segment_sr_required)
                end_noise = int(start_noise + segment_sr_required)

                noise = noise_data[start_noise:end_noise]

                rms_signal = np.mean(np.square(segment))
                rms_noise = np.mean(np.square(noise))

                dbset = [-25, -20, -15, -10, -5, 0, 5, 10, 15, 20]

                for j in range(len(dbset)):
                    rms_signal_req_to_increase = rms_noise / (10 ** (-dbset[j] / 10))
                    scaling_factor = np.sqrt(rms_signal_req_to_increase / rms_signal)
                    adjusted_audio_signal = segment * scaling_factor

                    adjusted_audio_signal += noise


                    # Calculate Spectral Centroid
                    spectral_centroids = librosa.feature.spectral_centroid(y=adjusted_audio_signal, sr=SR)

                    # Append Spectral Centroid features instead of log Spectral Centroid features
                    spectral_centroid_data.append(spectral_centroids)

                    labels.append(flag)

                    if flag == 0:
                        no_of_drone_samples += 1
                    elif flag == 1:
                        no_of_swarm_drone_samples += 1
                    elif flag == 2:
                        no_of_aircraft_samples += 1

    if len(spectral_centroid_data) == 0:
        raise ValueError("No audio data available for the given duration.")

    # Pad Spectral Centroid data instead of log Spectral Centroid data
    spectral_centroid_data = tf.keras.preprocessing.sequence.pad_sequences(spectral_centroid_data)


    labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    mean_spec_spec = np.mean(spectral_centroid_data)
    std_spec_spec = np.std(spectral_centroid_data)
    normalized_spec_spec_data = (spectral_centroid_data - mean_spec_spec) / std_spec_spec

    print("Drone samples:", no_of_drone_samples,
          "\nSwarm Drone samples:", no_of_swarm_drone_samples,
          "\nAircraft samples:", no_of_aircraft_samples)

    return normalized_spec_spec_data, labels


In [None]:
# Initialize a list to store accuracy values for each duration
accuracies = []

for duration in durations:
    # Call mix_audio function to get the data and labels
    normalized_spectral_centroid_data, labels = mix_audio(dataset_dir, ambient_path, duration, SR)

    # Convert labels to 1D array
    y_labels = np.argmax(labels, axis=1)

    # Split data into train and test sets
    X_train, X_test, y_train_labels, y_test_labels = train_test_split(normalized_spectral_centroid_data, y_labels, test_size=0.2, random_state=42)

    # Reshape X_train to have two dimensions
    X_train_2d = X_train.reshape(X_train.shape[0], -1)

    # Create and train SVM classifier
    svm = SVC(C=10, kernel='rbf', gamma='scale')
    svm.fit(X_train_2d, y_train_labels)

    # Predict and calculate accuracy
    X_test_2d = X_test.reshape(X_test.shape[0], -1)
    y_pred = svm.predict(X_test_2d)
    accuracy = accuracy_score(y_test_labels, y_pred)

    # Append accuracy to the list
    accuracies.append(accuracy)

# Print accuracy for each duration
for i, duration in enumerate(durations):
    print(f"Duration: {duration} seconds - SVM Accuracy: {accuracies[i]}")

# Zcr with log base 10

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR):
    zcr_data = []
    labels = []

    segment_sr_required = duration * SR

    no_of_drone_samples = 0
    no_of_swarm_drone_samples = 0
    no_of_aircraft_samples = 0

    for file_name in os.listdir(main_folder_path):
        if 'drone'.lower() in file_name.lower():
            flag = 0
        elif 'swarm'.lower() in file_name.lower():
            flag = 1
        elif 'aircraft'.lower() in file_name.lower():
            flag = 2
        else:
            continue

        if file_name.endswith('.wav') or file_name.endswith('.WAV') or file_name.endswith('.mp3'):
            file_path = os.path.join(main_folder_path, file_name)

            signal_data, sr = librosa.load(file_path, sr=None, mono=True)
            signal_data = signal_data / np.max(np.abs(signal_data))
            noise_data, sr_noise = librosa.load(file_path_ambient, sr=None, mono=True)
            noise_data = noise_data / np.max(np.abs(noise_data))

            s = len(signal_data)
            n = len(noise_data)

            if n > s:
                noise_data = noise_data[:s]
            elif s > n:
                w = s - n
                noise_data = np.concatenate((noise_data, noise_data[:w]))
            else:
                pass

            N = int(s / segment_sr_required)

            for i in range(N):
                start = int(i * segment_sr_required)
                end = int(start + segment_sr_required)

                segment = signal_data[start:end]
                start_noise = int(i * segment_sr_required)
                end_noise = int(start_noise + segment_sr_required)

                noise = noise_data[start_noise:end_noise]

                rms_signal = np.mean(np.square(segment))
                rms_noise = np.mean(np.square(noise))

                dbset = [-25, -20, -15, -10, -5, 0, 5, 10, 15, 20]

                for j in range(len(dbset)):
                    rms_signal_req_to_increase = rms_noise / (10 ** (-dbset[j] / 10))
                    scaling_factor = np.sqrt(rms_signal_req_to_increase / rms_signal)
                    adjusted_audio_signal = segment * scaling_factor

                    adjusted_audio_signal += noise


                    # Calculate Zero Crossing Rate
                    zcr = librosa.feature.zero_crossing_rate(y=adjusted_audio_signal)

                    # Take the logarithm (base 10) of the Zero Crossing Rate values
                    log_zcr = np.log10(zcr + 1e-10)

                    # Append log Zero Crossing Rate features instead of log Spectral Centroid features
                    zcr_data.append(log_zcr)

                    labels.append(flag)

                    if flag == 0:
                        no_of_drone_samples += 1
                    elif flag == 1:
                        no_of_swarm_drone_samples += 1
                    elif flag == 2:
                        no_of_aircraft_samples += 1

    if len(zcr_data) == 0:
        raise ValueError("No audio data available for the given duration.")

    # Pad Zero Crossing Rate data instead of log Spectral Centroid data
    zcr_data = tf.keras.preprocessing.sequence.pad_sequences(zcr_data)


    labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    mean_zcr_spec = np.mean(zcr_data)
    std_zcr_spec = np.std(zcr_data)
    normalized_zcr_data = (zcr_data - mean_zcr_spec) / std_zcr_spec

    print("Drone samples:", no_of_drone_samples,
          "\nSwarm Drone samples:", no_of_swarm_drone_samples,
          "\nAircraft samples:", no_of_aircraft_samples)

    return normalized_zcr_data, labels


In [None]:
# Initialize a list to store accuracy values for each duration
accuracies = []

for duration in durations:
    # Call mix_audio function to get the data and labels
    normalized_spectral_centroid_data, labels = mix_audio(dataset_dir, ambient_path, duration, SR)

    # Convert labels to 1D array
    y_labels = np.argmax(labels, axis=1)

    # Split data into train and test sets
    X_train, X_test, y_train_labels, y_test_labels = train_test_split(normalized_spectral_centroid_data, y_labels, test_size=0.2, random_state=42)

    # Reshape X_train to have two dimensions
    X_train_2d = X_train.reshape(X_train.shape[0], -1)

    # Create and train SVM classifier
    svm = SVC(C=10, kernel='rbf', gamma='scale')
    svm.fit(X_train_2d, y_train_labels)

    # Predict and calculate accuracy
    X_test_2d = X_test.reshape(X_test.shape[0], -1)
    y_pred = svm.predict(X_test_2d)
    accuracy = accuracy_score(y_test_labels, y_pred)

    # Append accuracy to the list
    accuracies.append(accuracy)

# Print accuracy for each duration
for i, duration in enumerate(durations):
    print(f"Duration: {duration} seconds - SVM Accuracy: {accuracies[i]}")

# Zcr without log base 10

In [5]:
import os
import numpy as np
import librosa
import tensorflow as tf
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import RobustScaler
def mix_audio(main_folder_path, file_path_ambient, duration, SR):
    zcr_data = []
    labels = []

    segment_sr_required = duration * SR

    no_of_drone_samples = 0
    no_of_swarm_drone_samples = 0
    no_of_aircraft_samples = 0

    for file_name in os.listdir(main_folder_path):
        if 'drone'.lower() in file_name.lower():
            flag = 0
        elif 'swarm'.lower() in file_name.lower():
            flag = 1
        elif 'aircraft'.lower() in file_name.lower():
            flag = 2
        else:
            continue

        if file_name.endswith('.wav') or file_name.endswith('.WAV') or file_name.endswith('.mp3'):
            file_path = os.path.join(main_folder_path, file_name)

            signal_data, sr = librosa.load(file_path, sr=None, mono=True)
            signal_data = signal_data / np.max(np.abs(signal_data))
            noise_data, sr_noise = librosa.load(file_path_ambient, sr=None, mono=True)
            noise_data = noise_data / np.max(np.abs(noise_data))

            s = len(signal_data)
            n = len(noise_data)

            if n > s:
                noise_data = noise_data[:s]
            elif s > n:
                w = s - n
                noise_data = np.concatenate((noise_data, noise_data[:w]))
            else:
                pass

            N = int(s / segment_sr_required)

            for i in range(N):
                start = int(i * segment_sr_required)
                end = int(start + segment_sr_required)

                segment = signal_data[start:end]
                start_noise = int(i * segment_sr_required)
                end_noise = int(start_noise + segment_sr_required)

                noise = noise_data[start_noise:end_noise]

                rms_signal = np.mean(np.square(segment))
                rms_noise = np.mean(np.square(noise))

                dbset = [-25, -20, -15, -10, -5, 0, 5, 10, 15, 20]

                for j in range(len(dbset)):
                    rms_signal_req_to_increase = rms_noise / (10 ** (-dbset[j] / 10))
                    scaling_factor = np.sqrt(rms_signal_req_to_increase / rms_signal)
                    adjusted_audio_signal = segment * scaling_factor

                    adjusted_audio_signal += noise


                    # Calculate Zero Crossing Rate
                    zcr = librosa.feature.zero_crossing_rate(y=adjusted_audio_signal)

                    # Append Zero Crossing Rate features instead of log Zero Crossing Rate features
                    zcr_data.append(zcr[0])  # Extract the ZCR array from the 2D matrix

                    labels.append(flag)

                    if flag == 0:
                        no_of_drone_samples += 1
                    elif flag == 1:
                        no_of_swarm_drone_samples += 1
                    elif flag == 2:
                        no_of_aircraft_samples += 1

    if len(zcr_data) == 0:
        raise ValueError("No audio data available for the given duration.")

    # Pad Zero Crossing Rate data instead of log Zero Crossing Rate data
    zcr_data = tf.keras.preprocessing.sequence.pad_sequences(zcr_data)
    zcr_data[np.isnan(zcr_data)] = 0  # Replace remaining NaN values with zero


    if np.count_nonzero(zcr_data) > 0:
        zcr_data[zcr_data == 0] = np.min(zcr_data[zcr_data != 0])  # Replace zero values with the minimum non-zero value

    scaler = RobustScaler()
    normalized_zcr_data = scaler.fit_transform(zcr_data)
    labels = np.array(labels)  # Convert labels to a 1D array

    print("Drone samples:", no_of_drone_samples,
          "\nSwarm Drone samples:", no_of_swarm_drone_samples,
          "\nAircraft samples:", no_of_aircraft_samples)

    return normalized_zcr_data, labels


In [8]:
# Initialize a list to store accuracy values for each duration
accuracies = []
for duration in durations:
    # Call mix_audio function to get the data and labels
    normalized_zcr_data, labels = mix_audio(dataset_dir, ambient_path, duration, SR)

    # Convert labels to 1D array
    # y_labels = np.argmax(labels, axis=1)

    # Split data into train and test sets
    X_train, X_test, y_train_labels, y_test_labels = train_test_split(normalized_zcr_data, labels, test_size=0.2, random_state=42)

    # Reshape X_train to have two dimensions
    # X_train_2d = X_train.reshape(X_train.shape[0], -1)

    # Create and train SVM classifier
    svm = SVC(C=10, kernel='rbf', gamma='scale')
    svm.fit(X_train, y_train_labels)

    # Predict and calculate accuracy
    # X_test_2d = X_test.reshape(X_test.shape[0], -1)
    y_pred = svm.predict(X_test)
    accuracy = accuracy_score(y_test_labels, y_pred)

    # Append accuracy to the list
    accuracies.append(accuracy)

# Print accuracy for each duration
for i, duration in enumerate(durations):
    print(f"Duration: {duration} seconds - SVM Accuracy: {accuracies[i]}")

Drone samples: 3494 
Swarm Drone samples: 3494 
Aircraft samples: 3494
Duration: 1 seconds - SVM Accuracy: 0.3218884120171674


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
