# Global Code

In [None]:
# pip install librosa

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import tensorflow as tf
import librosa as lr
import soundfile as sf
import scipy.signal as sig
from random import random, randint, shuffle
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier


In [None]:
def train_and_evaluate_model(X_train, y_train, X_test, y_test, model_name):
    """Fit the classifier selected by ``model_name`` and print its test metrics.

    Accuracy, weighted precision/recall/F1 and the confusion matrix are
    computed on ``(X_test, y_test)`` and printed.

    Args:
        X_train: Training feature matrix.
        y_train: Training class labels.
        X_test: Test feature matrix.
        y_test: Test class labels.
        model_name: One of 'SVM', 'Random Forest', 'Naive Bayes',
            'Decision Trees' or 'k-Nearest Neighbors'.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    # Factories (not instances) so that only the requested estimator is built.
    model_factories = {
        'SVM': lambda: SVC(C=10, kernel='rbf', gamma='scale'),
        'Random Forest': lambda: RandomForestClassifier(n_estimators=100, random_state=42),
        'Naive Bayes': lambda: GaussianNB(),
        'Decision Trees': lambda: DecisionTreeClassifier(random_state=42),
        'k-Nearest Neighbors': lambda: KNeighborsClassifier(n_neighbors=5),
    }
    if model_name not in model_factories:
        # Previously an unknown name fell through every branch and crashed
        # later with an UnboundLocalError on `model`.
        raise ValueError(
            "Unknown model_name %r; expected one of %s"
            % (model_name, sorted(model_factories))
        )
    model = model_factories[model_name]()

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    confusion_mat = confusion_matrix(y_test, y_pred)

    print("Model:", model_name)
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:")
    print(confusion_mat)
    print("--------------------------------\n")

In [None]:
# Classifier names accepted by train_and_evaluate_model, in evaluation order.
model_names = [
    'SVM',
    'Random Forest',
    'Naive Bayes',
    'Decision Trees',
    'k-Nearest Neighbors',
]


In [None]:
# Folder with the labelled recordings; the ambient-noise file lives in the same folder.
dataset_dir = '/content/drive/MyDrive/iit_our_rec/'
ambient_path = os.path.join(dataset_dir, 'noise_44min.wav')

# Mel spectrogram

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit):
    """Build a noise-augmented dataset of time-averaged mel spectra.

    Every audio file in ``main_folder_path`` whose name contains ``drone``
    (class 0), ``swarm`` (class 1) or ``aircraft`` (class 2) is peak-normalised,
    cut into non-overlapping segments of ``duration`` seconds and mixed with a
    randomly chosen segment of the ambient recording at ten signal-to-noise
    ratios from -25 dB to +20 dB. Each mixture is reduced to its mel
    spectrogram (256 bands) averaged over time.

    Args:
        main_folder_path: Directory that holds the labelled recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz; both recordings are loaded at this rate.
        Limit: Unused; kept so existing calls keep working.

    Returns:
        Tuple ``(features, labels)``. ``features`` is a float32 array of shape
        ``(n_samples, 256)`` standardised with the global mean and standard
        deviation; ``labels`` is the one-hot class matrix ``(n_samples, 3)``.

    Raises:
        ValueError: If no labelled audio file is found, the ambient recording
            is empty or silent, or no usable segment could be extracted.

    Note:
        Noise segments are drawn with ``np.random``; seed it beforehand for a
        reproducible dataset.
    """
    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must give a positive segment length.")

    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)
    # Checked in this order: a name containing several keywords gets the first match.
    class_keywords = (('drone', 0), ('swarm', 1), ('aircraft', 2))

    labelled_files = []
    for file_name in os.listdir(main_folder_path):
        if not file_name.endswith(('.wav', '.WAV', '.mp3')):
            continue
        lowered = file_name.lower()
        for keyword, class_id in class_keywords:
            if keyword in lowered:
                labelled_files.append((file_name, class_id))
                break
    if not labelled_files:
        raise ValueError("No labelled audio files found in " + str(main_folder_path))

    # The ambient recording is identical for every file, so load it only once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=2640)
    noise_peak = np.max(np.abs(noise_full)) if noise_full.size else 0.0
    if noise_peak == 0:
        raise ValueError("Ambient recording is empty or silent: " + str(file_path_ambient))
    noise_full = noise_full / noise_peak

    features = []
    labels = []
    for file_name, class_id in labelled_files:
        # Loading at SR (not the native rate) keeps segment_len equal to `duration` seconds.
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name), sr=SR, duration=1747)
        signal_peak = np.max(np.abs(signal)) if signal.size else 0.0
        n_segments = len(signal) // segment_len
        if signal_peak == 0 or n_segments == 0:
            continue  # silent or too short: nothing usable in this file
        signal = signal / signal_peak

        # Match the noise length to the signal, repeating the noise cyclically if it is shorter.
        if len(noise_full) >= len(signal):
            noise_track = noise_full[:len(signal)]
        else:
            noise_track = np.resize(noise_full, len(signal))

        for seg_idx in range(n_segments):
            segment = signal[seg_idx * segment_len:(seg_idx + 1) * segment_len]
            # randint's upper bound is exclusive, so this covers every noise segment
            # and is valid even when there is a single segment.
            noise_idx = np.random.randint(0, n_segments)
            noise = noise_track[noise_idx * segment_len:(noise_idx + 1) * segment_len]

            signal_power = np.mean(np.square(segment))
            noise_power = np.mean(np.square(noise))
            if signal_power == 0 or noise_power == 0:
                continue  # the SNR is undefined for a silent segment

            for snr_db in snr_levels_db:
                # Scale the signal so that signal_power / noise_power == 10 ** (snr_db / 10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixture = segment * np.sqrt(target_power / signal_power) + noise

                mel_spec = librosa.feature.melspectrogram(
                    y=mixture, sr=SR, n_fft=2048, hop_length=512, win_length=1024, n_mels=256)
                features.append(np.mean(mel_spec, axis=-1))
                labels.append(class_id)

    if not features:
        raise ValueError("No usable audio segments were extracted.")

    counts = np.bincount(labels, minlength=3)

    # Stack as floats: pad_sequences would cast to its default int32 dtype and
    # silently truncate every feature value.
    feature_matrix = np.asarray(features, dtype=np.float32)
    one_hot_labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    mean_value = np.float32(feature_matrix.mean())
    std_value = np.float32(feature_matrix.std())
    normalized_mel_spec_data = (feature_matrix - mean_value) / std_value

    print("Drone samples:", int(counts[0]),
          "\nSwarm Drone samples:", int(counts[1]),
          "\nAircraft samples:", int(counts[2]))

    return normalized_mel_spec_data, one_hot_labels


In [None]:
# Extract mel features from 1-second segments at 44.1 kHz.
mel_data, labels = mix_audio(
    main_folder_path=dataset_dir,
    file_path_ambient=ambient_path,
    duration=1,
    SR=44100,
    Limit=21000,
)


Drone samples: 17470 
Swarm Drone samples: 17470 
Aircraft samples: 17470


In [None]:
# Collapse the one-hot rows back to integer class ids.
y_labels = labels.argmax(axis=1)

# Hold out 20 % of the samples for testing (fixed seed).
# NOTE(review): the SNR variants of one segment can land on both sides of
# this random split — confirm whether a group-wise split is wanted.
X_train, X_test, y_train, y_test = train_test_split(
    mel_data,
    y_labels,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Fit and report every classifier on the current train/test split.
for model_name in model_names:
    train_and_evaluate_model(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        model_name=model_name,
    )

Model: SVM
Accuracy: 0.5938752146536921
Precision: 0.7032562562438024
Recall: 0.5938752146536921
F1 Score: 0.5849125842324682
Confusion Matrix:
[[3136  221  113]
 [1654 1736   96]
 [1876  297 1353]]
--------------------------------

Model: Random Forest
Accuracy: 0.8651974813966801
Precision: 0.8653506961271721
Recall: 0.8651974813966801
F1 Score: 0.8651417750740948
Confusion Matrix:
[[3020  186  264]
 [ 208 3090  188]
 [ 336  231 2959]]
--------------------------------

Model: Naive Bayes
Accuracy: 0.45363480251860333
Precision: 0.7572200332682331
Recall: 0.45363480251860333
F1 Score: 0.388810736936837
Confusion Matrix:
[[ 789    2 2679]
 [   0  499 2987]
 [  16   43 3467]]
--------------------------------

Model: Decision Trees
Accuracy: 0.808051898492654
Precision: 0.8081810445731825
Recall: 0.808051898492654
F1 Score: 0.8078855834835477
Confusion Matrix:
[[2859  231  380]
 [ 273 2911  302]
 [ 486  340 2700]]
--------------------------------

Model: k-Nearest Neighbors
Accuracy: 0.7

# MFCC

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit):
    """Build a noise-augmented dataset of time-averaged MFCC vectors.

    Every audio file in ``main_folder_path`` whose name contains ``drone``
    (class 0), ``swarm`` (class 1) or ``aircraft`` (class 2) is peak-normalised,
    cut into non-overlapping segments of ``duration`` seconds and mixed with a
    randomly chosen segment of the ambient recording at ten signal-to-noise
    ratios from -25 dB to +20 dB. Each mixture is reduced to its 23 MFCCs
    averaged over time.

    Args:
        main_folder_path: Directory that holds the labelled recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz; both recordings are loaded at this rate.
        Limit: Unused; kept so existing calls keep working.

    Returns:
        Tuple ``(features, labels)``. ``features`` is a float32 array of shape
        ``(n_samples, 23)`` standardised with the global mean and standard
        deviation; ``labels`` is the one-hot class matrix ``(n_samples, 3)``.

    Raises:
        ValueError: If no labelled audio file is found, the ambient recording
            is empty or silent, or no usable segment could be extracted.

    Note:
        Noise segments are drawn with ``np.random``; seed it beforehand for a
        reproducible dataset.
    """
    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must give a positive segment length.")

    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)
    # Checked in this order: a name containing several keywords gets the first match.
    class_keywords = (('drone', 0), ('swarm', 1), ('aircraft', 2))

    labelled_files = []
    for file_name in os.listdir(main_folder_path):
        if not file_name.endswith(('.wav', '.WAV', '.mp3')):
            continue
        lowered = file_name.lower()
        for keyword, class_id in class_keywords:
            if keyword in lowered:
                labelled_files.append((file_name, class_id))
                break
    if not labelled_files:
        raise ValueError("No labelled audio files found in " + str(main_folder_path))

    # The ambient recording is identical for every file, so load it only once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=2640)
    noise_peak = np.max(np.abs(noise_full)) if noise_full.size else 0.0
    if noise_peak == 0:
        raise ValueError("Ambient recording is empty or silent: " + str(file_path_ambient))
    noise_full = noise_full / noise_peak

    features = []
    labels = []
    for file_name, class_id in labelled_files:
        # Loading at SR (not the native rate) keeps segment_len equal to `duration` seconds.
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name), sr=SR, duration=1747)
        signal_peak = np.max(np.abs(signal)) if signal.size else 0.0
        n_segments = len(signal) // segment_len
        if signal_peak == 0 or n_segments == 0:
            continue  # silent or too short: nothing usable in this file
        signal = signal / signal_peak

        # Match the noise length to the signal, repeating the noise cyclically if it is shorter.
        if len(noise_full) >= len(signal):
            noise_track = noise_full[:len(signal)]
        else:
            noise_track = np.resize(noise_full, len(signal))

        for seg_idx in range(n_segments):
            segment = signal[seg_idx * segment_len:(seg_idx + 1) * segment_len]
            # randint's upper bound is exclusive, so this covers every noise segment
            # and is valid even when there is a single segment.
            noise_idx = np.random.randint(0, n_segments)
            noise = noise_track[noise_idx * segment_len:(noise_idx + 1) * segment_len]

            signal_power = np.mean(np.square(segment))
            noise_power = np.mean(np.square(noise))
            if signal_power == 0 or noise_power == 0:
                continue  # the SNR is undefined for a silent segment

            for snr_db in snr_levels_db:
                # Scale the signal so that signal_power / noise_power == 10 ** (snr_db / 10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixture = segment * np.sqrt(target_power / signal_power) + noise

                mfcc = librosa.feature.mfcc(y=mixture, sr=SR, n_mfcc=23)
                features.append(np.mean(mfcc, axis=-1))
                labels.append(class_id)

    if not features:
        raise ValueError("No usable audio segments were extracted.")

    counts = np.bincount(labels, minlength=3)

    # Stack as floats: pad_sequences would cast to its default int32 dtype and
    # silently truncate every coefficient.
    feature_matrix = np.asarray(features, dtype=np.float32)
    one_hot_labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    mean_value = np.float32(feature_matrix.mean())
    std_value = np.float32(feature_matrix.std())
    normalized_mfcc_data = (feature_matrix - mean_value) / std_value

    print("Drone samples:", int(counts[0]),
          "\nSwarm Drone samples:", int(counts[1]),
          "\nAircraft samples:", int(counts[2]))

    return normalized_mfcc_data, one_hot_labels


In [None]:
# Extract MFCC features from 1-second segments at 44.1 kHz.
mfcc_data, labels_mfcc = mix_audio(
    main_folder_path=dataset_dir,
    file_path_ambient=ambient_path,
    duration=1,
    SR=44100,
    Limit=21000,
)

# Collapse the one-hot rows back to integer class ids.
y_labels = labels_mfcc.argmax(axis=1)

# Hold out 20 % of the samples for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    mfcc_data,
    y_labels,
    test_size=0.2,
    random_state=42,
)

Drone samples: 17470 
Swarm Drone samples: 17470 
Aircraft samples: 17470


In [None]:
# Fit and report every classifier on the current train/test split.
for model_name in model_names:
    train_and_evaluate_model(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        model_name=model_name,
    )

Model: SVM
Accuracy: 0.940660179355085
Precision: 0.9414153173301337
Recall: 0.940660179355085
F1 Score: 0.9408167988886603
Confusion Matrix:
[[3409   11   50]
 [   2 3155  329]
 [  20  210 3296]]
--------------------------------

Model: Random Forest
Accuracy: 0.9758633848502194
Precision: 0.9760549057011368
Recall: 0.9758633848502194
F1 Score: 0.9759038387402049
Confusion Matrix:
[[3438    2   30]
 [   0 3354  132]
 [  11   78 3437]]
--------------------------------

Model: Naive Bayes
Accuracy: 0.7706544552566305
Precision: 0.7986387335990769
Recall: 0.7706544552566305
F1 Score: 0.7727180492093898
Confusion Matrix:
[[2929  104  437]
 [  23 2119 1344]
 [ 100  396 3030]]
--------------------------------

Model: Decision Trees
Accuracy: 0.9308338103415379
Precision: 0.9309911966858734
Recall: 0.9308338103415379
F1 Score: 0.9309048762862886
Confusion Matrix:
[[3370   27   73]
 [  29 3182  275]
 [  50  271 3205]]
--------------------------------

Model: k-Nearest Neighbors
Accuracy: 0.95

# ZCR

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report

def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit):
    """Build a noise-augmented dataset of mean zero-crossing rates.

    Every audio file in ``main_folder_path`` whose name contains ``drone``
    (class 0), ``swarm`` (class 1) or ``aircraft`` (class 2) is peak-normalised,
    cut into non-overlapping segments of ``duration`` seconds and mixed with a
    randomly chosen segment of the ambient recording at ten signal-to-noise
    ratios from -25 dB to +20 dB. Each mixture is reduced to its zero-crossing
    rate (frame and hop length of ``SR`` samples) averaged over frames.

    Args:
        main_folder_path: Directory that holds the labelled recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz; both recordings are loaded at this rate.
        Limit: Unused; kept so existing calls keep working.

    Returns:
        Tuple ``(features, labels)``. ``features`` is a float array of shape
        ``(n_samples, 1)`` standardised with the global mean and standard
        deviation; ``labels`` is the one-hot class matrix ``(n_samples, 3)``.

    Raises:
        ValueError: If no labelled audio file is found, the ambient recording
            is empty or silent, or no usable segment could be extracted.

    Note:
        Noise segments are drawn with ``np.random``; seed it beforehand for a
        reproducible dataset.
    """
    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must give a positive segment length.")

    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)
    # Checked in this order: a name containing several keywords gets the first match.
    class_keywords = (('drone', 0), ('swarm', 1), ('aircraft', 2))

    labelled_files = []
    for file_name in os.listdir(main_folder_path):
        if not file_name.endswith(('.wav', '.WAV', '.mp3')):
            continue
        lowered = file_name.lower()
        for keyword, class_id in class_keywords:
            if keyword in lowered:
                labelled_files.append((file_name, class_id))
                break
    if not labelled_files:
        raise ValueError("No labelled audio files found in " + str(main_folder_path))

    # The ambient recording is identical for every file, so load it only once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=2640)
    noise_peak = np.max(np.abs(noise_full)) if noise_full.size else 0.0
    if noise_peak == 0:
        raise ValueError("Ambient recording is empty or silent: " + str(file_path_ambient))
    noise_full = noise_full / noise_peak

    features = []
    labels = []
    for file_name, class_id in labelled_files:
        # Loading at SR (not the native rate) keeps segment_len equal to `duration` seconds.
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name), sr=SR, duration=1747)
        signal_peak = np.max(np.abs(signal)) if signal.size else 0.0
        n_segments = len(signal) // segment_len
        if signal_peak == 0 or n_segments == 0:
            continue  # silent or too short: nothing usable in this file
        signal = signal / signal_peak

        # Match the noise length to the signal, repeating the noise cyclically if it is shorter.
        if len(noise_full) >= len(signal):
            noise_track = noise_full[:len(signal)]
        else:
            noise_track = np.resize(noise_full, len(signal))

        for seg_idx in range(n_segments):
            segment = signal[seg_idx * segment_len:(seg_idx + 1) * segment_len]
            # randint's upper bound is exclusive, so this covers every noise segment
            # and is valid even when there is a single segment.
            noise_idx = np.random.randint(0, n_segments)
            noise = noise_track[noise_idx * segment_len:(noise_idx + 1) * segment_len]

            signal_power = np.mean(np.square(segment))
            noise_power = np.mean(np.square(noise))
            if signal_power == 0 or noise_power == 0:
                continue  # the SNR is undefined for a silent segment

            for snr_db in snr_levels_db:
                # Scale the signal so that signal_power / noise_power == 10 ** (snr_db / 10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixture = segment * np.sqrt(target_power / signal_power) + noise

                zcr = librosa.feature.zero_crossing_rate(y=mixture, frame_length=SR, hop_length=SR)
                features.append(np.mean(zcr, axis=-1))
                labels.append(class_id)

    if not features:
        raise ValueError("No usable audio segments were extracted.")

    counts = np.bincount(labels, minlength=3)

    zcr_data = np.array(features)
    one_hot_labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    normalized_zcr_data = (zcr_data - np.mean(zcr_data)) / np.std(zcr_data)

    print("Drone samples:", int(counts[0]),
          "\nSwarm Drone samples:", int(counts[1]),
          "\nAircraft samples:", int(counts[2]))

    return normalized_zcr_data, one_hot_labels



In [None]:
# Extract zero-crossing-rate features from 1-second segments at 44.1 kHz.
zcr_data, labels_zcr = mix_audio(
    main_folder_path=dataset_dir,
    file_path_ambient=ambient_path,
    duration=1,
    SR=44100,
    Limit=21000,
)


Drone samples: 17470 
Swarm Drone samples: 17470 
Aircraft samples: 17470


In [None]:
# Collapse the one-hot rows back to integer class ids.
y_labels = labels_zcr.argmax(axis=1)

# Hold out 20 % of the samples for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    zcr_data,
    y_labels,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit and report every classifier on the current train/test split.
for model_name in model_names:
    train_and_evaluate_model(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        model_name=model_name,
    )

Model: SVM
Accuracy: 0.5012402213318069
Precision: 0.5313582166338048
Recall: 0.5012402213318069
F1 Score: 0.4829257823276514
Confusion Matrix:
[[1430  813 1227]
 [ 410  988 2088]
 [ 185  505 2836]]
--------------------------------

Model: Random Forest
Accuracy: 0.4673726388093875
Precision: 0.46932079023117906
Recall: 0.4673726388093875
F1 Score: 0.46760046640609054
Confusion Matrix:
[[1703  996  771]
 [ 850 1370 1266]
 [ 597 1103 1826]]
--------------------------------

Model: Naive Bayes
Accuracy: 0.4956115245182217
Precision: 0.5366924480335443
Recall: 0.4956115245182217
F1 Score: 0.48085341795939673
Confusion Matrix:
[[1322 1008 1140]
 [ 347 1095 2044]
 [  98  650 2778]]
--------------------------------

Model: Decision Trees
Accuracy: 0.4671818355275711
Precision: 0.467359551400327
Recall: 0.4671818355275711
F1 Score: 0.4666669687474248
Confusion Matrix:
[[1903  962  605]
 [1050 1417 1019]
 [ 773 1176 1577]]
--------------------------------

Model: k-Nearest Neighbors
Accuracy: 

# Power spectral density (PSD)

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit):
    """Build a noise-augmented dataset of log power spectra.

    Every audio file in ``main_folder_path`` whose name contains ``drone``
    (class 0), ``swarm`` (class 1) or ``aircraft`` (class 2) is peak-normalised,
    cut into non-overlapping segments of ``duration`` seconds and mixed with a
    randomly chosen segment of the ambient recording at ten signal-to-noise
    ratios from -25 dB to +20 dB. Each mixture is reduced to
    ``10 * log10(|FFT|^2)`` over the full FFT of the segment.

    Args:
        main_folder_path: Directory that holds the labelled recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz; both recordings are loaded at this rate.
        Limit: Unused; kept so existing calls keep working.

    Returns:
        Tuple ``(features, labels)``. ``features`` is a float32 array of shape
        ``(n_samples, duration * SR)`` standardised with the global mean and
        standard deviation; ``labels`` is a 1-D array of integer class ids.

    Raises:
        ValueError: If no labelled audio file is found, the ambient recording
            is empty or silent, or no usable segment could be extracted.

    Note:
        Noise segments are drawn with ``np.random``; seed it beforehand for a
        reproducible dataset. One row per sample holds ``duration * SR``
        values, so the result can be very large.
    """
    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must give a positive segment length.")

    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)
    # Checked in this order: a name containing several keywords gets the first match.
    class_keywords = (('drone', 0), ('swarm', 1), ('aircraft', 2))
    # Floor for the power spectrum so that log10 never sees an exact zero.
    psd_floor = np.finfo(np.float64).tiny

    labelled_files = []
    for file_name in os.listdir(main_folder_path):
        if not file_name.endswith(('.wav', '.WAV', '.mp3')):
            continue
        lowered = file_name.lower()
        for keyword, class_id in class_keywords:
            if keyword in lowered:
                labelled_files.append((file_name, class_id))
                break
    if not labelled_files:
        raise ValueError("No labelled audio files found in " + str(main_folder_path))

    # The ambient recording is identical for every file, so load it only once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=2640)
    noise_peak = np.max(np.abs(noise_full)) if noise_full.size else 0.0
    if noise_peak == 0:
        raise ValueError("Ambient recording is empty or silent: " + str(file_path_ambient))
    noise_full = noise_full / noise_peak

    features = []
    labels = []
    for file_name, class_id in labelled_files:
        # Loading at SR (not the native rate) keeps segment_len equal to `duration` seconds.
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name), sr=SR, duration=1747)
        signal_peak = np.max(np.abs(signal)) if signal.size else 0.0
        n_segments = len(signal) // segment_len
        if signal_peak == 0 or n_segments == 0:
            continue  # silent or too short: nothing usable in this file
        signal = signal / signal_peak

        # Match the noise length to the signal, repeating the noise cyclically if it is shorter.
        if len(noise_full) >= len(signal):
            noise_track = noise_full[:len(signal)]
        else:
            noise_track = np.resize(noise_full, len(signal))

        for seg_idx in range(n_segments):
            segment = signal[seg_idx * segment_len:(seg_idx + 1) * segment_len]
            # randint's upper bound is exclusive, so this covers every noise segment
            # and is valid even when there is a single segment.
            noise_idx = np.random.randint(0, n_segments)
            noise = noise_track[noise_idx * segment_len:(noise_idx + 1) * segment_len]

            signal_power = np.mean(np.square(segment))
            noise_power = np.mean(np.square(noise))
            if signal_power == 0 or noise_power == 0:
                continue  # the SNR is undefined for a silent segment

            for snr_db in snr_levels_db:
                # Scale the signal so that signal_power / noise_power == 10 ** (snr_db / 10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixture = segment * np.sqrt(target_power / signal_power) + noise

                psd = np.abs(np.fft.fft(mixture.astype(np.float64))) ** 2
                log_psd = 10 * np.log10(np.maximum(psd, psd_floor))

                # float32 rows halve the memory of the collected spectra.
                features.append(log_psd.astype(np.float32))
                labels.append(class_id)

    if not features:
        raise ValueError("No usable audio segments were extracted.")

    counts = np.bincount(labels, minlength=3)

    # Stack as floats: pad_sequences would cast to its default int32 dtype and
    # silently truncate every dB value.
    log_psd_data = np.asarray(features, dtype=np.float32)
    labels = np.asarray(labels)

    mean_log_psd = np.float32(log_psd_data.mean())
    std_log_psd = np.float32(log_psd_data.std())
    normalized_log_psd_data = (log_psd_data - mean_log_psd) / std_log_psd

    print("Drone samples:", int(counts[0]),
          "\nSwarm Drone samples:", int(counts[1]),
          "\nAircraft samples:", int(counts[2]))

    return normalized_log_psd_data, labels


In [None]:
# less ram
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR):
    """Build a noise-augmented, per-bin standardised log-PSD dataset in one pass.

    Every audio file in ``main_folder_path`` whose name contains ``drone``
    (class 0), ``swarm`` (class 1) or ``aircraft`` (class 2) is peak-normalised,
    cut into non-overlapping segments of ``duration`` seconds and mixed with a
    randomly chosen segment of the ambient recording at ten signal-to-noise
    ratios from -25 dB to +20 dB. Each mixture is reduced to
    ``10 * log10(|FFT|^2)``. A running mean and variance are kept per
    frequency bin while the spectra are collected, and every spectrum is then
    standardised with those per-bin statistics.

    Args:
        main_folder_path: Directory that holds the labelled recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz; both recordings are loaded at this rate.

    Returns:
        Tuple ``(features, labels)``. ``features`` is a float32 array of shape
        ``(n_samples, duration * SR)``; ``labels`` is a 1-D array holding the
        integer class id of each row.

    Raises:
        ValueError: If no labelled audio is available, or the ambient
            recording is empty or silent.

    Note:
        Noise segments are drawn with ``np.random``; seed it beforehand for a
        reproducible dataset.
    """
    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must give a positive segment length.")

    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)
    # Checked in this order: a name containing several keywords gets the first match.
    class_keywords = (('drone', 0), ('swarm', 1), ('aircraft', 2))
    # Floor for the power spectrum so that log10 never sees an exact zero.
    psd_floor = np.finfo(np.float64).tiny

    labelled_files = []
    for file_name in os.listdir(main_folder_path):
        if not file_name.endswith(('.wav', '.WAV', '.mp3')):
            continue
        lowered = file_name.lower()
        for keyword, class_id in class_keywords:
            if keyword in lowered:
                labelled_files.append((file_name, class_id))
                break
    if not labelled_files:
        raise ValueError("No audio data available for the given duration.")

    # The ambient recording is identical for every file, so load it only once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=2640)
    noise_peak = np.max(np.abs(noise_full)) if noise_full.size else 0.0
    if noise_peak == 0:
        raise ValueError("Ambient recording is empty or silent: " + str(file_path_ambient))
    noise_full = noise_full / noise_peak

    rows = []
    labels = []
    count_log_psd = 0
    # Welford accumulators, one entry per frequency bin.
    running_mean = np.zeros(segment_len, dtype=np.float64)
    running_m2 = np.zeros(segment_len, dtype=np.float64)

    for file_name, class_id in labelled_files:
        # Loading at SR (not the native rate) keeps segment_len equal to `duration` seconds.
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name), sr=SR, duration=1747)
        signal_peak = np.max(np.abs(signal)) if signal.size else 0.0
        n_segments = len(signal) // segment_len
        if signal_peak == 0 or n_segments == 0:
            continue  # silent or too short: nothing usable in this file
        signal = signal / signal_peak

        # Match the noise length to the signal, repeating the noise cyclically if it is shorter.
        if len(noise_full) >= len(signal):
            noise_track = noise_full[:len(signal)]
        else:
            noise_track = np.resize(noise_full, len(signal))

        for seg_idx in range(n_segments):
            segment = signal[seg_idx * segment_len:(seg_idx + 1) * segment_len]
            # randint's upper bound is exclusive, so this covers every noise segment
            # and is valid even when there is a single segment.
            noise_idx = np.random.randint(0, n_segments)
            noise = noise_track[noise_idx * segment_len:(noise_idx + 1) * segment_len]

            signal_power = np.mean(np.square(segment))
            noise_power = np.mean(np.square(noise))
            if signal_power == 0 or noise_power == 0:
                continue  # the SNR is undefined for a silent segment

            for snr_db in snr_levels_db:
                # Scale the signal so that signal_power / noise_power == 10 ** (snr_db / 10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixture = segment * np.sqrt(target_power / signal_power) + noise

                psd = np.abs(np.fft.fft(mixture.astype(np.float64))) ** 2
                log_psd = 10 * np.log10(np.maximum(psd, psd_floor))

                # Update the running per-bin mean and sum of squared deviations.
                count_log_psd += 1
                delta = log_psd - running_mean
                running_mean += delta / count_log_psd
                running_m2 += delta * (log_psd - running_mean)

                # Keep the spectrum itself: a second pass would redraw different
                # random noise and no longer match the statistics above.
                rows.append(log_psd.astype(np.float32))
                labels.append(class_id)

        # Release the per-file audio before the next file is loaded.
        del signal, noise_track

    if count_log_psd == 0:
        raise ValueError("No audio data available for the given duration.")

    std_log_psd = np.sqrt(running_m2 / count_log_psd)
    std_log_psd[std_log_psd == 0] = 1.0  # constant bins: avoid dividing by zero
    mean32 = running_mean.astype(np.float32)
    std32 = std_log_psd.astype(np.float32)

    # Fill the output row by row and drop each source row right away, so the
    # raw and the normalised copies never both exist in full.
    normalized_log_psd_data = np.empty((count_log_psd, segment_len), dtype=np.float32)
    for row_idx in range(count_log_psd):
        normalized_log_psd_data[row_idx] = (rows[row_idx] - mean32) / std32
        rows[row_idx] = None

    counts = np.bincount(labels, minlength=3)
    labels = np.asarray(labels)

    print("Drone samples:", int(counts[0]),
          "\nSwarm Drone samples:", int(counts[1]),
          "\nAircraft samples:", int(counts[2]))

    return normalized_log_psd_data, labels


In [None]:
# Extract log-PSD features from 1-second segments at 44.1 kHz.
psd_data, labels = mix_audio(
    main_folder_path=dataset_dir,
    file_path_ambient=ambient_path,
    duration=1,
    SR=44100,
)

In [None]:
# Hold out 20 % of the samples for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    psd_data,
    labels,
    test_size=0.2,
    random_state=42,
)


In [None]:
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.impute import SimpleImputer

# Replace missing values with the per-feature mean learned on the training set.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)

# Apply the same fitted imputer to the test set: otherwise its NaNs survive
# and its columns can stop matching the training matrix.
X_test = imputer.transform(X_test)

In [None]:
# Fit and report every classifier on the current train/test split.
for model_name in model_names:
    train_and_evaluate_model(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        model_name=model_name,
    )

# Spectral Centroid

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf

def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit):
    """Build a noise-augmented dataset of log spectral-centroid tracks.

    Every audio file in ``main_folder_path`` whose name contains ``drone``
    (class 0), ``swarm`` (class 1) or ``aircraft`` (class 2) is peak-normalised,
    cut into non-overlapping segments of ``duration`` seconds and mixed with a
    randomly chosen segment of the ambient recording at ten signal-to-noise
    ratios from -25 dB to +20 dB. Each mixture is reduced to ``log10`` of its
    frame-wise spectral centroid.

    Args:
        main_folder_path: Directory that holds the labelled recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz; both recordings are loaded at this rate.
        Limit: Unused; kept so existing calls keep working.

    Returns:
        Tuple ``(features, labels)``. ``features`` is a float32 array of shape
        ``(n_samples, 1, n_frames)`` standardised with the global mean and
        standard deviation; ``labels`` is the one-hot class matrix
        ``(n_samples, 3)``.

    Raises:
        ValueError: If no labelled audio file is found, the ambient recording
            is empty or silent, or no usable segment could be extracted.

    Note:
        Noise segments are drawn with ``np.random``; seed it beforehand for a
        reproducible dataset.
    """
    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must give a positive segment length.")

    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)
    # Checked in this order: a name containing several keywords gets the first match.
    class_keywords = (('drone', 0), ('swarm', 1), ('aircraft', 2))

    labelled_files = []
    for file_name in os.listdir(main_folder_path):
        if not file_name.endswith(('.wav', '.WAV', '.mp3')):
            continue
        lowered = file_name.lower()
        for keyword, class_id in class_keywords:
            if keyword in lowered:
                labelled_files.append((file_name, class_id))
                break
    if not labelled_files:
        raise ValueError("No labelled audio files found in " + str(main_folder_path))

    # The ambient recording is identical for every file, so load it only once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=2640)
    noise_peak = np.max(np.abs(noise_full)) if noise_full.size else 0.0
    if noise_peak == 0:
        raise ValueError("Ambient recording is empty or silent: " + str(file_path_ambient))
    noise_full = noise_full / noise_peak

    features = []
    labels = []
    for file_name, class_id in labelled_files:
        # Load at SR (previously a hard-coded 44100) so the segments match the caller's rate.
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name), sr=SR, duration=1747)
        signal_peak = np.max(np.abs(signal)) if signal.size else 0.0
        n_segments = len(signal) // segment_len
        if signal_peak == 0 or n_segments == 0:
            continue  # silent or too short: nothing usable in this file
        signal = signal / signal_peak

        # Match the noise length to the signal, repeating the noise cyclically if it is shorter.
        if len(noise_full) >= len(signal):
            noise_track = noise_full[:len(signal)]
        else:
            noise_track = np.resize(noise_full, len(signal))

        for seg_idx in range(n_segments):
            segment = signal[seg_idx * segment_len:(seg_idx + 1) * segment_len]
            # randint's upper bound is exclusive, so this covers every noise segment
            # and is valid even when there is a single segment.
            noise_idx = np.random.randint(0, n_segments)
            noise = noise_track[noise_idx * segment_len:(noise_idx + 1) * segment_len]

            signal_power = np.mean(np.square(segment))
            noise_power = np.mean(np.square(noise))
            if signal_power == 0 or noise_power == 0:
                continue  # the SNR is undefined for a silent segment

            for snr_db in snr_levels_db:
                # Scale the signal so that signal_power / noise_power == 10 ** (snr_db / 10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixture = segment * np.sqrt(target_power / signal_power) + noise

                spec_centroid = librosa.feature.spectral_centroid(y=mixture, sr=SR)
                # Clamp to 1 Hz so that a zero centroid maps to 0.0 instead of -inf.
                log_spec_centroid = np.log10(np.maximum(spec_centroid, 1.0))

                features.append(log_spec_centroid)
                labels.append(class_id)

    if not features:
        raise ValueError("No usable audio segments were extracted.")

    counts = np.bincount(labels, minlength=3)

    # Stack as floats: pad_sequences would cast to its default int32 dtype and
    # collapse the log10 values to a handful of integers.
    spec_centroid_data = np.asarray(features, dtype=np.float32)
    one_hot_labels = tf.keras.utils.to_categorical(labels, num_classes=3)

    mean_spec_centroid = np.float32(spec_centroid_data.mean())
    std_spec_centroid = np.float32(spec_centroid_data.std())
    normalized_spec_centroid_data = (spec_centroid_data - mean_spec_centroid) / std_spec_centroid

    print("Drone samples:", int(counts[0]),
          "\nSwarm Drone samples:", int(counts[1]),
          "\nAircraft samples:", int(counts[2]))

    return normalized_spec_centroid_data, one_hot_labels


In [None]:
# Extract spectral-centroid features from 1-second segments at 44.1 kHz.
normalized_spec_centroid_data, labels = mix_audio(
    main_folder_path=dataset_dir,
    file_path_ambient=ambient_path,
    duration=1,
    SR=44100,
    Limit=21000,
)

Drone samples: 17470 
Swarm Drone samples: 17470 
Aircraft samples: 17470


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.impute import SimpleImputer
import numpy as np

# Flatten every sample to a single feature vector (2-D input for scikit-learn).
X = normalized_spec_centroid_data.reshape(normalized_spec_centroid_data.shape[0], -1)

# Convert one-hot encoded labels to a 1-dimensional array of class ids.
y = np.argmax(labels, axis=1)

# Hold out 20 % of the samples for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Replace missing values with the per-feature mean learned on the training set.
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)

# Apply the same fitted imputer to the test set: otherwise its NaNs survive
# and its columns can stop matching the training matrix.
X_test = imputer.transform(X_test)

In [None]:
# Fit and report every classifier on the current train/test split.
for model_name in model_names:
    train_and_evaluate_model(
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
        model_name=model_name,
    )

Model: SVM
Accuracy: 0.44657508109139477
Precision: 0.737974325565513
Recall: 0.44657508109139477
F1 Score: 0.3832109984442644
Confusion Matrix:
[[ 616 2840   14]
 [   9 3404   73]
 [  20 2845  661]]
--------------------------------

Model: Random Forest
Accuracy: 0.44762449914138525
Precision: 0.7454924323132022
Recall: 0.44762449914138525
F1 Score: 0.38524012962544846
Confusion Matrix:
[[ 619 2835   16]
 [   6 3401   79]
 [   3 2851  672]]
--------------------------------

Model: Naive Bayes
Accuracy: 0.4390383514596451
Precision: 0.7271998395903105
Recall: 0.4390383514596451
F1 Score: 0.37407418004137866
Confusion Matrix:
[[ 586 2874   10]
 [  16 3374   96]
 [  13 2871  642]]
--------------------------------

Model: Decision Trees
Accuracy: 0.4425682121732494
Precision: 0.7431085670563256
Recall: 0.4425682121732494
F1 Score: 0.3761682941348525
Confusion Matrix:
[[ 617 2840   13]
 [   9 3414   63]
 [  12 2906  608]]
--------------------------------

Model: k-Nearest Neighbors
Accurac