# Libraries

In [None]:
# Mount Google Drive (Colab only); the dataset paths used below live under
# /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.metrics import silhouette_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import tensorflow as tf
import librosa as lr
import soundfile as sf
import scipy.signal as sig
from random import random, randint, shuffle
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier





In [None]:
pip install librosa

# Global Code

In [None]:
# Folder holding the drone / swarm recordings, and the ambient-noise recording
# stored in that same folder.
dataset_dir = '/content/drive/MyDrive/iit_our_rec/'
ambient_path = dataset_dir + 'noise_30min.wav'

In [None]:
def train_and_evaluate_model(X_train, y_train, X_test, y_test, model_name):
    """Fit the classifier selected by ``model_name`` and report test metrics.

    Args:
        X_train: Training feature matrix.
        y_train: Training class labels.
        X_test: Test feature matrix.
        y_test: Test class labels.
        model_name: One of ``'SVM'``, ``'Random Forest'``, ``'Naive Bayes'``,
            ``'Decision Trees'`` or ``'k-Nearest Neighbors'``.

    Returns:
        dict with the printed values under the keys ``'model'``,
        ``'accuracy'``, ``'precision'``, ``'recall'``, ``'f1'`` and
        ``'confusion_matrix'``, so results can be collected and compared.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names
            (previously this surfaced as an unbound-variable error at ``fit``).
    """
    # Factories are lazy so only the requested estimator is constructed.
    model_factories = {
        'SVM': lambda: SVC(C=10, kernel='rbf', gamma='scale'),
        'Random Forest': lambda: RandomForestClassifier(n_estimators=100, random_state=42),
        'Naive Bayes': lambda: GaussianNB(),
        'Decision Trees': lambda: DecisionTreeClassifier(random_state=42),
        'k-Nearest Neighbors': lambda: KNeighborsClassifier(n_neighbors=5),
    }
    if model_name not in model_factories:
        raise ValueError(
            "unknown model_name {!r}; expected one of {}".format(
                model_name, sorted(model_factories)))

    model = model_factories[model_name]()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Precision / recall / F1 are averaged over classes, weighted by support.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    confusion_mat = confusion_matrix(y_test, y_pred)

    print("Model:", model_name)
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Confusion Matrix:")
    print(confusion_mat)
    print("\n")

    return {
        'model': model_name,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1,
        'confusion_matrix': confusion_mat,
    }

In [None]:
# Classifier names understood by train_and_evaluate_model, in evaluation order.
model_names = [
    'SVM',
    'Random Forest',
    'Naive Bayes',
    'Decision Trees',
    'k-Nearest Neighbors',
]


# Mel spectrogram

In [None]:
def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit, n_mels):
    """Build a noisy, time-averaged mel-spectrogram dataset.

    Each recording in ``main_folder_path`` whose name contains ``swarm``
    (label 1) or ``drone`` (label 0) is peak-normalised and cut into
    ``duration``-second segments. Every segment is mixed with a randomly chosen
    segment of the ambient recording at each SNR in ``snr_levels_db``, and the
    mixture is reduced to the mean of its mel spectrogram over time.

    The noise segment is drawn from NumPy's global RNG; call
    ``np.random.seed`` beforehand for a reproducible dataset.

    Args:
        main_folder_path: Directory holding the recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz. Audio is resampled to it on load so that
            segment lengths and the mel filter bank agree with it.
        Limit: Unused; kept so existing calls keep working.
        n_mels: Number of mel bands.

    Returns:
        ``(features, labels)``: ``features`` is a float array of shape
        ``(n_samples, n_mels)`` standardised with the global mean and standard
        deviation; ``labels`` is the one-hot array from ``to_categorical``.

    Raises:
        ValueError: If the segment length is not positive, no drone/swarm
            audio file is found, the ambient recording is empty, or no usable
            segment could be produced.
    """
    max_load_seconds = 1747  # load cap kept from the original code -- TODO confirm its origin
    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)

    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must be at least one sample")

    # Pick and label the recordings first so an empty folder fails fast,
    # before the long ambient recording is decoded.
    labelled_files = []
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered = file_name.lower()
        if not lowered.endswith(('.wav', '.mp3')):
            continue
        # 'swarm' is tested first so a name containing both words is not
        # mislabelled as a single drone.
        if 'swarm' in lowered:
            labelled_files.append((file_name, 1))
        elif 'drone' in lowered:
            labelled_files.append((file_name, 0))
    if not labelled_files:
        raise ValueError(
            "no drone/swarm audio files found in {!r}".format(main_folder_path))

    # The ambient recording is identical for every file: decode it once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=max_load_seconds)
    if noise_full.size == 0:
        raise ValueError("ambient recording {!r} is empty".format(file_path_ambient))
    noise_peak = np.max(np.abs(noise_full))
    if noise_peak > 0:
        noise_full = noise_full / noise_peak

    mel_features = []
    labels = []
    for file_name, label in labelled_files:
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name),
                                 sr=SR, duration=max_load_seconds)
        n_segments = len(signal) // segment_len
        if n_segments == 0:
            continue  # shorter than one segment
        peak = np.max(np.abs(signal))
        if peak == 0:
            continue  # silent file: nothing to scale
        signal = signal / peak

        # Truncate the noise, or repeat it cyclically, to the signal length so
        # every segment index below is valid.
        noise = np.resize(noise_full, len(signal))

        for index in range(n_segments):
            segment = signal[index * segment_len:(index + 1) * segment_len]
            signal_power = np.mean(np.square(segment))
            if signal_power == 0:
                continue  # avoids a 0/0 scaling factor (NaN features)

            # randint's upper bound is exclusive, so this covers all segments.
            noise_index = np.random.randint(0, n_segments)
            noise_segment = noise[noise_index * segment_len:(noise_index + 1) * segment_len]
            noise_power = np.mean(np.square(noise_segment))

            for snr_db in snr_levels_db:
                # Scale the signal so signal_power / noise_power == 10**(snr/10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixed = segment * np.sqrt(target_power / signal_power) + noise_segment

                mel = librosa.feature.melspectrogram(
                    y=mixed,
                    sr=SR,
                    n_fft=2048 * 2,
                    hop_length=512,
                    win_length=1024,
                    n_mels=n_mels,
                )
                mel_features.append(np.mean(mel, axis=-1))
                labels.append(label)

    if not labels:
        raise ValueError(
            "recordings in {!r} produced no usable segments".format(main_folder_path))

    # Plain array conversion keeps the float dtype. pad_sequences defaults to
    # dtype='int32' and silently truncated every feature to an integer.
    mel_spec_data = np.asarray(mel_features)
    normalized_mel_spec_data = (mel_spec_data - np.mean(mel_spec_data)) / np.std(mel_spec_data)

    print("Drone samples:", labels.count(0),
          "\nSwarm Drone samples:", labels.count(1))

    return normalized_mel_spec_data, tf.keras.utils.to_categorical(labels, num_classes=2)



In [None]:
# Sweep the number of mel bands: rebuild the dataset for each value and score
# every classifier on the same train/test split.
n_mels_values = [64, 128, 256]  # Modify the values as desired
model_names = ['SVM', 'Random Forest', 'Naive Bayes', 'Decision Trees', 'k-Nearest Neighbors']

for n_mels in n_mels_values:
    mel_data, labels = mix_audio(dataset_dir, ambient_path, 1, 44100, 21000, n_mels)

    # mix_audio returns one-hot labels; the classifiers expect class indices.
    y_labels = np.argmax(labels, axis=1)

    X_train, X_test, y_train, y_test = train_test_split(
        mel_data, y_labels, test_size=0.2, random_state=42
    )

    print("n_mels:", n_mels)
    for model_name in model_names:
        train_and_evaluate_model(X_train, y_train, X_test, y_test, model_name)
    print("--------------------------------\n")


Drone samples: 17470 
Swarm Drone samples: 17470
n_mels: 64
Model: SVM
Accuracy: 0.7884945621064682
Precision: 0.7888066424383442
Recall: 0.7884945621064682
F1 Score: 0.7884099712748027
Confusion Matrix:
[[2675  800]
 [ 678 2835]]


Model: Random Forest
Accuracy: 0.9281625643961076
Precision: 0.9281689564691489
Recall: 0.9281625643961076
F1 Score: 0.9281630233886554
Confusion Matrix:
[[3230  245]
 [ 257 3256]]


Model: Naive Bayes
Accuracy: 0.6788780767029193
Precision: 0.6877911321833616
Recall: 0.6788780767029193
F1 Score: 0.6753688022186978
Confusion Matrix:
[[2726  749]
 [1495 2018]]


Model: Decision Trees
Accuracy: 0.9022610188895249
Precision: 0.902457824785463
Recall: 0.9022610188895249
F1 Score: 0.9022554644891988
Confusion Matrix:
[[3171  304]
 [ 379 3134]]


Model: k-Nearest Neighbors
Accuracy: 0.8918145392100744
Precision: 0.8919567991075075
Recall: 0.8918145392100744
F1 Score: 0.8918108526404477
Confusion Matrix:
[[3129  346]
 [ 410 3103]]


-------------------------------

# MFCC

In [None]:
def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit, n_mfcc):
    """Build a noisy, time-averaged MFCC dataset.

    Each recording in ``main_folder_path`` whose name contains ``swarm``
    (label 1) or ``drone`` (label 0) is peak-normalised and cut into
    ``duration``-second segments. Every segment is mixed with a randomly chosen
    segment of the ambient recording at each SNR in ``snr_levels_db``, and the
    mixture is reduced to the mean of its MFCCs over time.

    The noise segment is drawn from NumPy's global RNG; call
    ``np.random.seed`` beforehand for a reproducible dataset.

    Args:
        main_folder_path: Directory holding the recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz. Audio is resampled to it on load so that
            segment lengths and the MFCC computation agree with it.
        Limit: Unused; kept so existing calls keep working.
        n_mfcc: Number of MFCC coefficients.

    Returns:
        ``(features, labels)``: ``features`` is a float array of shape
        ``(n_samples, n_mfcc)`` standardised with the global mean and standard
        deviation; ``labels`` is the one-hot array from ``to_categorical``.

    Raises:
        ValueError: If the segment length is not positive, no drone/swarm
            audio file is found, the ambient recording is empty, or no usable
            segment could be produced.
    """
    max_load_seconds = 1747  # load cap kept from the original code -- TODO confirm its origin
    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)

    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must be at least one sample")

    # Pick and label the recordings first so an empty folder fails fast,
    # before the long ambient recording is decoded.
    labelled_files = []
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered = file_name.lower()
        if not lowered.endswith(('.wav', '.mp3')):
            continue
        # 'swarm' is tested first so a name containing both words is not
        # mislabelled as a single drone.
        if 'swarm' in lowered:
            labelled_files.append((file_name, 1))
        elif 'drone' in lowered:
            labelled_files.append((file_name, 0))
    if not labelled_files:
        raise ValueError(
            "no drone/swarm audio files found in {!r}".format(main_folder_path))

    # The ambient recording is identical for every file: decode it once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=max_load_seconds)
    if noise_full.size == 0:
        raise ValueError("ambient recording {!r} is empty".format(file_path_ambient))
    noise_peak = np.max(np.abs(noise_full))
    if noise_peak > 0:
        noise_full = noise_full / noise_peak

    mfcc_features = []
    labels = []
    for file_name, label in labelled_files:
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name),
                                 sr=SR, duration=max_load_seconds)
        n_segments = len(signal) // segment_len
        if n_segments == 0:
            continue  # shorter than one segment
        peak = np.max(np.abs(signal))
        if peak == 0:
            continue  # silent file: nothing to scale
        signal = signal / peak

        # Truncate the noise, or repeat it cyclically, to the signal length so
        # every segment index below is valid.
        noise = np.resize(noise_full, len(signal))

        for index in range(n_segments):
            segment = signal[index * segment_len:(index + 1) * segment_len]
            signal_power = np.mean(np.square(segment))
            if signal_power == 0:
                continue  # avoids a 0/0 scaling factor (NaN features)

            # randint's upper bound is exclusive, so this covers all segments.
            noise_index = np.random.randint(0, n_segments)
            noise_segment = noise[noise_index * segment_len:(noise_index + 1) * segment_len]
            noise_power = np.mean(np.square(noise_segment))

            for snr_db in snr_levels_db:
                # Scale the signal so signal_power / noise_power == 10**(snr/10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixed = segment * np.sqrt(target_power / signal_power) + noise_segment

                mfcc = librosa.feature.mfcc(y=mixed, sr=SR, n_mfcc=n_mfcc,
                                            n_fft=1024, hop_length=512)
                mfcc_features.append(np.mean(mfcc, axis=-1))
                labels.append(label)

    if not labels:
        raise ValueError(
            "recordings in {!r} produced no usable segments".format(main_folder_path))

    # Plain array conversion keeps the float dtype. pad_sequences defaults to
    # dtype='int32' and silently truncated every coefficient to an integer.
    mfcc_data = np.asarray(mfcc_features)
    normalized_mfcc_data = (mfcc_data - np.mean(mfcc_data)) / np.std(mfcc_data)

    print("Drone samples:", labels.count(0),
          "\nSwarm Drone samples:", labels.count(1))

    return normalized_mfcc_data, tf.keras.utils.to_categorical(labels, num_classes=2)



In [None]:
n_mfcc_values = [13, 23, 33]  # Modify the values as desired
model_names = ['SVM', 'Random Forest', 'Naive Bayes', 'Decision Trees', 'k-Nearest Neighbors']

# Extract the features once per n_mfcc value. mix_audio mixes in randomly
# chosen noise segments, so calling it inside the model loop both repeated the
# expensive extraction for every model and gave each model a different dataset.
mfcc_splits = {}
for n_mfcc in n_mfcc_values:
    mfcc_data, labels_mfcc = mix_audio(dataset_dir, ambient_path, 1, 44100, 21000, n_mfcc)

    # Convert one-hot encoded labels to 1D array
    y_labels = np.argmax(labels_mfcc, axis=1)

    # Split the data into training and testing sets
    mfcc_splits[n_mfcc] = train_test_split(mfcc_data, y_labels, test_size=0.2, random_state=42)

# Same reporting order as before: every n_mfcc value for one model, then the
# next model.
for model_name in model_names:
    for n_mfcc in n_mfcc_values:
        X_train, X_test, y_train, y_test = mfcc_splits[n_mfcc]

        train_and_evaluate_model(X_train, y_train, X_test, y_test, model_name)

        print("n_mfcc:", n_mfcc)
        print("Model:", model_name)
        print("-------------------------------------")

Drone samples: 17470 
Swarm Drone samples: 17470
Model: SVM
Accuracy: 0.9895535203205496
Precision: 0.9897018218416869
Recall: 0.9895535203205496
F1 Score: 0.9895522281603026
Confusion Matrix:
[[3408   67]
 [   6 3507]]


n_mfcc: 13
Model: SVM
-------------------------------------
Drone samples: 17470 
Swarm Drone samples: 17470
Model: SVM
Accuracy: 0.9937034917000572
Precision: 0.9937499188326328
Recall: 0.9937034917000572
F1 Score: 0.9937031760168299
Confusion Matrix:
[[3436   39]
 [   5 3508]]


n_mfcc: 23
Model: SVM
-------------------------------------
Drone samples: 17470 
Swarm Drone samples: 17470
Model: SVM
Accuracy: 0.9935603892386949
Precision: 0.9936215079316395
Recall: 0.9935603892386949
F1 Score: 0.9935599931778675
Confusion Matrix:
[[3433   42]
 [   3 3510]]


n_mfcc: 33
Model: SVM
-------------------------------------
Drone samples: 17470 
Swarm Drone samples: 17470
Model: Random Forest
Accuracy: 0.995277618775043
Precision: 0.9953027951524815
Recall: 0.995277618775043


# Zero-crossing rate (ZCR)

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier


def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit):
    """Build a noisy zero-crossing-rate dataset.

    Each recording in ``main_folder_path`` whose name contains ``swarm``
    (label 1) or ``drone`` (label 0) is peak-normalised and cut into
    ``duration``-second segments. Every segment is mixed with a randomly chosen
    segment of the ambient recording at each SNR in ``snr_levels_db``, and the
    mixture is reduced to its mean zero-crossing rate.

    The noise segment is drawn from NumPy's global RNG; call
    ``np.random.seed`` beforehand for a reproducible dataset.

    Args:
        main_folder_path: Directory holding the recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz. Audio is resampled to it on load so that
            segment lengths agree with it.
        Limit: Unused; kept so existing calls keep working.

    Returns:
        ``(features, labels)``: ``features`` is an ``(n_samples, 1)`` array,
        mean-imputed and standardised with ``StandardScaler``; ``labels`` is
        the one-hot array from ``to_categorical``.

    Raises:
        ValueError: If the segment length is not positive, no drone/swarm
            audio file is found, the ambient recording is empty, or no usable
            segment could be produced.
    """
    max_load_seconds = 1747  # load cap kept from the original code -- TODO confirm its origin
    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)

    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must be at least one sample")

    # Pick and label the recordings first so an empty folder fails fast,
    # before the long ambient recording is decoded.
    labelled_files = []
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered = file_name.lower()
        if not lowered.endswith(('.wav', '.mp3')):
            continue
        # 'swarm' is tested first so a name containing both words is not
        # mislabelled as a single drone.
        if 'swarm' in lowered:
            labelled_files.append((file_name, 1))
        elif 'drone' in lowered:
            labelled_files.append((file_name, 0))
    if not labelled_files:
        raise ValueError(
            "no drone/swarm audio files found in {!r}".format(main_folder_path))

    # The ambient recording is identical for every file: decode it once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=max_load_seconds)
    if noise_full.size == 0:
        raise ValueError("ambient recording {!r} is empty".format(file_path_ambient))
    noise_peak = np.max(np.abs(noise_full))
    if noise_peak > 0:
        noise_full = noise_full / noise_peak

    zcr_features = []
    labels = []
    for file_name, label in labelled_files:
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name),
                                 sr=SR, duration=max_load_seconds)
        n_segments = len(signal) // segment_len
        if n_segments == 0:
            continue  # shorter than one segment
        peak = np.max(np.abs(signal))
        if peak == 0:
            continue  # silent file: nothing to scale
        signal = signal / peak

        # Truncate the noise, or repeat it cyclically, to the signal length so
        # every segment index below is valid.
        noise = np.resize(noise_full, len(signal))

        for index in range(n_segments):
            segment = signal[index * segment_len:(index + 1) * segment_len]
            signal_power = np.mean(np.square(segment))
            if signal_power == 0:
                continue  # avoids a 0/0 scaling factor (NaN samples)

            # randint's upper bound is exclusive, so this covers all segments.
            noise_index = np.random.randint(0, n_segments)
            noise_segment = noise[noise_index * segment_len:(noise_index + 1) * segment_len]
            noise_power = np.mean(np.square(noise_segment))

            for snr_db in snr_levels_db:
                # Scale the signal so signal_power / noise_power == 10**(snr/10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixed = segment * np.sqrt(target_power / signal_power) + noise_segment

                zcr = librosa.feature.zero_crossing_rate(y=mixed, frame_length=2048,
                                                         hop_length=512)
                zcr_features.append(np.mean(zcr, axis=-1))
                labels.append(label)

    if not labels:
        raise ValueError(
            "recordings in {!r} produced no usable segments".format(main_folder_path))

    feature_data = np.array(zcr_features)

    # Imputation is kept from the original pipeline as a safety net for any
    # non-finite feature values.
    feature_data = SimpleImputer(strategy='mean').fit_transform(feature_data)
    normalized_feature_data = StandardScaler().fit_transform(feature_data)

    print("Drone samples:", labels.count(0),
          "\nSwarm Drone samples:", labels.count(1))

    return normalized_feature_data, tf.keras.utils.to_categorical(np.array(labels), num_classes=2)



# Build the ZCR dataset once, then score every classifier on the same split.
model_names = ['SVM', 'Random Forest', 'Naive Bayes', 'Decision Trees', 'k-Nearest Neighbors']
zcr_data, labels_zcr = mix_audio(dataset_dir, ambient_path, 1, 44100, 21000)

# mix_audio returns one-hot labels; the classifiers expect class indices.
y_labels = np.argmax(labels_zcr, axis=1)

X_train, X_test, y_train, y_test = train_test_split(
    zcr_data, y_labels, test_size=0.2, random_state=42
)

for model_name in model_names:
    train_and_evaluate_model(X_train, y_train, X_test, y_test, model_name)
    print("Model:", model_name)
    print("-------------------------------------")


Drone samples: 17470 
Swarm Drone samples: 17470
Model: SVM
Accuracy: 0.6040354894104178
Precision: 0.6083105619406373
Recall: 0.6040354894104178
F1 Score: 0.6006695550766448
Confusion Matrix:
[[2422 1053]
 [1714 1799]]


Model: SVM
-------------------------------------
Model: Random Forest
Accuracy: 0.5487979393245563
Precision: 0.5487958685296906
Recall: 0.5487979393245564
F1 Score: 0.5487968027829376
Confusion Matrix:
[[1897 1578]
 [1575 1938]]


Model: Random Forest
-------------------------------------
Model: Naive Bayes
Accuracy: 0.5941614195764167
Precision: 0.5941830560164263
Recall: 0.5941614195764167
F1 Score: 0.5941640125817634
Confusion Matrix:
[[2070 1405]
 [1431 2082]]


Model: Naive Bayes
-------------------------------------
Model: Decision Trees
Accuracy: 0.5429307384087007
Precision: 0.5439157368682802
Recall: 0.5429307384087007
F1 Score: 0.5413169074433679
Confusion Matrix:
[[2095 1380]
 [1814 1699]]


Model: Decision Trees
-------------------------------------
Model

# Power spectral density (PSD)

In [None]:
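# NOTE: this cell contains only commented-out code; the active PSD version of
# mix_audio is defined in the next code cell.
# def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit):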
#     audio_data = []
#     feature_data = []
#     labels = []

#     segment_sr_required = duration * SR

#     no_of_drone_samples = 0
#     no_of_swarm_drone_samples = 0

#     for file_name in os.listdir(main_folder_path):
#         if 'drone'.lower() in file_name.lower():
#             flag = 0
#         elif 'swarm'.lower() in file_name.lower():
#             flag = 1
#         else:
#             continue

#         if file_name.endswith('.wav') or file_name.endswith('.WAV') or file_name.endswith('.mp3'):
#             file_path = os.path.join(main_folder_path, file_name)
#             signal_data, sr = librosa.load(file_path, sr=None, duration=1747)
#             signal_data = signal_data / np.max(np.abs(signal_data))
#             noise_data, sr_noise = librosa.load(file_path_ambient, sr=None, duration=1747)
#             noise_data = noise_data / np.max(np.abs(noise_data))

#             s = len(signal_data)
#             n = len(noise_data)

#             if n > s:
#                 noise_data = noise_data[0:s]
#             elif s > n:
#                 w = s - n
#                 noise_data = np.concatenate((noise_data, noise_data[0:w]))
#             else:
#                 pass

#             N = int(len(signal_data) / segment_sr_required)

#             for i in range(N):
#                 start = i * segment_sr_required
#                 end = start + segment_sr_required

#                 if end - start != SR:
#                     continue

#                 segment = signal_data[start:end]
#                 start_noise = np.random.randint(0, N-1) * segment_sr_required
#                 end_noise = start_noise + segment_sr_required

#                 if end_noise - start_noise != SR:
#                     continue

#                 noise = noise_data[start_noise:end_noise]

#                 rms_signal = np.mean(np.square(segment))
#                 rms_noise = np.mean(np.square(noise))

#                 dbset = [-25, -20, -15, -10, -5, 0, 5, 10, 15, 20]

#                 for j in range(len(dbset)):
#                     rms_signal_req_to_increase = rms_noise / (10 ** (-dbset[j] / 10))
#                     scaling_factor = np.sqrt(rms_signal_req_to_increase / rms_signal)
#                     adjusted_audio_signal = segment * scaling_factor

#                     adjusted_audio_signal += noise

#                     audio_data.append(adjusted_audio_signal)

#                     psd = np.abs(np.fft.fft(adjusted_audio_signal)) ** 2
#                     mean_psd = np.mean(psd, axis=-1)

#                     feature_data.append(mean_psd)
#                     labels.append(flag)

#                     if flag == 0:
#                         no_of_drone_samples += 1
#                     elif flag == 1:
#                         no_of_swarm_drone_samples += 1

#     # audio_data = np.array(audio_data)
#     feature_data = np.array(feature_data)
#     labels = tf.keras.utils.to_categorical(labels, num_classes=2)

#     mean_feature = np.mean(feature_data)
#     std_feature = np.std(feature_data)
#     normalized_feature_data = (feature_data - mean_feature) / std_feature

#     print("Drone samples:", no_of_drone_samples,
#           "\nSwarm Drone samples:", no_of_swarm_drone_samples)

#     return normalized_feature_data, labels

# feature_data, labels = mix_audio(dataset_dir, ambient_path, 1, 44100, 21000)

# # Convert one-hot encoded labels to 1D array
# y_labels = np.argmax(labels, axis=1)

# feature_data = feature_data.reshape(-1, 1)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(feature_data, y_labels, test_size=0.2, random_state=42)

# for model_name in model_names:
#     train_and_evaluate_model(X_train, y_train, X_test, y_test, model_name)

#     print("Model:", model_name)
#     print("-------------------------------------")


Drone samples: 17470 
Swarm Drone samples: 17470
Model: SVM
Accuracy: 0.4935603892386949
Precision: 0.49363403634986014
Recall: 0.4935603892386949
F1 Score: 0.4726617873308482
Confusion Matrix:
[[2411 1064]
 [2475 1038]]


Model: SVM
-------------------------------------
Model: Random Forest
Accuracy: 0.4951345163136806
Precision: 0.4951297872337345
Recall: 0.4951345163136806
F1 Score: 0.4951317867638148
Confusion Matrix:
[[1708 1767]
 [1761 1752]]


Model: Random Forest
-------------------------------------
Model: Naive Bayes
Accuracy: 0.4975672581568403
Precision: 0.4990371704690984
Recall: 0.4975672581568403
F1 Score: 0.42181099873051714
Confusion Matrix:
[[2995  480]
 [3031  482]]


Model: Naive Bayes
-------------------------------------
Model: Decision Trees
Accuracy: 0.49527761877504295
Precision: 0.4952985460810406
Recall: 0.49527761877504295
F1 Score: 0.495280688534148
Confusion Matrix:
[[1725 1750]
 [1777 1736]]


Model: Decision Trees
-------------------------------------
Mo

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier


def train_and_evaluate_model(X_train, y_train, X_test, y_test, model_name):
    """Fit a default-configured classifier and report train/test accuracy.

    Note: this rebinds the name ``train_and_evaluate_model`` that an earlier
    cell defines with tuned hyper-parameters and precision/recall output;
    cells executed after this one call this version.

    Args:
        X_train: Training feature matrix.
        y_train: Training class labels.
        X_test: Test feature matrix.
        y_test: Test class labels.
        model_name: One of ``'SVM'``, ``'Random Forest'``, ``'Naive Bayes'``,
            ``'Decision Trees'`` or ``'k-Nearest Neighbors'``.

    Returns:
        dict with keys ``'model'``, ``'train_accuracy'`` and
        ``'test_accuracy'`` holding the printed values.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names
            (previously this surfaced as an unbound-variable error at ``fit``).
    """
    # Estimator classes are looked up lazily so only the requested one is built.
    model_factories = {
        'SVM': lambda: SVC(),
        'Random Forest': lambda: RandomForestClassifier(),
        'Naive Bayes': lambda: GaussianNB(),
        'Decision Trees': lambda: DecisionTreeClassifier(),
        'k-Nearest Neighbors': lambda: KNeighborsClassifier(),
    }
    if model_name not in model_factories:
        raise ValueError(
            "unknown model_name {!r}; expected one of {}".format(
                model_name, sorted(model_factories)))

    model = model_factories[model_name]()
    model.fit(X_train, y_train)

    # A large gap between the two scores indicates over-fitting.
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)

    print("Training Accuracy (" + model_name + "):", train_score)
    print("Testing Accuracy (" + model_name + "):", test_score)
    print("-------------------------------------")

    return {
        'model': model_name,
        'train_accuracy': train_score,
        'test_accuracy': test_score,
    }


def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit):
    """Build a noisy mean-power-spectrum dataset (one scalar per sample).

    Each recording in ``main_folder_path`` whose name contains ``swarm``
    (label 1) or ``drone`` (label 0) is peak-normalised and cut into
    ``duration``-second segments. Every segment is mixed with a randomly chosen
    segment of the ambient recording at each SNR in ``snr_levels_db``, and the
    mixture is reduced to the mean of its squared FFT magnitudes.

    The noise segment is drawn from NumPy's global RNG; call
    ``np.random.seed`` beforehand for a reproducible dataset.

    Args:
        main_folder_path: Directory holding the recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz. Audio is resampled to it on load so that
            segment lengths agree with it.
        Limit: Unused; kept so existing calls keep working.

    Returns:
        ``(features, labels)``: ``features`` is a 1-D array with one
        standardised value per sample (global mean and standard deviation);
        ``labels`` is the one-hot array from ``to_categorical``.

    Raises:
        ValueError: If the segment length is not positive, no drone/swarm
            audio file is found, the ambient recording is empty, or no usable
            segment could be produced.
    """
    max_load_seconds = 1747  # load cap kept from the original code -- TODO confirm its origin
    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)

    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must be at least one sample")

    # Pick and label the recordings first so an empty folder fails fast,
    # before the long ambient recording is decoded.
    labelled_files = []
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered = file_name.lower()
        if not lowered.endswith(('.wav', '.mp3')):
            continue
        # 'swarm' is tested first so a name containing both words is not
        # mislabelled as a single drone.
        if 'swarm' in lowered:
            labelled_files.append((file_name, 1))
        elif 'drone' in lowered:
            labelled_files.append((file_name, 0))
    if not labelled_files:
        raise ValueError(
            "no drone/swarm audio files found in {!r}".format(main_folder_path))

    # The ambient recording is identical for every file: decode it once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=max_load_seconds)
    if noise_full.size == 0:
        raise ValueError("ambient recording {!r} is empty".format(file_path_ambient))
    noise_peak = np.max(np.abs(noise_full))
    if noise_peak > 0:
        noise_full = noise_full / noise_peak

    psd_features = []
    labels = []
    for file_name, label in labelled_files:
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name),
                                 sr=SR, duration=max_load_seconds)
        n_segments = len(signal) // segment_len
        if n_segments == 0:
            continue  # shorter than one segment
        peak = np.max(np.abs(signal))
        if peak == 0:
            continue  # silent file: nothing to scale
        signal = signal / peak

        # Truncate the noise, or repeat it cyclically, to the signal length so
        # every segment index below is valid.
        noise = np.resize(noise_full, len(signal))

        for index in range(n_segments):
            segment = signal[index * segment_len:(index + 1) * segment_len]
            signal_power = np.mean(np.square(segment))
            if signal_power == 0:
                continue  # avoids a 0/0 scaling factor (NaN features)

            # randint's upper bound is exclusive, so this covers all segments.
            noise_index = np.random.randint(0, n_segments)
            noise_segment = noise[noise_index * segment_len:(noise_index + 1) * segment_len]
            noise_power = np.mean(np.square(noise_segment))

            for snr_db in snr_levels_db:
                # Scale the signal so signal_power / noise_power == 10**(snr/10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixed = segment * np.sqrt(target_power / signal_power) + noise_segment

                # NOTE(review): by Parseval's theorem the mean of |FFT|^2 equals
                # the sum of squared samples, i.e. the segment's energy, so this
                # scalar carries no spectral-shape information. That may be why
                # the classifiers score near chance on it -- confirm.
                psd = np.abs(np.fft.fft(mixed)) ** 2
                psd_features.append(np.mean(psd, axis=-1))
                labels.append(label)

    if not labels:
        raise ValueError(
            "recordings in {!r} produced no usable segments".format(main_folder_path))

    feature_data = np.array(psd_features)
    normalized_feature_data = (feature_data - np.mean(feature_data)) / np.std(feature_data)

    print("Drone samples:", labels.count(0),
          "\nSwarm Drone samples:", labels.count(1))

    return normalized_feature_data, tf.keras.utils.to_categorical(np.array(labels), num_classes=2)

# Build the PSD dataset once, then score every classifier on the same split.
# The results get PSD-specific names: the copy-pasted `zcr_data` / `labels_zcr`
# names silently overwrote the ZCR cell's variables with PSD features.
model_names = ['SVM', 'Random Forest', 'Naive Bayes', 'Decision Trees', 'k-Nearest Neighbors']
psd_data, labels_psd = mix_audio(dataset_dir, ambient_path, 1, 44100, 21000)

# mix_audio returns one-hot labels; the classifiers expect class indices.
y_labels = np.argmax(labels_psd, axis=1)

# One scalar per sample: scikit-learn needs a 2-D (n_samples, 1) matrix.
psd_features = psd_data.reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(psd_features, y_labels, test_size=0.2, random_state=42)

for model_name in model_names:
    train_and_evaluate_model(X_train, y_train, X_test, y_test, model_name)
    print("Model:", model_name)
    print("-------------------------------------")


Drone samples: 17470 
Swarm Drone samples: 17470
Training Accuracy (SVM): 0.5031124785346308
Testing Accuracy (SVM): 0.49985689753863766
-------------------------------------
Model: SVM
-------------------------------------
Training Accuracy (Random Forest): 0.9896608471665712
Testing Accuracy (Random Forest): 0.4985689753863766
-------------------------------------
Model: Random Forest
-------------------------------------
Training Accuracy (Naive Bayes): 0.5003219805380652
Testing Accuracy (Naive Bayes): 0.5065827132226675
-------------------------------------
Model: Naive Bayes
-------------------------------------
Training Accuracy (Decision Trees): 0.9900186033199772
Testing Accuracy (Decision Trees): 0.49899828277046365
-------------------------------------
Model: Decision Trees
-------------------------------------
Training Accuracy (k-Nearest Neighbors): 0.6898254149971379
Testing Accuracy (k-Nearest Neighbors): 0.4886949055523755
-------------------------------------
Model: k-

# Spectral centroid

In [None]:
def mix_audio(main_folder_path, file_path_ambient, duration, SR, Limit):
    """Build a noisy mean-spectral-centroid dataset (one scalar per sample).

    Each recording in ``main_folder_path`` whose name contains ``swarm``
    (label 1) or ``drone`` (label 0) is peak-normalised and cut into
    ``duration``-second segments. Every segment is mixed with a randomly chosen
    segment of the ambient recording at each SNR in ``snr_levels_db``, and the
    mixture is reduced to the mean of its spectral centroid.

    The noise segment is drawn from NumPy's global RNG; call
    ``np.random.seed`` beforehand for a reproducible dataset.

    Args:
        main_folder_path: Directory holding the recordings.
        file_path_ambient: Path of the ambient-noise recording.
        duration: Segment length in seconds.
        SR: Sample rate in Hz. Audio is resampled to it on load so that
            segment lengths and the centroid frequencies agree with it.
        Limit: Unused; kept so existing calls keep working.

    Returns:
        ``(features, labels)``: ``features`` is an ``(n_samples, 1)`` array
        standardised with the global mean and standard deviation; ``labels``
        is the one-hot array from ``to_categorical``.

    Raises:
        ValueError: If the segment length is not positive, no drone/swarm
            audio file is found, the ambient recording is empty, or no usable
            segment could be produced.
    """
    max_load_seconds = 1747  # load cap kept from the original code -- TODO confirm its origin
    snr_levels_db = (-25, -20, -15, -10, -5, 0, 5, 10, 15, 20)

    segment_len = int(duration * SR)
    if segment_len <= 0:
        raise ValueError("duration * SR must be at least one sample")

    # Pick and label the recordings first so an empty folder fails fast,
    # before the long ambient recording is decoded.
    labelled_files = []
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered = file_name.lower()
        if not lowered.endswith(('.wav', '.mp3')):
            continue
        # 'swarm' is tested first so a name containing both words is not
        # mislabelled as a single drone.
        if 'swarm' in lowered:
            labelled_files.append((file_name, 1))
        elif 'drone' in lowered:
            labelled_files.append((file_name, 0))
    if not labelled_files:
        raise ValueError(
            "no drone/swarm audio files found in {!r}".format(main_folder_path))

    # The ambient recording is identical for every file: decode it once.
    noise_full, _ = librosa.load(file_path_ambient, sr=SR, duration=max_load_seconds)
    if noise_full.size == 0:
        raise ValueError("ambient recording {!r} is empty".format(file_path_ambient))
    noise_peak = np.max(np.abs(noise_full))
    if noise_peak > 0:
        noise_full = noise_full / noise_peak

    centroid_features = []
    labels = []
    for file_name, label in labelled_files:
        signal, _ = librosa.load(os.path.join(main_folder_path, file_name),
                                 sr=SR, duration=max_load_seconds)
        n_segments = len(signal) // segment_len
        if n_segments == 0:
            continue  # shorter than one segment
        peak = np.max(np.abs(signal))
        if peak == 0:
            continue  # silent file: nothing to scale
        signal = signal / peak

        # Truncate the noise, or repeat it cyclically, to the signal length so
        # every segment index below is valid.
        noise = np.resize(noise_full, len(signal))

        for index in range(n_segments):
            segment = signal[index * segment_len:(index + 1) * segment_len]
            signal_power = np.mean(np.square(segment))
            if signal_power == 0:
                continue  # avoids a 0/0 scaling factor (NaN features)

            # randint's upper bound is exclusive, so this covers all segments.
            noise_index = np.random.randint(0, n_segments)
            noise_segment = noise[noise_index * segment_len:(noise_index + 1) * segment_len]
            noise_power = np.mean(np.square(noise_segment))

            for snr_db in snr_levels_db:
                # Scale the signal so signal_power / noise_power == 10**(snr/10).
                target_power = noise_power * 10 ** (snr_db / 10)
                mixed = segment * np.sqrt(target_power / signal_power) + noise_segment

                spectral_centroid = librosa.feature.spectral_centroid(
                    y=mixed,
                    sr=SR,
                    n_fft=2048 * 2,
                    hop_length=512,
                    win_length=1024,
                )
                centroid_features.append(np.mean(spectral_centroid))
                labels.append(label)

    if not labels:
        raise ValueError(
            "recordings in {!r} produced no usable segments".format(main_folder_path))

    # One scalar per sample, shaped (n_samples, 1) for scikit-learn.
    spectral_centroid_data = np.array(centroid_features).reshape(-1, 1)
    normalized_spectral_centroid_data = (
        (spectral_centroid_data - np.mean(spectral_centroid_data))
        / np.std(spectral_centroid_data)
    )

    print("Drone samples:", labels.count(0),
          "\nSwarm Drone samples:", labels.count(1))

    return normalized_spectral_centroid_data, tf.keras.utils.to_categorical(labels, num_classes=2)



In [None]:
# Build the spectral-centroid dataset once, then score every classifier on the
# same train/test split.
spectral_centroid_data, labels = mix_audio(dataset_dir, ambient_path, 1, 44100, 21000)

# mix_audio returns one-hot labels; the classifiers expect class indices.
y_labels = np.argmax(labels, axis=1)

X_train, X_test, y_train, y_test = train_test_split(
    spectral_centroid_data, y_labels, test_size=0.2, random_state=42
)

for model_name in model_names:
    train_and_evaluate_model(X_train, y_train, X_test, y_test, model_name)
    print("Model:", model_name)
    print("-------------------------------------")



Drone samples: 17470 
Swarm Drone samples: 17470
Model: SVM
Accuracy: 0.7228105323411562
Precision: 0.7343285669575679
Recall: 0.7228105323411562
F1 Score: 0.7190917008696133
Confusion Matrix:
[[2114 1361]
 [ 576 2937]]


Model: SVM
-------------------------------------
Model: Random Forest
Accuracy: 0.6315111619919863
Precision: 0.6315095383852788
Recall: 0.6315111619919863
F1 Score: 0.6315102337983077
Confusion Matrix:
[[2186 1289]
 [1286 2227]]


Model: Random Forest
-------------------------------------
Model: Naive Bayes
Accuracy: 0.703062392673154
Precision: 0.7038183420464956
Recall: 0.703062392673154
F1 Score: 0.7026960764547798
Confusion Matrix:
[[2324 1151]
 [ 924 2589]]


Model: Naive Bayes
-------------------------------------
Model: Decision Trees
Accuracy: 0.6306525472238123
Precision: 0.6306519830728942
Recall: 0.6306525472238123
F1 Score: 0.6306522522337061
Confusion Matrix:
[[2184 1291]
 [1290 2223]]


Model: Decision Trees
-------------------------------------
Model: 