# Mfcc

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

def mix_audio(main_folder_path, chunk_duration_seconds, SR, Limit):
    """Build a mean-MFCC feature matrix from fixed-length chunks of labelled audio.

    Every ``.wav``/``.mp3`` file (extension matched case-insensitively) in
    ``main_folder_path`` whose name contains ``drone``, ``swarm``,
    ``aircraft`` or ``noise`` (case-insensitive, checked in that order) is cut
    into non-overlapping chunks. Each chunk is peak-normalised and reduced to
    13 MFCCs averaged over the last axis of the MFCC array.

    Args:
        main_folder_path: Directory that directly contains the audio files.
        chunk_duration_seconds: Length of each chunk in seconds.
        SR: Sample rate in Hz. Files are resampled to this rate on load so the
            chunk length and the MFCC computation agree with the audio.
        Limit: Unused; kept so existing callers keep working.

    Returns:
        ``(features, labels)``. ``features`` is a float array with one row of
        13 values per chunk, standardised with a single mean and standard
        deviation taken over the whole matrix. ``labels`` is a NumPy array
        with the class name of each row.

    Raises:
        ValueError: If the chunk length rounds down to zero samples, or if no
            chunk could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')
    audio_extensions = ('.wav', '.mp3')

    segment_sr_required = int(chunk_duration_seconds * SR)  # chunk length in samples
    if segment_sr_required <= 0:
        raise ValueError(
            f"chunk_duration_seconds * SR must be at least one sample, got {segment_sr_required}"
        )

    mfcc_data = []
    labels = []
    sample_counts = dict.fromkeys(class_names, 0)

    # Sorted so the row order (and therefore any seeded split made by the
    # caller) does not depend on the filesystem's listing order.
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered_name = file_name.lower()
        label = next((name for name in class_names if name in lowered_name), None)
        if label is None or not lowered_name.endswith(audio_extensions):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (not the native rate): the chunk length and the MFCC call
        # below both assume the samples are at SR.
        signal_data, _ = librosa.load(file_path, sr=SR, mono=True)

        last_start = len(signal_data) - segment_sr_required
        for start in range(0, last_start + 1, segment_sr_required):
            segment = signal_data[start:start + segment_sr_required]

            # Peak-normalise; an all-zero chunk has nothing to scale and would
            # otherwise turn into NaNs.
            peak = np.max(np.abs(segment))
            if peak > 0:
                segment = segment / peak

            segment_mfcc = librosa.feature.mfcc(y=segment, sr=SR, n_mfcc=13)
            mfcc_data.append(np.mean(segment_mfcc, axis=-1))
            labels.append(label)
            sample_counts[label] += 1

    if not mfcc_data:
        raise ValueError(f"No usable audio segments found in {main_folder_path!r}")

    # Every row already has 13 entries, so no padding is needed. The previous
    # pad_sequences call also cast the features to its default int32 dtype,
    # discarding their fractional part.
    mfcc_data = np.asarray(mfcc_data, dtype=np.float64)
    labels = np.array(labels)

    mean_mfcc = np.mean(mfcc_data)
    std_mfcc = np.std(mfcc_data)
    normalized_mfcc_data = (mfcc_data - mean_mfcc) / std_mfcc

    print("Drone samples:", sample_counts['drone'],
          "\nSwarm samples:", sample_counts['swarm'],
          "\nAircraft samples:", sample_counts['aircraft'],
          "\nNoise samples:", sample_counts['noise'])

    return normalized_mfcc_data, labels

# Experiment settings: folder with the recordings, the sample rate (SR) and
# Limit value handed to mix_audio, and the chunk lengths (seconds) to compare.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
SR = 44100
Limit = 21000
chunk_durations = [0.1, 0.2, 0.4, 0.6, 0.8, 1.0]

for chunk_duration in chunk_durations:
    # Features and string labels for this chunk length
    mfcc_data, labels = mix_audio(dataset_dir, chunk_duration, SR, Limit)

    # Map the class names to integer ids
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    # 70/30 train/test split with a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(
        mfcc_data,
        labels_encoded,
        test_size=0.3,
        random_state=42,
    )

    # RBF-kernel SVM, fitted on the training portion
    clf = svm.SVC(C=10, kernel='rbf', gamma='scale').fit(X_train, y_train)

    # Score on the held-out portion
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Chunk Duration: {chunk_duration} s | Accuracy: {accuracy:.4f}")

# Mel

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

def mix_audio(main_folder_path, chunk_duration_seconds, SR, Limit):
    """Build dB-scaled mel spectrograms from fixed-length chunks of labelled audio.

    Every ``.wav``/``.mp3`` file (extension matched case-insensitively) in
    ``main_folder_path`` whose name contains ``drone``, ``swarm``,
    ``aircraft`` or ``noise`` (case-insensitive, checked in that order) is cut
    into non-overlapping chunks. Each chunk is peak-normalised and converted
    to a 128-band mel spectrogram expressed in dB relative to its own maximum.

    Args:
        main_folder_path: Directory that directly contains the audio files.
        chunk_duration_seconds: Length of each chunk in seconds.
        SR: Sample rate in Hz. Files are resampled to this rate on load so the
            chunk length and the spectrogram computation agree with the audio.
        Limit: Unused; kept so existing callers keep working.

    Returns:
        ``(features, labels)``. ``features`` is a float array that stacks one
        spectrogram per chunk along the first axis, standardised with a single
        mean and standard deviation taken over the whole array. ``labels`` is
        a NumPy array with the class name of each chunk.

    Raises:
        ValueError: If the chunk length rounds down to zero samples, or if no
            chunk could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')
    audio_extensions = ('.wav', '.mp3')

    segment_sr_required = int(chunk_duration_seconds * SR)  # chunk length in samples
    if segment_sr_required <= 0:
        raise ValueError(
            f"chunk_duration_seconds * SR must be at least one sample, got {segment_sr_required}"
        )

    mel_spec_data = []
    labels = []
    sample_counts = dict.fromkeys(class_names, 0)

    # Sorted so the sample order (and therefore any seeded split made by the
    # caller) does not depend on the filesystem's listing order.
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered_name = file_name.lower()
        label = next((name for name in class_names if name in lowered_name), None)
        if label is None or not lowered_name.endswith(audio_extensions):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (not the native rate): the chunk length and the
        # spectrogram call below both assume the samples are at SR.
        signal_data, _ = librosa.load(file_path, sr=SR, mono=True)

        last_start = len(signal_data) - segment_sr_required
        for start in range(0, last_start + 1, segment_sr_required):
            segment = signal_data[start:start + segment_sr_required]

            # Peak-normalise with NumPy (the audio is a NumPy array and this
            # cell never imports CuPy); skip scaling for an all-zero chunk.
            peak = np.max(np.abs(segment))
            if peak > 0:
                segment = segment / peak

            mel_spec = librosa.feature.melspectrogram(y=segment, sr=SR, n_mels=128)
            mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

            mel_spec_data.append(mel_spec_db)
            labels.append(label)
            sample_counts[label] += 1

    if not mel_spec_data:
        raise ValueError(f"No usable audio segments found in {main_folder_path!r}")

    # All chunks have the same length, so the spectrograms share one shape and
    # can be stacked directly. The previous pad_sequences call cast them to
    # its default int32 dtype, discarding the fractional dB values.
    mel_spec_data = np.stack(mel_spec_data).astype(np.float64)
    labels = np.array(labels)

    mean_mel_spec = np.mean(mel_spec_data)
    std_mel_spec = np.std(mel_spec_data)
    normalized_mel_spec_data = (mel_spec_data - mean_mel_spec) / std_mel_spec

    print("Drone samples:", sample_counts['drone'],
          "\nSwarm samples:", sample_counts['swarm'],
          "\nAircraft samples:", sample_counts['aircraft'],
          "\nNoise samples:", sample_counts['noise'])

    return normalized_mel_spec_data, labels

# Experiment settings: folder with the recordings, the sample rate (SR) and
# Limit value handed to mix_audio, and the chunk lengths (seconds) to compare.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
SR = 44100
Limit = 21000
chunk_durations = [0.1, 0.2, 0.4, 0.6, 0.8, 1.0]  # Chunk durations in seconds

for chunk_duration in chunk_durations:
    # Mix audio and extract Mel spectrogram data with labels
    mel_spec_data, labels = mix_audio(dataset_dir, chunk_duration, SR, Limit)

    # SVC.fit only accepts a 2-D (n_samples, n_features) matrix. The extractor
    # yields a multi-dimensional array per sample, so collapse everything
    # after the sample axis into one feature vector. This is a no-op if the
    # data is already 2-D.
    mel_spec_data = np.asarray(mel_spec_data)
    mel_spec_data = mel_spec_data.reshape(len(mel_spec_data), -1)

    # Use LabelEncoder to convert labels to numeric values
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    # Split the dataset into training and testing sets (70/30, fixed seed)
    X_train, X_test, y_train, y_test = train_test_split(
        mel_spec_data, labels_encoded, test_size=0.3, random_state=42
    )

    # Create and train an RBF-kernel SVM classifier
    clf = svm.SVC(C=10, kernel='rbf', gamma='scale')
    clf.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = clf.predict(X_test)

    # Calculate the accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Chunk Duration: {chunk_duration} s | Accuracy: {accuracy:.4f}")


In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

def mix_audio(main_folder_path, duration, SR, Limit):
    """Build a mean mel-spectrum feature matrix from fixed-length audio segments.

    Every ``.wav``/``.mp3`` file (extension matched case-insensitively) in
    ``main_folder_path`` whose name contains ``drone``, ``swarm``,
    ``aircraft`` or ``noise`` (case-insensitive, checked in that order) is
    peak-normalised as a whole and cut into non-overlapping segments. Each
    segment becomes a 256-band mel spectrogram averaged over its last axis.

    Args:
        main_folder_path: Directory that directly contains the audio files.
        duration: Segment length in seconds (int or float).
        SR: Sample rate in Hz. Files are resampled to this rate on load so the
            segment length and the spectrogram computation agree with the audio.
        Limit: Unused; kept so existing callers keep working.

    Returns:
        ``(features, labels)``. ``features`` is a float array with one row of
        256 values per segment, standardised with a single mean and standard
        deviation taken over the whole matrix. ``labels`` is a NumPy array
        with the class name of each row.

    Raises:
        ValueError: If the segment length rounds down to zero samples, or if
            no segment could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')
    audio_extensions = ('.wav', '.mp3')
    max_seconds_per_file = 15723  # at most this many seconds are read from each file

    # int() so a fractional duration still yields valid slice indices.
    segment_sr_required = int(duration * SR)
    if segment_sr_required <= 0:
        raise ValueError(
            f"duration * SR must be at least one sample, got {segment_sr_required}"
        )

    mel_spec_data = []
    labels = []
    sample_counts = dict.fromkeys(class_names, 0)

    # Sorted so the row order (and therefore any seeded split made by the
    # caller) does not depend on the filesystem's listing order.
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered_name = file_name.lower()
        label = next((name for name in class_names if name in lowered_name), None)
        if label is None or not lowered_name.endswith(audio_extensions):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (not the native rate): the segment length and the
        # spectrogram call below both assume the samples are at SR.
        signal_data, _ = librosa.load(file_path, sr=SR, duration=max_seconds_per_file)
        if signal_data.size == 0:
            continue

        # Peak-normalise the whole recording; an all-zero file has nothing to
        # scale and would otherwise turn into NaNs.
        peak = np.max(np.abs(signal_data))
        if peak > 0:
            signal_data = signal_data / peak

        # Every slice below is exactly segment_sr_required samples long, so no
        # per-segment length check is needed. The old `end - start != SR`
        # test rejected every segment unless duration was 1.
        last_start = len(signal_data) - segment_sr_required
        for start in range(0, last_start + 1, segment_sr_required):
            segment = signal_data[start:start + segment_sr_required]

            segment_mel_spec = librosa.feature.melspectrogram(
                y=segment, sr=SR, n_fft=2048, hop_length=512, win_length=1024, n_mels=256
            )
            mel_spec_data.append(np.mean(segment_mel_spec, axis=-1))
            labels.append(label)
            sample_counts[label] += 1

    if not mel_spec_data:
        raise ValueError(f"No usable audio segments found in {main_folder_path!r}")

    # Rows already share one length, so no padding is needed. The previous
    # pad_sequences call cast them to its default int32 dtype, which truncates
    # the mel powers toward zero.
    mel_spec_data = np.asarray(mel_spec_data, dtype=np.float64)
    labels = np.array(labels)

    mean_mel_spec = np.mean(mel_spec_data)
    std_mel_spec = np.std(mel_spec_data)
    normalized_mel_spec_data = (mel_spec_data - mean_mel_spec) / std_mel_spec

    print("Drone samples:", sample_counts['drone'],
          "\nSwarm samples:", sample_counts['swarm'],
          "\nAircraft samples:", sample_counts['aircraft'],
          "\nNoise samples:", sample_counts['noise'])

    return normalized_mel_spec_data, labels

# Experiment settings: folder with the recordings, segment length in seconds,
# and the sample rate (SR) and Limit value handed to mix_audio.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
duration = 1
SR = 44100
Limit = 21000

# Features and string labels for every segment
mel_spec_data, labels = mix_audio(dataset_dir, duration, SR, Limit)

# Map the class names to integer ids
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    mel_spec_data,
    labels_encoded,
    test_size=0.2,
    random_state=42,
)

from cuml.svm import SVC
import cupy as cp

# Move the training data onto the GPU for cuML
X_train = cp.asarray(X_train)
y_train = cp.asarray(y_train)

# RBF-kernel SVM trained on the GPU
clf = SVC(C=10, kernel='rbf', gamma='scale')
clf.fit(X_train, y_train)

# Predict on the held-out features (also moved to the GPU first)
X_test = cp.asarray(X_test)
y_pred = clf.predict(X_test)

# y_test stays on the host, so bring the predictions back before scoring
accuracy = accuracy_score(y_test, y_pred.get())
print("Accuracy:", accuracy)

Drone samples: 15723 
Swarm samples: 15723 
Aircraft samples: 15723 
Noise samples: 15723


# psd

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from cuml.model_selection import train_test_split
from cuml import svm
from cuml.metrics import accuracy_score
from scipy import signal
from cuml.preprocessing import LabelEncoder
import cupy as cp

def mix_audio(main_folder_path, duration, SR, Limit):
    """Build a log-PSD feature matrix from fixed-length audio segments.

    Every ``.wav``/``.mp3`` file (extension matched case-insensitively) in
    ``main_folder_path`` whose name contains ``drone``, ``swarm``,
    ``aircraft`` or ``noise`` (case-insensitive, checked in that order) is
    peak-normalised as a whole and cut into non-overlapping segments. Each
    segment is described by ``log10`` of its Welch power spectral density.

    Args:
        main_folder_path: Directory that directly contains the audio files.
        duration: Segment length in seconds (int or float).
        SR: Sample rate in Hz. Files are resampled to this rate on load so the
            segment length and the Welch estimate agree with the audio.
        Limit: Unused; kept so existing callers keep working.

    Returns:
        ``(features, labels)`` as CuPy arrays. ``features`` has one row per
        segment, standardised with a single mean and standard deviation taken
        over the whole matrix. ``labels`` holds 0 (drone), 1 (swarm),
        2 (aircraft) or 3 (noise) for each row.

    Raises:
        ValueError: If the segment length rounds down to zero samples, or if
            no segment could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')  # index == numeric label
    audio_extensions = ('.wav', '.mp3')
    max_seconds_per_file = 15723  # at most this many seconds are read from each file

    # int() so a fractional duration still yields valid slice indices.
    segment_sr_required = int(duration * SR)
    if segment_sr_required <= 0:
        raise ValueError(
            f"duration * SR must be at least one sample, got {segment_sr_required}"
        )

    psd_data = []
    labels = []
    sample_counts = [0] * len(class_names)

    # Sorted so the row order does not depend on the filesystem's listing order.
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered_name = file_name.lower()
        label = next(
            (idx for idx, name in enumerate(class_names) if name in lowered_name), None
        )
        if label is None or not lowered_name.endswith(audio_extensions):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (not the native rate): the segment length and the Welch
        # call below both assume the samples are at SR.
        signal_data, _ = librosa.load(file_path, sr=SR, duration=max_seconds_per_file)
        if signal_data.size == 0:
            continue

        # Stay on the host here: scipy's welch needs NumPy input, so uploading
        # the signal to the GPU only to .get() every segment back was wasted
        # transfer. An all-zero file is left unscaled instead of becoming NaN.
        peak = np.max(np.abs(signal_data))
        if peak > 0:
            signal_data = signal_data / peak

        # Every slice below is exactly segment_sr_required samples long. The
        # old `end - start != SR` test rejected every segment unless duration
        # was 1.
        last_start = len(signal_data) - segment_sr_required
        for start in range(0, last_start + 1, segment_sr_required):
            segment = signal_data[start:start + segment_sr_required]

            _, psd = signal.welch(segment, SR)
            with np.errstate(divide='ignore', invalid='ignore'):
                log_psd = np.log10(psd)

            # A zero-power bin gives -inf, which would turn the global
            # mean/std (and with them every feature) into NaN. Drop such
            # segments, as the ZCR extractor in this notebook does.
            if not np.isfinite(log_psd).all():
                continue

            psd_data.append(log_psd)
            labels.append(label)
            sample_counts[label] += 1

    if not psd_data:
        raise ValueError(f"No usable audio segments found in {main_folder_path!r}")

    # One host-to-device copy for the whole matrix.
    psd_data = cp.asarray(np.stack(psd_data))
    labels = cp.asarray(labels)

    mean_psd = cp.mean(psd_data)
    std_psd = cp.std(psd_data)
    normalized_psd_data = (psd_data - mean_psd) / std_psd

    print("Drone samples:", sample_counts[0],
          "\nSwarm samples:", sample_counts[1],
          "\nAircraft samples:", sample_counts[2],
          "\nNoise samples:", sample_counts[3])

    return normalized_psd_data, labels

# Experiment settings: folder with the recordings, segment length in seconds,
# and the sample rate (SR) and Limit value handed to mix_audio.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
duration = 1
SR = 44100
Limit = 21000

# Mix audio and extract power spectral density (PSD) data with labels.
# The labels come back as integer class ids, so no LabelEncoder step is needed.
psd_data, labels = mix_audio(dataset_dir, duration, SR, Limit)

# Split the dataset into training and testing sets (70/30). The seed is fixed,
# as in the scikit-learn cells, so the reported accuracy is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    psd_data, labels, test_size=0.3, random_state=42
)

# Create an SVM classifier
clf = svm.SVC(C=10, kernel='rbf', gamma='scale')

# Train the SVM classifier
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

2023-07-19 17:24:25.454238: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy, cupy-cuda12x

  Follow these steps to resolve this issue:

    1. For all packages listed above, run the following command to remove all
       existing CuPy installations:

         $ pip uninstall <package_name>

      If you previously installed CuPy via conda, also run the following:

         $ conda uninstall cupy

    2. Install the appropriate CuPy package.
       Refer to the Installation Guide for detailed instructions.

         https://docs.cupy.dev/en/stable/install.html

---

Drone samples: 15723 
Swarm samples: 15723 
Aircraft samples: 15723 
Noise samples: 15723
Accuracy: 0.9127047061920166


# spectral centroid

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from cuml.model_selection import train_test_split
from cuml import svm
from cuml.metrics import accuracy_score
from cuml.preprocessing import LabelEncoder
import cupy as cp

def mix_audio(main_folder_path, duration, SR, Limit):
    """Build a log spectral-centroid feature matrix from fixed-length audio segments.

    Every ``.wav``/``.mp3`` file (extension matched case-insensitively) in
    ``main_folder_path`` whose name contains ``drone``, ``swarm``,
    ``aircraft`` or ``noise`` (case-insensitive, checked in that order) is
    peak-normalised as a whole and cut into non-overlapping segments. Each
    segment is described by ``log10`` of librosa's spectral centroid output.

    Args:
        main_folder_path: Directory that directly contains the audio files.
        duration: Segment length in seconds (int or float).
        SR: Sample rate in Hz. Files are resampled to this rate on load so the
            segment length and the centroid computation agree with the audio.
        Limit: Unused; kept so existing callers keep working.

    Returns:
        ``(features, labels)`` as CuPy arrays. ``features`` stacks the
        per-segment centroid arrays vertically and is standardised with a
        single mean and standard deviation taken over the whole matrix.
        ``labels`` holds 0 (drone), 1 (swarm), 2 (aircraft) or 3 (noise) for
        each segment.

    Raises:
        ValueError: If the segment length rounds down to zero samples, or if
            no segment could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')  # index == numeric label
    audio_extensions = ('.wav', '.mp3')
    max_seconds_per_file = 15723  # at most this many seconds are read from each file

    # int() so a fractional duration still yields valid slice indices.
    segment_sr_required = int(duration * SR)
    if segment_sr_required <= 0:
        raise ValueError(
            f"duration * SR must be at least one sample, got {segment_sr_required}"
        )

    spectral_centroids = []
    labels = []
    sample_counts = [0] * len(class_names)

    # Sorted so the row order does not depend on the filesystem's listing order.
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered_name = file_name.lower()
        label = next(
            (idx for idx, name in enumerate(class_names) if name in lowered_name), None
        )
        if label is None or not lowered_name.endswith(audio_extensions):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (not the native rate): the segment length and the
        # centroid call below both assume the samples are at SR.
        signal_data, _ = librosa.load(file_path, sr=SR, duration=max_seconds_per_file)
        if signal_data.size == 0:
            continue

        # Stay on the host here: librosa needs NumPy input, so uploading the
        # signal to the GPU only to .get() every segment back was wasted
        # transfer. An all-zero file is left unscaled instead of becoming NaN.
        peak = np.max(np.abs(signal_data))
        if peak > 0:
            signal_data = signal_data / peak

        # Every slice below is exactly segment_sr_required samples long. The
        # old `end - start != SR` test rejected every segment unless duration
        # was 1.
        last_start = len(signal_data) - segment_sr_required
        for start in range(0, last_start + 1, segment_sr_required):
            segment = signal_data[start:start + segment_sr_required]

            # Calculate the spectral centroid
            spectral_centroid = librosa.feature.spectral_centroid(y=segment, sr=SR)
            with np.errstate(divide='ignore', invalid='ignore'):
                log_spectral_centroid = np.log10(spectral_centroid)

            # A zero centroid gives -inf, which would turn the global mean/std
            # (and with them every feature) into NaN. Drop such segments, as
            # the ZCR extractor in this notebook does.
            if not np.isfinite(log_spectral_centroid).all():
                continue

            spectral_centroids.append(log_spectral_centroid)
            labels.append(label)
            sample_counts[label] += 1

    if not spectral_centroids:
        raise ValueError(f"No usable audio segments found in {main_folder_path!r}")

    # One host-to-device copy for the whole matrix.
    spectral_centroids_data = cp.asarray(np.vstack(spectral_centroids))
    labels = cp.asarray(labels)

    mean_spectral_centroid = cp.mean(spectral_centroids_data)
    std_spectral_centroid = cp.std(spectral_centroids_data)
    normalized_spectral_centroids = (spectral_centroids_data - mean_spectral_centroid) / std_spectral_centroid

    print("Drone samples:", sample_counts[0],
          "\nSwarm samples:", sample_counts[1],
          "\nAircraft samples:", sample_counts[2],
          "\nNoise samples:", sample_counts[3])

    return normalized_spectral_centroids, labels

# Experiment settings: folder with the recordings, segment length in seconds,
# and the sample rate (SR) and Limit value handed to mix_audio.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
duration = 1
SR = 44100
Limit = 21000

# Mix audio and extract spectral centroid (log base 10) data with labels
spectral_centroids_data, labels = mix_audio(dataset_dir, duration, SR, Limit)

# Use LabelEncoder to convert labels to numeric values
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split the dataset into training and testing sets (70/30). The seed is fixed,
# as in the scikit-learn cells, so the reported accuracy is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    spectral_centroids_data, labels_encoded, test_size=0.3, random_state=42
)

# Create an SVM classifier
clf = svm.SVC(C=10, kernel='rbf', gamma='scale')

# Train the SVM classifier
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Drone samples: 15723 
Swarm samples: 15723 
Aircraft samples: 15723 
Noise samples: 15723
Accuracy: 0.4017066955566406


# zcr

In [None]:
import os
import numpy as np
import librosa
import tensorflow as tf
from cuml.model_selection import train_test_split
from cuml import svm
from cuml.metrics import accuracy_score
from cuml.preprocessing import LabelEncoder
import cupy as cp

def mix_audio(main_folder_path, duration, SR, Limit):
    """Build a log zero-crossing-rate feature matrix from fixed-length audio segments.

    Every ``.wav``/``.mp3`` file (extension matched case-insensitively) in
    ``main_folder_path`` whose name contains ``drone``, ``swarm``,
    ``aircraft`` or ``noise`` (case-insensitive, checked in that order) is
    peak-normalised as a whole and cut into non-overlapping segments. Each
    segment is described by ``log10`` of librosa's zero-crossing-rate output;
    segments whose log values contain NaN or infinity are skipped.

    Args:
        main_folder_path: Directory that directly contains the audio files.
        duration: Segment length in seconds (int or float).
        SR: Sample rate in Hz. Files are resampled to this rate on load so the
            segment length in samples matches ``duration``.
        Limit: Unused; kept so existing callers keep working.

    Returns:
        ``(features, labels)`` as CuPy arrays. ``features`` stacks the
        per-segment ZCR arrays vertically and is standardised with a single
        mean and standard deviation taken over the whole matrix. ``labels``
        holds 0 (drone), 1 (swarm), 2 (aircraft) or 3 (noise) for each segment.

    Raises:
        ValueError: If the segment length rounds down to zero samples, or if
            no segment could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')  # index == numeric label
    audio_extensions = ('.wav', '.mp3')
    max_seconds_per_file = 15723  # at most this many seconds are read from each file

    # int() so a fractional duration still yields valid slice indices.
    segment_sr_required = int(duration * SR)
    if segment_sr_required <= 0:
        raise ValueError(
            f"duration * SR must be at least one sample, got {segment_sr_required}"
        )

    zero_crossing_rates = []
    labels = []
    sample_counts = [0] * len(class_names)

    # Sorted so the row order does not depend on the filesystem's listing order.
    for file_name in sorted(os.listdir(main_folder_path)):
        lowered_name = file_name.lower()
        label = next(
            (idx for idx, name in enumerate(class_names) if name in lowered_name), None
        )
        if label is None or not lowered_name.endswith(audio_extensions):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (not the native rate) so segment_sr_required samples
        # really span `duration` seconds.
        signal_data, _ = librosa.load(file_path, sr=SR, duration=max_seconds_per_file)
        if signal_data.size == 0:
            continue

        # Stay on the host here: librosa needs NumPy input, so uploading the
        # signal to the GPU only to .get() every segment back (and syncing on
        # the per-segment NaN check) was wasted work. An all-zero file is left
        # unscaled instead of becoming NaN.
        peak = np.max(np.abs(signal_data))
        if peak > 0:
            signal_data = signal_data / peak

        # Every slice below is exactly segment_sr_required samples long. The
        # old `end - start != SR` test rejected every segment unless duration
        # was 1.
        last_start = len(signal_data) - segment_sr_required
        for start in range(0, last_start + 1, segment_sr_required):
            segment = signal_data[start:start + segment_sr_required]

            # Calculate the zero crossing rate
            zero_crossing_rate = librosa.feature.zero_crossing_rate(y=segment)
            with np.errstate(divide='ignore', invalid='ignore'):
                log_zcr = np.log10(zero_crossing_rate)

            # A frame without crossings gives log10(0) = -inf; skip the segment.
            if not np.isfinite(log_zcr).all():
                continue

            zero_crossing_rates.append(log_zcr)
            labels.append(label)
            sample_counts[label] += 1

    if not zero_crossing_rates:
        raise ValueError(f"No usable audio segments found in {main_folder_path!r}")

    # One host-to-device copy for the whole matrix. Non-finite rows were
    # already dropped above, so the statistics below see clean data and the
    # old post-hoc NaN/inf replacement (which ran after the mean/std had
    # been computed) is not needed.
    zero_crossing_rates_data = cp.asarray(np.vstack(zero_crossing_rates))
    labels = cp.asarray(labels)

    mean_zcr = cp.mean(zero_crossing_rates_data)
    std_zcr = cp.std(zero_crossing_rates_data)

    # Normalize zero crossing rate data
    normalized_zero_crossing_rates = (zero_crossing_rates_data - mean_zcr) / std_zcr

    print("Drone samples:", sample_counts[0],
          "\nSwarm samples:", sample_counts[1],
          "\nAircraft samples:", sample_counts[2],
          "\nNoise samples:", sample_counts[3])

    return normalized_zero_crossing_rates, labels

# Experiment settings: folder with the recordings, segment length in seconds,
# and the sample rate (SR) and Limit value handed to mix_audio.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
duration = 1
SR = 44100
Limit = 21000

# Mix audio and extract zero crossing rate (log base 10) data with labels
zero_crossing_rates_data, labels = mix_audio(dataset_dir, duration, SR, Limit)

# Use LabelEncoder to convert labels to numeric values
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Split the dataset into training and testing sets (70/30). The seed is fixed,
# as in the scikit-learn cells, so the reported accuracy is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    zero_crossing_rates_data, labels_encoded, test_size=0.3, random_state=42
)

# Create an SVM classifier
clf = svm.SVC(C=10, kernel='rbf', gamma='scale')

# Train the SVM classifier
clf.fit(X_train, y_train)

# Make predictions on the test set
y_pred = clf.predict(X_test)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Drone samples: 15721 
Swarm samples: 15723 
Aircraft samples: 15704 
Noise samples: 15723
Accuracy: 0.4232543408870697
