# Mfcc

In [None]:
import os
import cupy as cp
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

def mix_audio(main_folder_path, chunk_duration_seconds, SR, Limit):
    """Cut every labelled recording into fixed-length chunks and extract MFCCs.

    A file is labelled by the first of ``drone``, ``swarm``, ``aircraft`` or
    ``noise`` that occurs (case-insensitively) in its name.  Files matching
    none of these, or whose name does not end in ``.wav``/``.mp3``, are
    skipped.  Each recording is split into non-overlapping chunks, every chunk
    is peak-normalised and converted to a 13-coefficient MFCC matrix, and the
    stacked result is standardised with one global mean and standard
    deviation.

    Args:
        main_folder_path: Directory holding the audio files (not recursive).
        chunk_duration_seconds: Length of one chunk in seconds.
        SR: Sample rate in Hz.  Recordings are resampled to this rate on load
            so the chunk length in samples and the MFCC computation agree
            with the audio actually being processed.
        Limit: Unused; accepted only so existing callers keep working.

    Returns:
        ``(features, labels)`` where ``features`` is a CuPy array of shape
        ``(n_chunks, 13, n_frames)`` and ``labels`` is a list of ``n_chunks``
        label strings in the same order.

    Raises:
        ValueError: If a chunk would be shorter than one sample, or if no
            chunk could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')  # priority order
    audio_suffixes = ('.wav', '.mp3')

    samples_per_chunk = int(chunk_duration_seconds * SR)
    if samples_per_chunk < 1:
        raise ValueError(
            f"chunk_duration_seconds={chunk_duration_seconds!r} at SR={SR!r} "
            "is shorter than one sample"
        )

    mfcc_data = []
    labels = []
    chunk_counts = dict.fromkeys(class_names, 0)

    for file_name in os.listdir(main_folder_path):
        lowered = file_name.lower()
        label = next((name for name in class_names if name in lowered), None)
        if label is None or not lowered.endswith(audio_suffixes):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (resampling if needed): loading at the native rate would
        # make both the chunk duration and the MFCC frequency axis wrong for
        # any file not recorded at SR.
        signal_data, _ = librosa.load(file_path, sr=SR, mono=True)

        # Only whole chunks are produced; a trailing partial chunk is dropped.
        last_start = len(signal_data) - samples_per_chunk
        for start in range(0, last_start + 1, samples_per_chunk):
            segment = signal_data[start:start + samples_per_chunk]

            # Peak-normalise on the host (librosa needs a NumPy array anyway).
            # An all-zero chunk is left as is, because dividing by a zero peak
            # would yield NaNs that poison the global mean/std below.
            peak = float(abs(segment).max())
            if peak > 0.0:
                segment = segment / peak

            mfcc = librosa.feature.mfcc(y=segment, sr=SR, n_mfcc=13)

            mfcc_data.append(cp.asarray(mfcc))
            labels.append(label)
            chunk_counts[label] += 1

    if not mfcc_data:
        raise ValueError(
            f"no labelled audio chunks could be extracted from {main_folder_path!r}"
        )

    # One (n_chunks, 13, n_frames) array, standardised with global statistics.
    mfcc_data = cp.stack(mfcc_data)
    normalized_mfcc_data = (mfcc_data - cp.mean(mfcc_data)) / cp.std(mfcc_data)

    print("Drone samples:", chunk_counts['drone'],
          "\nSwarm samples:", chunk_counts['swarm'],
          "\nAircraft samples:", chunk_counts['aircraft'],
          "\nNoise samples:", chunk_counts['noise'])

    return normalized_mfcc_data, labels

# Experiment driver: MFCC features classified with a scikit-learn SVM.

# Experiment configuration.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
SR = 44100  # sample rate (Hz) assumed for chunking and MFCC extraction
Limit = 21000  # forwarded to mix_audio
chunk_durations = [1.0, 0.8, 0.6, 0.4, 0.2, 0.1]  # chunk lengths in seconds

for chunk_duration in chunk_durations:
    # Extract standardised MFCC features and their string labels.
    mfcc_data, labels = mix_audio(dataset_dir, chunk_duration, SR, Limit)

    # Map the string labels to integer class ids.
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    # One flat feature vector per chunk, moved to the host once: scikit-learn's
    # SVC runs on the CPU and needs NumPy input.
    mfcc_data_flattened = cp.asnumpy(
        cp.reshape(mfcc_data, (mfcc_data.shape[0], -1))
    )

    # 70/30 train/test split with a fixed seed for repeatability.
    X_train_np, X_test_np, y_train_np, y_test_np = train_test_split(
        mfcc_data_flattened, labels_encoded, test_size=0.3, random_state=42
    )

    # RBF-kernel SVM trained and evaluated entirely on the host.
    clf = svm.SVC(C=10, kernel='rbf', gamma='scale')
    clf.fit(X_train_np, y_train_np)
    y_pred_np = clf.predict(X_test_np)

    # Labels and predictions are already NumPy arrays, so they are scored
    # directly instead of being copied to the GPU and back first.
    accuracy = accuracy_score(y_test_np, y_pred_np)
    print(f"Chunk Duration: {chunk_duration} s | Accuracy: {accuracy:.4f}")

# Wait for any outstanding GPU work (this does not release GPU memory).
cp.cuda.Device().synchronize()

--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy, cupy-cuda12x

  Follow these steps to resolve this issue:

    1. For all packages listed above, run the following command to remove all
       existing CuPy installations:

         $ pip uninstall <package_name>

      If you previously installed CuPy via conda, also run the following:

         $ conda uninstall cupy

    2. Install the appropriate CuPy package.
       Refer to the Installation Guide for detailed instructions.

         https://docs.cupy.dev/en/stable/install.html

--------------------------------------------------------------------------------

2023-07-20 23:29:48.224658: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA

Drone samples: 15723 
Swarm samples: 15723 
Aircraft samples: 15723 
Noise samples: 15723
Chunk Duration: 1.0 s | Accuracy: 0.8662
Drone samples: 19653 
Swarm samples: 19653 
Aircraft samples: 19653 
Noise samples: 19653
Chunk Duration: 0.8 s | Accuracy: 0.8620
Drone samples: 26205 
Swarm samples: 26205 
Aircraft samples: 26205 
Noise samples: 26205
Chunk Duration: 0.6 s | Accuracy: 0.8589
Drone samples: 39307 
Swarm samples: 39307 
Aircraft samples: 39307 
Noise samples: 39307
Chunk Duration: 0.4 s | Accuracy: 0.8536
Drone samples: 78615 
Swarm samples: 78615 
Aircraft samples: 78615 
Noise samples: 78615
Chunk Duration: 0.2 s | Accuracy: 0.8363
Drone samples: 157230 
Swarm samples: 157230 
Aircraft samples: 157230 
Noise samples: 157230
Chunk Duration: 0.1 s | Accuracy: 0.8083


# Mel

In [None]:
import os
import cupy as cp
import librosa
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import os
import cupy as cp
import librosa
from sklearn.model_selection import train_test_split
from cuml import SVC as cuSVC  # Import cuML's SVM
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
# Specify which GPU to use (0-indexed)
gpu_id = 1  # Use GPU 1

# Set the GPU device
cp.cuda.Device(gpu_id).use()

def mix_audio(main_folder_path, chunk_duration_seconds, SR, Limit):
    """Cut every labelled recording into chunks and extract Mel spectrograms.

    A file is labelled by the first of ``drone``, ``swarm``, ``aircraft`` or
    ``noise`` that occurs (case-insensitively) in its name.  Files matching
    none of these, or whose name does not end in ``.wav``/``.mp3``, are
    skipped.  Each recording is split into non-overlapping chunks, every chunk
    is peak-normalised and converted to a 128-band Mel spectrogram, and the
    stacked result is standardised with one global mean and standard
    deviation.

    Args:
        main_folder_path: Directory holding the audio files (not recursive).
        chunk_duration_seconds: Length of one chunk in seconds.
        SR: Sample rate in Hz.  Recordings are resampled to this rate on load
            so the chunk length in samples and the Mel filter bank agree with
            the audio actually being processed.
        Limit: Unused; accepted only so existing callers keep working.

    Returns:
        ``(features, labels)`` where ``features`` is a CuPy array of shape
        ``(n_chunks, 128, n_frames)`` and ``labels`` is a list of ``n_chunks``
        label strings in the same order.

    Raises:
        ValueError: If a chunk would be shorter than one sample, or if no
            chunk could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')  # priority order
    audio_suffixes = ('.wav', '.mp3')

    samples_per_chunk = int(chunk_duration_seconds * SR)
    if samples_per_chunk < 1:
        raise ValueError(
            f"chunk_duration_seconds={chunk_duration_seconds!r} at SR={SR!r} "
            "is shorter than one sample"
        )

    mel_data = []
    labels = []
    chunk_counts = dict.fromkeys(class_names, 0)

    for file_name in os.listdir(main_folder_path):
        lowered = file_name.lower()
        label = next((name for name in class_names if name in lowered), None)
        if label is None or not lowered.endswith(audio_suffixes):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (resampling if needed): loading at the native rate would
        # make both the chunk duration and the Mel frequency axis wrong for
        # any file not recorded at SR.
        signal_data, _ = librosa.load(file_path, sr=SR, mono=True)

        # Only whole chunks are produced; a trailing partial chunk is dropped.
        last_start = len(signal_data) - samples_per_chunk
        for start in range(0, last_start + 1, samples_per_chunk):
            segment = signal_data[start:start + samples_per_chunk]

            # Peak-normalise on the host (librosa needs a NumPy array anyway).
            # An all-zero chunk is left as is, because dividing by a zero peak
            # would yield NaNs that poison the global mean/std below.
            peak = float(abs(segment).max())
            if peak > 0.0:
                segment = segment / peak

            mel_spectrogram = librosa.feature.melspectrogram(
                y=segment, sr=SR, n_mels=128
            )

            mel_data.append(cp.asarray(mel_spectrogram))
            labels.append(label)
            chunk_counts[label] += 1

    if not mel_data:
        raise ValueError(
            f"no labelled audio chunks could be extracted from {main_folder_path!r}"
        )

    # One (n_chunks, 128, n_frames) array, standardised with global statistics.
    mel_data = cp.stack(mel_data)
    normalized_mel_data = (mel_data - cp.mean(mel_data)) / cp.std(mel_data)

    print("Drone samples:", chunk_counts['drone'],
          "\nSwarm samples:", chunk_counts['swarm'],
          "\nAircraft samples:", chunk_counts['aircraft'],
          "\nNoise samples:", chunk_counts['noise'])

    return normalized_mel_data, labels

# Experiment driver: Mel-spectrogram features classified with a cuML SVM.

# Experiment configuration: dataset location, sample rate (Hz) and limit.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
SR = 44100
Limit = 21000
# Only the 1.0 s chunk length is evaluated here; the full sweep would be
# [1.0, 0.8, 0.6, 0.4, 0.2, 0.1].
chunk_durations = [1.0]

# Run all CuPy work on the second GPU (device ids are 0-indexed).
gpu_id = 1
cp.cuda.Device(gpu_id).use()

for chunk_duration in chunk_durations:
    # Standardised Mel spectrograms plus one string label per chunk.
    mel_data, labels = mix_audio(dataset_dir, chunk_duration, SR, Limit)

    # Integer class ids for the classifier.
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    # Collapse each spectrogram into a single feature vector.
    mel_data_flattened = mel_data.reshape(mel_data.shape[0], -1)

    # Hold out 30 % of the chunks for testing, with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        mel_data_flattened,
        labels_encoded,
        test_size=0.3,
        random_state=42,
    )

    # GPU SVM (cuML) with an RBF kernel: fit, then predict the held-out set.
    clf = cuSVC(C=10, kernel='rbf', gamma='scale')
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # scikit-learn's metric needs host arrays.
    y_test_np = cp.asnumpy(y_test)
    y_pred_np = cp.asnumpy(y_pred)

    accuracy = accuracy_score(y_test_np, y_pred_np)
    print(f"Chunk Duration: {chunk_duration} s | Accuracy: {accuracy:.4f}")

# Block until all queued GPU work has finished.
cp.cuda.Device().synchronize()

# psd

In [None]:
import os
import cupy as cp
import librosa
from librosa import util
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

import os
import cupy as cp
import librosa
from librosa import util

import os
import cupy as cp
import librosa
from librosa import util
from sklearn.model_selection import train_test_split
from cuml.svm import SVC as cuSVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Use CUDA pinned memory for transfer between CPU and GPU
def to_gpu_pinned(data):
    """Copy a host array to the current GPU through pinned (page-locked) memory.

    The previous body called ``cp.cuda.runtime.hostRegister(data)``; that
    runtime call expects ``(ptr, size, flags)``, so it raised ``TypeError``
    and never transferred anything.

    Args:
        data: Array-like host data (anything ``numpy.ascontiguousarray``
            accepts).

    Returns:
        A CuPy array with the same shape, dtype and contents as ``data``.
    """
    import numpy as np  # local import: this cell does not import numpy

    host = np.ascontiguousarray(data)
    if host.size == 0:
        # Nothing to stage; a zero-byte pinned allocation is pointless.
        return cp.asarray(host)

    # Stage the data in a pinned buffer owned by CuPy's pinned-memory pool.
    pinned_mem = cp.cuda.alloc_pinned_memory(host.nbytes)
    staging = np.frombuffer(pinned_mem, host.dtype, host.size).reshape(host.shape)
    staging[...] = host

    device_array = cp.empty(host.shape, dtype=host.dtype)
    device_array.set(staging)
    # Make sure the copy has completed before the staging buffer is released.
    cp.cuda.Stream.null.synchronize()
    return device_array

def mix_audio(main_folder_path, chunk_duration_seconds, SR, Limit):
    """Cut every labelled recording into chunks and compute a log power spectrum.

    A file is labelled by the first of ``drone``, ``swarm``, ``aircraft`` or
    ``noise`` that occurs (case-insensitively) in its name.  Files matching
    none of these, or whose name does not end in ``.wav``/``.mp3``, are
    skipped.  Each recording is split into non-overlapping chunks; every chunk
    is peak-normalised and reduced to ``log10(|FFT|**2 + 1e-9)`` of a
    2048-point FFT computed on the GPU.  The stacked result is standardised
    with one global mean and standard deviation.

    NOTE(review): the FFT is taken with ``n=2048``, so only the first 2048
    samples of each chunk contribute (shorter chunks are zero-padded), and the
    full two-sided spectrum is kept.  This is left as is because changing it
    alters the feature dimension; confirm whether it is intended.

    Args:
        main_folder_path: Directory holding the audio files (not recursive).
        chunk_duration_seconds: Length of one chunk in seconds.
        SR: Sample rate in Hz.  Recordings are resampled to this rate on load
            so the chunk length in samples matches the requested duration.
        Limit: Unused; accepted only so existing callers keep working.

    Returns:
        ``(features, labels)`` where ``features`` is a CuPy array of shape
        ``(n_chunks, 2048)`` and ``labels`` is a list of ``n_chunks`` label
        strings in the same order.

    Raises:
        ValueError: If a chunk would be shorter than one sample, or if no
            chunk could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')  # priority order
    audio_suffixes = ('.wav', '.mp3')

    samples_per_chunk = int(chunk_duration_seconds * SR)
    if samples_per_chunk < 1:
        raise ValueError(
            f"chunk_duration_seconds={chunk_duration_seconds!r} at SR={SR!r} "
            "is shorter than one sample"
        )

    psd_data = []
    labels = []
    chunk_counts = dict.fromkeys(class_names, 0)

    for file_name in os.listdir(main_folder_path):
        lowered = file_name.lower()
        label = next((name for name in class_names if name in lowered), None)
        if label is None or not lowered.endswith(audio_suffixes):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (resampling if needed) so chunk_duration_seconds is
        # honoured for files recorded at a different rate.
        signal_data, _ = librosa.load(file_path, sr=SR, mono=True)
        signal_data_gpu = cp.asarray(signal_data)

        # Only whole chunks are produced; a trailing partial chunk is dropped.
        # No explicit `del` of the per-file arrays: they are rebound on the
        # next file, and deleting the chunk variable failed for files shorter
        # than one chunk, where it was never assigned.
        last_start = len(signal_data_gpu) - samples_per_chunk
        for start in range(0, last_start + 1, samples_per_chunk):
            segment_gpu = signal_data_gpu[start:start + samples_per_chunk]

            # Peak-normalise; an all-zero chunk is left as is, because
            # dividing by a zero peak would yield NaNs that poison the global
            # mean/std below.
            peak = cp.max(cp.abs(segment_gpu))
            if float(peak) > 0.0:
                segment_gpu = segment_gpu / peak

            magnitude_gpu = cp.abs(cp.fft.fft(segment_gpu, n=2048, axis=-1, norm=None))

            # Log power; the small offset keeps log10 finite for empty bins.
            psd_gpu = cp.log10(cp.square(magnitude_gpu) + 1e-9)

            psd_data.append(psd_gpu)
            labels.append(label)
            chunk_counts[label] += 1

    if not psd_data:
        raise ValueError(
            f"no labelled audio chunks could be extracted from {main_folder_path!r}"
        )

    # One (n_chunks, 2048) array, standardised with global statistics.
    psd_data = cp.stack(psd_data)
    normalized_psd_data = (psd_data - cp.mean(psd_data)) / cp.std(psd_data)

    print("Drone samples:", chunk_counts['drone'],
          "\nSwarm samples:", chunk_counts['swarm'],
          "\nAircraft samples:", chunk_counts['aircraft'],
          "\nNoise samples:", chunk_counts['noise'])

    return normalized_psd_data, labels


# Experiment driver: log-power-spectrum (PSD) features classified with a cuML SVM.


# Experiment configuration: dataset location, sample rate (Hz) and limit.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
SR = 44100
Limit = 21000
# Chunk lengths to evaluate, in seconds.
chunk_durations = [1.0, 0.8, 0.6, 0.4, 0.2, 0.1]

# Run all CuPy work on the first GPU (device ids are 0-indexed).
gpu_id = 0
cp.cuda.Device(gpu_id).use()

import os
import cupy as cp
import librosa
from cuml.svm import SVC as cuSVC  # Import cuML's GPU-accelerated SVM
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

for chunk_duration in chunk_durations:
    # Standardised PSD features plus one string label per chunk.
    psd_data, labels = mix_audio(dataset_dir, chunk_duration, SR, Limit)

    # Integer class ids for the classifier.
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    # Hold out 30 % of the chunks for testing, with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        psd_data,
        labels_encoded,
        test_size=0.3,
        random_state=42,
    )

    # GPU SVM (cuML) with an RBF kernel: fit, then predict the held-out set.
    clf = cuSVC(C=10, kernel='rbf', gamma='scale')
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # scikit-learn's metric needs host arrays.
    accuracy = accuracy_score(cp.asnumpy(y_test), cp.asnumpy(y_pred))
    print(f"Chunk Duration: {chunk_duration} s | Accuracy: {accuracy:.4f}")

In [None]:
import os
import cupy as cp
import librosa
from librosa import util
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

import os
import cupy as cp
import librosa
from librosa import util

import os
import cupy as cp
import librosa
from librosa import util
from sklearn.model_selection import train_test_split
from cuml.svm import SVC as cuSVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Use CUDA pinned memory for transfer between CPU and GPU
def to_gpu_pinned(data):
    """Copy a host array to the current GPU through pinned (page-locked) memory.

    The previous body called ``cp.cuda.runtime.hostRegister(data)``; that
    runtime call expects ``(ptr, size, flags)``, so it raised ``TypeError``
    and never transferred anything.

    Args:
        data: Array-like host data (anything ``numpy.ascontiguousarray``
            accepts).

    Returns:
        A CuPy array with the same shape, dtype and contents as ``data``.
    """
    import numpy as np  # local import: this cell does not import numpy

    host = np.ascontiguousarray(data)
    if host.size == 0:
        # Nothing to stage; a zero-byte pinned allocation is pointless.
        return cp.asarray(host)

    # Stage the data in a pinned buffer owned by CuPy's pinned-memory pool.
    pinned_mem = cp.cuda.alloc_pinned_memory(host.nbytes)
    staging = np.frombuffer(pinned_mem, host.dtype, host.size).reshape(host.shape)
    staging[...] = host

    device_array = cp.empty(host.shape, dtype=host.dtype)
    device_array.set(staging)
    # Make sure the copy has completed before the staging buffer is released.
    cp.cuda.Stream.null.synchronize()
    return device_array

import os
import cupy as cp
import librosa
from librosa import util

def mix_audio(main_folder_path, chunk_duration_seconds, SR, Limit):
    """Cut every labelled recording into chunks and compute a log power spectrum.

    A file is labelled by the first of ``drone``, ``swarm``, ``aircraft`` or
    ``noise`` that occurs (case-insensitively) in its name.  Files matching
    none of these, or whose name does not end in ``.wav``/``.mp3``, are
    skipped.  Each recording is split into non-overlapping chunks; every chunk
    is peak-normalised and reduced to ``log10(|FFT|**2 + 1e-9)`` of a
    2048-point FFT computed on the GPU.  The stacked result is standardised
    with one global mean and standard deviation.

    A file that fails to load or process is reported on stdout and skipped;
    chunks already extracted from it before the failure are kept.

    NOTE(review): the FFT is taken with ``n=2048``, so only the first 2048
    samples of each chunk contribute (shorter chunks are zero-padded), and the
    full two-sided spectrum is kept.  This is left as is because changing it
    alters the feature dimension; confirm whether it is intended.

    Args:
        main_folder_path: Directory holding the audio files (not recursive).
        chunk_duration_seconds: Length of one chunk in seconds.
        SR: Sample rate in Hz.  Recordings are resampled to this rate on load
            so the chunk length in samples matches the requested duration.
        Limit: Unused; accepted only so existing callers keep working.

    Returns:
        ``(features, labels)`` where ``features`` is a CuPy array of shape
        ``(n_chunks, 2048)`` and ``labels`` is a list of ``n_chunks`` label
        strings in the same order.

    Raises:
        ValueError: If a chunk would be shorter than one sample, or if no
            chunk could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')  # priority order
    audio_suffixes = ('.wav', '.mp3')

    samples_per_chunk = int(chunk_duration_seconds * SR)
    if samples_per_chunk < 1:
        raise ValueError(
            f"chunk_duration_seconds={chunk_duration_seconds!r} at SR={SR!r} "
            "is shorter than one sample"
        )

    psd_data = []
    labels = []
    chunk_counts = dict.fromkeys(class_names, 0)

    for file_name in os.listdir(main_folder_path):
        lowered = file_name.lower()
        label = next((name for name in class_names if name in lowered), None)
        if label is None or not lowered.endswith(audio_suffixes):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        try:
            # Load at SR (resampling if needed) so chunk_duration_seconds is
            # honoured for files recorded at a different rate.
            signal_data, _ = librosa.load(
                file_path, sr=SR, mono=True, dtype=cp.float32
            )
            signal_data_gpu = cp.asarray(signal_data)

            # Only whole chunks are produced; a trailing partial chunk is
            # dropped.  No explicit `del` of the per-file arrays: deleting
            # the chunk variable failed for files shorter than one chunk and
            # was then reported below as a bogus processing error.
            last_start = len(signal_data_gpu) - samples_per_chunk
            for start in range(0, last_start + 1, samples_per_chunk):
                segment_gpu = signal_data_gpu[start:start + samples_per_chunk]

                # Peak-normalise; an all-zero chunk is left as is, because
                # dividing by a zero peak would yield NaNs that poison the
                # global mean/std below.
                peak = cp.max(cp.abs(segment_gpu))
                if float(peak) > 0.0:
                    segment_gpu = segment_gpu / peak

                magnitude_gpu = cp.abs(
                    cp.fft.fft(segment_gpu, n=2048, axis=-1, norm=None)
                )

                # Log power; the small offset keeps log10 finite for empty bins.
                psd_gpu = cp.log10(cp.square(magnitude_gpu) + 1e-9)

                psd_data.append(psd_gpu)
                labels.append(label)
                chunk_counts[label] += 1

        except Exception as e:
            # Deliberately best-effort: one unreadable file must not abort
            # the whole dataset build.
            print(f"Error processing file '{file_name}': {str(e)}")
            continue

    if not psd_data:
        raise ValueError(
            f"no labelled audio chunks could be extracted from {main_folder_path!r}"
        )

    # One (n_chunks, 2048) array, standardised with global statistics.
    psd_data = cp.stack(psd_data)
    normalized_psd_data = (psd_data - cp.mean(psd_data)) / cp.std(psd_data)

    print("Drone samples:", chunk_counts['drone'],
          "\nSwarm samples:", chunk_counts['swarm'],
          "\nAircraft samples:", chunk_counts['aircraft'],
          "\nNoise samples:", chunk_counts['noise'])

    return normalized_psd_data, labels



# Experiment driver: log-power-spectrum (PSD) features classified with a cuML SVM.


# Experiment configuration: dataset location, sample rate (Hz) and limit.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
SR = 44100
Limit = 21000
# Only the two shortest chunk lengths (in seconds) are evaluated in this run.
chunk_durations = [0.2, 0.1]

# Run all CuPy work on the second GPU (device ids are 0-indexed).
gpu_id = 1
cp.cuda.Device(gpu_id).use()

import os
import cupy as cp
import librosa
from cuml.svm import SVC as cuSVC  # Import cuML's GPU-accelerated SVM
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

for chunk_duration in chunk_durations:
    # Standardised PSD features plus one string label per chunk.
    psd_data, labels = mix_audio(dataset_dir, chunk_duration, SR, Limit)

    # Integer class ids for the classifier.
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    # Hold out 30 % of the chunks for testing, with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        psd_data,
        labels_encoded,
        test_size=0.3,
        random_state=42,
    )

    # GPU SVM (cuML) with an RBF kernel: fit, then predict the held-out set.
    clf = cuSVC(C=10, kernel='rbf', gamma='scale')
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # scikit-learn's metric needs host arrays.
    accuracy = accuracy_score(cp.asnumpy(y_test), cp.asnumpy(y_pred))
    print(f"Chunk Duration: {chunk_duration} s | Accuracy: {accuracy:.4f}")

# Zcr

In [None]:
import os
import cupy as cp
import librosa
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import os
import cupy as cp
import librosa
from sklearn.model_selection import train_test_split
from cuml import SVC as cuSVC  # Import cuML's SVM
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
# Specify which GPU to use (0-indexed)
gpu_id = 0  # Use GPU 1

# Set the GPU device
cp.cuda.Device(gpu_id).use()
# Modify the mix_audio_zcr function
def mix_audio_zcr(main_folder_path, chunk_duration_seconds, SR, Limit):
    """Cut every labelled recording into chunks and extract log zero-crossing rate.

    A file is labelled by the first of ``drone``, ``swarm``, ``aircraft`` or
    ``noise`` that occurs (case-insensitively) in its name.  Files matching
    none of these, or whose name does not end in ``.wav``/``.mp3``, are
    skipped.  Each recording is split into non-overlapping chunks and the
    frame-wise zero-crossing rate of every chunk is converted to ``log10``.
    Chunks whose log-ZCR contains a NaN or infinity (for example a frame with
    no zero crossings) are discarded.  The stacked result is standardised
    with one global mean and standard deviation.

    Args:
        main_folder_path: Directory holding the audio files (not recursive).
        chunk_duration_seconds: Length of one chunk in seconds.
        SR: Sample rate in Hz.  Recordings are resampled to this rate on load
            so the chunk length in samples matches the requested duration.
        Limit: Unused; accepted only so existing callers keep working.

    Returns:
        ``(features, labels)`` where ``features`` is a CuPy array of shape
        ``(n_chunks, n_frames)`` and ``labels`` is a list of ``n_chunks``
        label strings in the same order.

    Raises:
        ValueError: If a chunk would be shorter than one sample, or if no
            usable chunk could be extracted from the folder.
    """
    # Local import: the surrounding cell uses `np` but never imports NumPy,
    # which made every call fail with NameError.
    import numpy as np

    class_names = ('drone', 'swarm', 'aircraft', 'noise')  # priority order
    audio_suffixes = ('.wav', '.mp3')

    samples_per_chunk = int(chunk_duration_seconds * SR)
    if samples_per_chunk < 1:
        raise ValueError(
            f"chunk_duration_seconds={chunk_duration_seconds!r} at SR={SR!r} "
            "is shorter than one sample"
        )

    zcr_data = []
    labels = []
    chunk_counts = dict.fromkeys(class_names, 0)

    for file_name in os.listdir(main_folder_path):
        lowered = file_name.lower()
        label = next((name for name in class_names if name in lowered), None)
        if label is None or not lowered.endswith(audio_suffixes):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (resampling if needed) so chunk_duration_seconds is
        # honoured for files recorded at a different rate.
        signal_data, _ = librosa.load(file_path, sr=SR, mono=True)

        # Only whole chunks are produced; a trailing partial chunk is dropped.
        last_start = len(signal_data) - samples_per_chunk
        for start in range(0, last_start + 1, samples_per_chunk):
            segment = signal_data[start:start + samples_per_chunk]

            zcr = librosa.feature.zero_crossing_rate(y=segment)[0]

            # log10(0) is expected for frames without crossings; silence the
            # warning and drop the chunk via the finiteness check instead.
            with np.errstate(divide='ignore', invalid='ignore'):
                zcr_log = np.log10(zcr)

            if not np.all(np.isfinite(zcr_log)):
                continue

            zcr_data.append(cp.asarray(zcr_log))
            labels.append(label)
            chunk_counts[label] += 1

    if not zcr_data:
        raise ValueError(
            f"no usable labelled audio chunks could be extracted from {main_folder_path!r}"
        )

    # One (n_chunks, n_frames) array, standardised with global statistics.
    zcr_data = cp.stack(zcr_data)
    normalized_zcr_data = (zcr_data - cp.mean(zcr_data)) / cp.std(zcr_data)

    print("Drone samples:", chunk_counts['drone'],
          "\nSwarm samples:", chunk_counts['swarm'],
          "\nAircraft samples:", chunk_counts['aircraft'],
          "\nNoise samples:", chunk_counts['noise'])

    return normalized_zcr_data, labels


# Experiment driver: log zero-crossing-rate features classified with a cuML SVM.

# Experiment configuration: dataset location, sample rate (Hz) and limit.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
SR = 44100
Limit = 21000
chunk_durations = [1.0, 0.8, 0.6, 0.4, 0.2, 0.1]  # Chunk durations in seconds

# Run all CuPy work on the first GPU (device ids are 0-indexed).
gpu_id = 0
cp.cuda.Device(gpu_id).use()

for chunk_duration in chunk_durations:
    # Standardised log-ZCR features plus one string label per chunk.
    zcr_data, labels = mix_audio_zcr(dataset_dir, chunk_duration, SR, Limit)

    # Use LabelEncoder to convert labels to numeric values
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        zcr_data, labels_encoded, test_size=0.3, random_state=42
    )

    # Create an SVM classifier with cuML using CuPy arrays
    clf = cuSVC(C=10, kernel='rbf', gamma='scale')

    # Train the SVM classifier using CuPy arrays
    clf.fit(X_train, y_train)

    # Make predictions on the test set using CuPy arrays
    y_pred = clf.predict(X_test)

    # scikit-learn's metric needs host arrays; the prediction may live on the
    # GPU, so convert explicitly, as the other experiments in this file do.
    y_test_np = cp.asnumpy(y_test)
    y_pred_np = cp.asnumpy(y_pred)

    accuracy = accuracy_score(y_test_np, y_pred_np)
    print(f"Chunk Duration: {chunk_duration} s | Accuracy: {accuracy:.4f}")

# Wait for any outstanding GPU work (this does not release GPU memory).
cp.cuda.Device().synchronize()

# spectral centroid

In [None]:
import os
import cupy as cp
import librosa
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import os
import cupy as cp
import librosa
from sklearn.model_selection import train_test_split
from cuml import SVC as cuSVC  # Import cuML's SVM
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
# Specify which GPU to use (0-indexed)
gpu_id = 0  # Use GPU 1

# Set the GPU device
cp.cuda.Device(gpu_id).use()

def mix_audio(main_folder_path, chunk_duration_seconds, SR, Limit):
    """Cut every labelled recording into chunks and extract log spectral centroids.

    A file is labelled by the first of ``drone``, ``swarm``, ``aircraft`` or
    ``noise`` that occurs (case-insensitively) in its name.  Files matching
    none of these, or whose name does not end in ``.wav``/``.mp3``, are
    skipped.  Each recording is split into non-overlapping chunks and the
    frame-wise spectral centroid of every chunk is converted to
    ``log10(centroid + 1e-10)``.  The stacked result is standardised with one
    global mean and standard deviation.

    Args:
        main_folder_path: Directory holding the audio files (not recursive).
        chunk_duration_seconds: Length of one chunk in seconds.
        SR: Sample rate in Hz.  Recordings are resampled to this rate on load
            so the chunk length in samples and the centroid frequencies agree
            with the audio actually being processed.
        Limit: Unused; accepted only so existing callers keep working.

    Returns:
        ``(features, labels)`` where ``features`` is a CuPy array of shape
        ``(n_chunks, 1, n_frames)`` and ``labels`` is a list of ``n_chunks``
        label strings in the same order.

    Raises:
        ValueError: If a chunk would be shorter than one sample, or if no
            chunk could be extracted from the folder.
    """
    class_names = ('drone', 'swarm', 'aircraft', 'noise')  # priority order
    audio_suffixes = ('.wav', '.mp3')

    samples_per_chunk = int(chunk_duration_seconds * SR)
    if samples_per_chunk < 1:
        raise ValueError(
            f"chunk_duration_seconds={chunk_duration_seconds!r} at SR={SR!r} "
            "is shorter than one sample"
        )

    spectral_centroids = []
    labels = []
    chunk_counts = dict.fromkeys(class_names, 0)

    for file_name in os.listdir(main_folder_path):
        lowered = file_name.lower()
        label = next((name for name in class_names if name in lowered), None)
        if label is None or not lowered.endswith(audio_suffixes):
            continue

        file_path = os.path.join(main_folder_path, file_name)
        # Load at SR (resampling if needed): loading at the native rate would
        # make both the chunk duration and the centroid frequencies wrong for
        # any file not recorded at SR.
        signal_data, _ = librosa.load(file_path, sr=SR, mono=True)

        # Only whole chunks are produced; a trailing partial chunk is dropped.
        # The chunk stays on the host: librosa needs a NumPy array, so copying
        # it to the GPU and straight back gained nothing.
        last_start = len(signal_data) - samples_per_chunk
        for start in range(0, last_start + 1, samples_per_chunk):
            segment = signal_data[start:start + samples_per_chunk]

            spectral_centroid = librosa.feature.spectral_centroid(y=segment, sr=SR)

            # The small offset keeps log10 finite for a zero centroid.
            spectral_centroid_log = cp.log10(cp.asarray(spectral_centroid) + 1e-10)

            spectral_centroids.append(spectral_centroid_log)
            labels.append(label)
            chunk_counts[label] += 1

    if not spectral_centroids:
        raise ValueError(
            f"no labelled audio chunks could be extracted from {main_folder_path!r}"
        )

    # One (n_chunks, 1, n_frames) array, standardised with global statistics.
    spectral_centroids = cp.stack(spectral_centroids)
    normalized_spectral_centroids = (
        spectral_centroids - cp.mean(spectral_centroids)
    ) / cp.std(spectral_centroids)

    print("Drone samples:", chunk_counts['drone'],
          "\nSwarm samples:", chunk_counts['swarm'],
          "\nAircraft samples:", chunk_counts['aircraft'],
          "\nNoise samples:", chunk_counts['noise'])

    return normalized_spectral_centroids, labels


# Experiment driver: log spectral-centroid features classified with a cuML SVM.

# Experiment configuration: dataset location, sample rate (Hz) and limit.
dataset_dir = '/home/user/Desktop/deepak/new_mixed'
SR = 44100
Limit = 21000
# Chunk lengths to evaluate, in seconds.
chunk_durations = [1.0, 0.8, 0.6, 0.4, 0.2, 0.1]

# Run all CuPy work on the first GPU (device ids are 0-indexed).
gpu_id = 0
cp.cuda.Device(gpu_id).use()

for chunk_duration in chunk_durations:
    # Standardised log spectral centroids plus one string label per chunk.
    spectral_centroids, labels = mix_audio(dataset_dir, chunk_duration, SR, Limit)

    # Integer class ids for the classifier.
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    # Collapse each chunk's centroid track into a single feature vector.
    spectral_centroids_flattened = spectral_centroids.reshape(
        spectral_centroids.shape[0], -1
    )

    # Hold out 30 % of the chunks for testing, with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        spectral_centroids_flattened,
        labels_encoded,
        test_size=0.3,
        random_state=42,
    )

    # GPU SVM (cuML) with an RBF kernel: fit, then predict the held-out set.
    clf = cuSVC(C=10, kernel='rbf', gamma='scale')
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # scikit-learn's metric needs host arrays.
    y_test_np = cp.asnumpy(y_test)
    y_pred_np = cp.asnumpy(y_pred)

    accuracy = accuracy_score(y_test_np, y_pred_np)
    print(f"Chunk Duration: {chunk_duration} s | Accuracy: {accuracy:.4f}")

# Block until all queued GPU work has finished.
cp.cuda.Device().synchronize()

--------------------------------------------------------------------------------

  CuPy may not function correctly because multiple CuPy packages are installed
  in your environment:

    cupy, cupy-cuda12x

  Follow these steps to resolve this issue:

    1. For all packages listed above, run the following command to remove all
       existing CuPy installations:

         $ pip uninstall <package_name>

      If you previously installed CuPy via conda, also run the following:

         $ conda uninstall cupy

    2. Install the appropriate CuPy package.
       Refer to the Installation Guide for detailed instructions.

         https://docs.cupy.dev/en/stable/install.html

--------------------------------------------------------------------------------



Drone samples: 15723 
Swarm samples: 15723 
Aircraft samples: 15723 
Noise samples: 15723
Chunk Duration: 1.0 s | Accuracy: 0.4081
Drone samples: 19653 
Swarm samples: 19653 
Aircraft samples: 19653 
Noise samples: 19653
Chunk Duration: 0.8 s | Accuracy: 0.3818
Drone samples: 26205 
Swarm samples: 26205 
Aircraft samples: 26205 
Noise samples: 26205
Chunk Duration: 0.6 s | Accuracy: 0.3834
Drone samples: 39307 
Swarm samples: 39307 
Aircraft samples: 39307 
Noise samples: 39307
Chunk Duration: 0.4 s | Accuracy: 0.3836
Drone samples: 78615 
Swarm samples: 78615 
Aircraft samples: 78615 
Noise samples: 78615
Chunk Duration: 0.2 s | Accuracy: 0.3730
Drone samples: 157230 
Swarm samples: 157230 
Aircraft samples: 157230 
Noise samples: 157230
Chunk Duration: 0.1 s | Accuracy: 0.3542
