In [1]:
import numpy as np
# import pandas as pd
import librosa
# import tensorflow as tf
# from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import os
import torch
import torchaudio
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:

def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random number generators.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch (CPU plus every CUDA device), then switches cuDNN to
    deterministic mode with auto-tuning disabled.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Imported locally so this function does not rely on a top-level
    # ``import random`` being present in the notebook.
    import random

    # Python's own generator must be seeded too; otherwise any code that
    # draws from ``random`` stays non-reproducible after this call.
    random.seed(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # For CUDA
    torch.cuda.manual_seed_all(seed)  # For multi-GPU

    # Ensures deterministic behavior (optional, can slow things down)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [3]:
# Fix all random number generators up front so later cells are repeatable.
set_seed(seed=42)

In [4]:
# Root directory of the dataset; its immediate subfolders are listed as
# speakers by the cells below.
data_file = "data/processed3/50_speakers_audio_data"

In [8]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler
from concurrent.futures import ProcessPoolExecutor

# Function to process a single folder
def process_folder(folder_path, speaker_id, n_mfcc, max_pad_len, mfcc_window_len):
    """Extract labelled MFCC windows from every ``.wav`` file in one folder.

    Files are visited in sorted name order so the returned sample order is
    the same on every run and every filesystem.

    Args:
        folder_path: Directory whose ``.wav`` files are processed.
        speaker_id: Label attached to every window from this folder.
        n_mfcc: Forwarded to ``process_audio``.
        max_pad_len: Forwarded to ``process_audio``.
        mfcc_window_len: Forwarded to ``process_audio``.

    Returns:
        A list of ``(window, speaker_id)`` tuples. Errors (for example an
        unreadable folder) are printed, and whatever was collected before
        the error is returned, so one bad folder does not abort the run.
    """
    results = []
    try:
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # dataset order reproducible.
        for file_name in sorted(os.listdir(folder_path)):
            if not file_name.endswith('.wav'):  # Only process .wav files
                continue
            file_path = os.path.join(folder_path, file_name)
            windows = process_audio(file_path, n_mfcc, max_pad_len, mfcc_window_len)
            results.extend((window, speaker_id) for window in windows)
    except Exception as e:
        print(f"Error processing folder {folder_path}: {e}")
    return results

# Function to process a single audio file
def process_audio(file_path, n_mfcc, max_pad_len, mfcc_window_len):
    """Convert one audio file into a list of fixed-length MFCC windows.

    The file is loaded with ``sr=None``. MFCCs and their first- and
    second-order deltas are stacked, standardized per file with a fresh
    ``StandardScaler``, padded with zeros or truncated to ``max_pad_len``
    rows, and cut into consecutive non-overlapping windows.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients requested from librosa.
        max_pad_len: Number of rows the feature matrix is padded or cut to.
        mfcc_window_len: Number of rows in each returned window.

    Returns:
        A list of array slices with ``mfcc_window_len`` rows each. Leftover
        rows that do not fill a whole window are dropped. On any exception
        the error is printed and an empty list is returned.
    """
    try:
        signal, sample_rate = librosa.load(file_path, sr=None)

        # Base coefficients plus first/second derivatives, stacked on axis 0.
        base = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
        first_delta = librosa.feature.delta(base)
        second_delta = librosa.feature.delta(base, order=2)
        stacked = np.concatenate((base, first_delta, second_delta), axis=0)

        # Transposed before scaling, so each column is standardized.
        features = StandardScaler().fit_transform(stacked.T)

        # Bring the row count to exactly max_pad_len: zero-pad at the end
        # when short, otherwise keep only the leading rows.
        shortfall = max_pad_len - features.shape[0]
        if shortfall > 0:
            features = np.pad(features, pad_width=((0, shortfall), (0, 0)), mode='constant')
        else:
            features = features[:max_pad_len, :]

        # Consecutive, non-overlapping windows of mfcc_window_len rows.
        window_count = features.shape[0] // mfcc_window_len
        return [
            features[k * mfcc_window_len:(k + 1) * mfcc_window_len, :]
            for k in range(window_count)
        ]
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        return []

# Main function to extract MFCC features in parallel
def extract_mfcc_parallel(parent_dir, sub_folders, n_mfcc=13, max_pad_len=129, mfcc_window_len=43):
    """Extract MFCC windows for several speaker folders using a process pool.

    One ``process_folder`` task is submitted per entry of ``sub_folders``;
    results are gathered in submission order.

    Args:
        parent_dir: Directory that contains the speaker folders.
        sub_folders: Folder names inside ``parent_dir``. The last two
            characters of each name are parsed as the integer speaker ID.
        n_mfcc: Forwarded to ``process_folder``.
        max_pad_len: Forwarded to ``process_folder``.
        mfcc_window_len: Forwarded to ``process_folder``.

    Returns:
        ``(x, y)`` as NumPy arrays built from the collected windows and
        their speaker IDs.

    Raises:
        ValueError: If a folder name does not end in two characters that
            parse as an integer.
    """
    # NOTE(review): only the last two characters form the label, so folder
    # names whose numeric suffixes share those characters would get the same
    # ID — confirm the dataset's naming scheme.
    # NOTE(review): worker functions defined inside a notebook may not be
    # importable by child processes under the "spawn" start method — confirm
    # on the target platform.
    features = []
    labels = []

    with ProcessPoolExecutor() as pool:
        pending = [
            pool.submit(
                process_folder,
                os.path.join(parent_dir, folder),
                int(folder[-2:]),
                n_mfcc,
                max_pad_len,
                mfcc_window_len,
            )
            for folder in sub_folders
        ]

        # Collect in submission order; a failing task is reported and skipped.
        for job in pending:
            try:
                for window, speaker_id in job.result():
                    features.append(window)
                    labels.append(speaker_id)
            except Exception as e:
                print(f"Error processing folder: {e}")

    return np.array(features), np.array(labels)

In [9]:
# Number of speaker folders to select from the dataset directory.
no_speakers_file = 50

def speakers_list(no_speakers_file, data_file):
    """Return the names of the first ``no_speakers_file`` speaker folders.

    Subfolder names are sorted before slicing, so the same speakers are
    chosen on every run regardless of the order the filesystem lists them.

    Args:
        no_speakers_file: How many speaker folders to return.
        data_file: Directory whose immediate subfolders are the speakers.

    Returns:
        A list of subfolder names (not full paths), in sorted order.

    Raises:
        ValueError: If ``no_speakers_file`` is negative or larger than the
            number of subfolders available.
    """
    # A negative count would otherwise silently slice from the end.
    if no_speakers_file < 0:
        raise ValueError(f"Requested {no_speakers_file} speakers, but the count must be non-negative.")

    # scandir yields entries in arbitrary order, so sort the names; the
    # with-block closes the directory iterator promptly.
    with os.scandir(data_file) as entries:
        subfolders = sorted(entry.name for entry in entries if entry.is_dir())

    # Check if the requested number of speakers is available
    if no_speakers_file > len(subfolders):
        raise ValueError(f"Requested {no_speakers_file} speakers, but only {len(subfolders)} available.")

    return subfolders[:no_speakers_file]

# Names of the speaker folders that will be processed.
speaker_list = speakers_list(no_speakers_file=no_speakers_file, data_file=data_file)


In [None]:
# Build the feature windows (x) and speaker labels (y) for the chosen folders.
x, y = extract_mfcc_parallel(parent_dir=data_file, sub_folders=speaker_list)