In [9]:
import os
import warnings

import joblib
import librosa
import numpy as np
import pandas as pd
from scipy.linalg import sqrtm
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler

In [10]:
# Root directory handed to create_dataset(); every sub-directory inside it is
# processed as one folder of audio files.
DATA_FOLDER = '../data/data_test_train'
# NOTE(review): not referenced in the cells shown here - presumably the number
# of mixture components of the UBM; confirm against the training notebook.
NUM_COMPONENTS = 128
# NOTE(review): not referenced in the cells shown here - presumably the
# i-vector dimensionality; confirm against the training notebook.
NUM_IVECTORS = 13
# NOTE(review): not referenced in the cells shown here - presumably the
# covariance_type used when the UBM was fitted; confirm.
COVARIANCE_TYPE = 'diag'
# Small constant added to the per-component counts in extract_ivector() so the
# division by those counts never hits zero.
EPS = 1e-6

In [11]:
# Load the two pre-trained artefacts; both paths are relative to the current
# working directory of the kernel.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load artefacts produced by a trusted pipeline.
# Used by process_folder() as the model passed to
# compute_baum_welch_statistics(), which calls its predict_proba().
ubm = joblib.load('ubm_model.pkl')
# Used by process_folder() as the projection passed to extract_ivector(),
# which calls its transform().
t_matrix = joblib.load('t_matrix.pkl')

In [12]:
def normalize_audio(y):
    """Rescale a waveform with ``librosa.util.normalize`` (library defaults).

    Parameters
    ----------
    y : array-like
        Audio samples as returned by ``librosa.load``.

    Returns
    -------
    The normalised samples, exactly as produced by ``librosa.util.normalize``.
    """
    normalized = librosa.util.normalize(y)
    return normalized

def extract_mfcc(audio_file):
    """Compute MFCC + delta + delta-delta features for one audio file.

    The file is loaded at a 16 kHz sampling rate, passed through
    ``normalize_audio``, and turned into 13 MFCCs per frame. First- and
    second-order deltas of those MFCCs are stacked underneath the static
    coefficients.

    Parameters
    ----------
    audio_file : str
        Path of the audio file to load.

    Returns
    -------
    numpy.ndarray
        Feature matrix with one row per frame; for mono audio the shape is
        ``(n_frames, 39)`` (13 static + 13 delta + 13 delta-delta).
    """
    signal, sample_rate = librosa.load(audio_file, sr=16000)
    signal = normalize_audio(signal)

    static = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    velocity = librosa.feature.delta(static)
    acceleration = librosa.feature.delta(static, order=2)

    # Stack along the coefficient axis, then flip so that frames become rows.
    stacked = np.vstack([static, velocity, acceleration])
    return stacked.T

def compute_baum_welch_statistics(mfccs, ubm):
    """Accumulate zeroth- and first-order statistics of frames under ``ubm``.

    Parameters
    ----------
    mfccs : numpy.ndarray
        Feature matrix, expected as ``(n_frames, n_features)``.
    ubm : object
        Fitted model exposing ``predict_proba``; expected to return one row of
        component posteriors per frame.

    Returns
    -------
    N : numpy.ndarray
        Posteriors summed over frames (one soft count per component).
    F : numpy.ndarray
        Posterior-weighted sum of the frames for each component
        (``posteriors.T @ mfccs``); not centred on the component means.
    """
    posteriors = ubm.predict_proba(mfccs)
    zeroth_order = posteriors.sum(axis=0)
    first_order = posteriors.T @ mfccs
    return zeroth_order, first_order

def extract_ivector(F, N, t_matrix):
    """Project count-normalised first-order statistics through ``t_matrix``.

    Each row of ``F`` is divided by its component count (offset by ``EPS`` so
    empty components do not divide by zero), the resulting matrix is passed to
    ``t_matrix.transform`` and the output is flattened to one dimension.

    Parameters
    ----------
    F : numpy.ndarray
        First-order statistics, one row per component.
    N : numpy.ndarray
        Per-component counts, one entry per row of ``F``.
    t_matrix : object
        Fitted transformer exposing ``transform``.
        NOTE(review): the transformer receives one row per component, so it is
        presumably fitted on per-component vectors - confirm against training.

    Returns
    -------
    numpy.ndarray
        1-D vector containing the flattened transform output.
    """
    smoothed_counts = N + EPS
    per_component_means = np.divide(F, smoothed_counts[:, np.newaxis])
    projected = t_matrix.transform(per_component_means)
    return projected.flatten()

def length_normalize(ivector):
    """Scale an i-vector to unit Euclidean length.

    Parameters
    ----------
    ivector : array-like
        Vector to normalise; plain sequences are accepted and converted to a
        NumPy array.

    Returns
    -------
    numpy.ndarray
        ``ivector / ||ivector||``. A vector whose norm is zero is returned as
        all zeros rather than being divided by zero, so it never turns into
        NaNs that would silently propagate into the saved dataset.
    """
    vector = np.asarray(ivector)
    norm = np.linalg.norm(vector)
    # Divide by 1 when the norm is zero: the result stays finite and the
    # output dtype matches the regular code path.
    safe_norm = norm if norm > 0 else 1.0
    return vector / safe_norm

def train_plda(ivectors, labels):
    """Fit the back-end classifier on labelled i-vectors.

    Despite the name, the model is scikit-learn's
    ``LinearDiscriminantAnalysis`` with default settings, not a probabilistic
    LDA implementation.

    Parameters
    ----------
    ivectors : array-like
        Training vectors, one row per sample.
    labels : array-like
        Class label for each row of ``ivectors``.

    Returns
    -------
    LinearDiscriminantAnalysis
        The fitted estimator.
    """
    # fit() returns the estimator itself, so the call can be chained.
    return LinearDiscriminantAnalysis().fit(ivectors, labels)

def verify_speaker_plda(test_ivector, reference_ivector, plda):
    """Score how similar a test i-vector is to a reference i-vector.

    Both vectors are projected with ``plda.transform`` and compared by cosine
    similarity in the projected space.

    The previous implementation returned ``predict_proba(...)[0, 1]``, i.e. the
    posterior of the second training class for the *test* vector only: the
    reference vector never influenced the result, and the lookup failed for
    models with fewer than two classes.

    Parameters
    ----------
    test_ivector : array-like
        i-vector of the utterance being verified.
    reference_ivector : array-like
        i-vector of the enrolled / reference utterance.
    plda : object
        Fitted model exposing ``transform`` (e.g. the estimator returned by
        ``train_plda``).

    Returns
    -------
    float
        Cosine similarity in ``[-1, 1]``; higher means more similar. ``0.0`` is
        returned when either projected vector has zero length.
        NOTE(review): a model trained on only two classes projects to a single
        dimension, so the score collapses to +/-1 - confirm the training setup.
    """
    pair = np.vstack((
        np.asarray(test_ivector, dtype=float).reshape(1, -1),
        np.asarray(reference_ivector, dtype=float).reshape(1, -1),
    ))
    projected_test, projected_reference = plda.transform(pair)

    denominator = np.linalg.norm(projected_test) * np.linalg.norm(projected_reference)
    if denominator == 0:
        # Cosine similarity is undefined for a zero vector; report "no match".
        return 0.0
    return float(np.dot(projected_test, projected_reference) / denominator)

def process_folder(folder_path):
    """Extract one length-normalised i-vector per audio file in a folder.

    Files are selected by extension (``.mp3``, ``.mp4``, ``.wav``, ``.flac``,
    case-insensitive) and processed in sorted filename order. ``os.listdir``
    returns entries in arbitrary order, so without sorting the position of
    each file's i-vector in the result would differ between machines and runs.

    For every file the MFCC features are standardised with a scaler fitted on
    that file alone, Baum-Welch statistics are computed against the
    module-level ``ubm``, and the i-vector is obtained through the module-level
    ``t_matrix``.

    Parameters
    ----------
    folder_path : str
        Directory containing the audio files.

    Returns
    -------
    list
        Length-normalised i-vectors, one per audio file, in sorted filename
        order. Empty when the folder holds no matching files.
    """
    audio_extensions = (".mp3", ".mp4", ".wav", ".flac")
    audio_files = [
        os.path.join(folder_path, file_name)
        for file_name in sorted(os.listdir(folder_path))
        if file_name.lower().endswith(audio_extensions)
    ]

    ivectors = []
    for audio_file in audio_files:
        mfcc_features = extract_mfcc(audio_file)
        # A fresh scaler per file: each utterance is standardised on its own.
        mfcc_features = StandardScaler().fit_transform(mfcc_features)
        N, F = compute_baum_welch_statistics(mfcc_features, ubm)
        ivector = extract_ivector(F, N, t_matrix)
        ivectors.append(length_normalize(ivector))

    return ivectors

def create_dataset(data_folder):
    """Build a table of i-vector pairs, one row per sub-folder of ``data_folder``.

    Every sub-directory is passed to ``process_folder``; the two i-vectors it
    returns become ``ivector1`` and ``ivector2`` of a row labelled ``1``.
    Sub-directories are visited in sorted name order so the row order is
    reproducible.

    A sub-directory that does not yield exactly two i-vectors is skipped with
    a warning instead of aborting the whole run with an unpacking error.

    Parameters
    ----------
    data_folder : str
        Directory whose sub-directories each hold one pair of audio files.

    Returns
    -------
    pandas.DataFrame
        Columns ``label``, ``ivector1``, ``ivector2``. The columns are present
        even when no sub-directory produced a row.
    """
    columns = ['label', 'ivector1', 'ivector2']
    records = []

    for folder_name in sorted(os.listdir(data_folder)):
        folder_path = os.path.join(data_folder, folder_name)
        if not os.path.isdir(folder_path):
            continue

        ivectors = process_folder(folder_path)
        if len(ivectors) != 2:
            warnings.warn(
                f"Skipping {folder_path!r}: expected exactly 2 audio files, "
                f"found {len(ivectors)}",
                stacklevel=2,
            )
            continue

        ivector1, ivector2 = ivectors
        records.append({
            'label': 1,
            'ivector1': ivector1,
            'ivector2': ivector2
        })

    return pd.DataFrame(records, columns=columns)


In [13]:
# Build the pair table from every sub-folder of DATA_FOLDER.
dataset_df = create_dataset(DATA_FOLDER)
# Persist the table next to the notebook (path is relative to the kernel's
# working directory); the row index is not written.
# NOTE(review): the ivector columns hold NumPy arrays, which to_csv writes via
# their string form - space-separated and, with NumPy's default print options,
# abbreviated with "..." beyond 1000 elements. Confirm the i-vector length or
# serialise the arrays explicitly before relying on this file.
dataset_df.to_csv('speaker_dataset.csv', index=False)