In [33]:
!pip install -q kaggle



In [2]:
!mkdir -p ~/.kaggle
!cp /content/kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

cp: cannot stat '/content/kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory


In [3]:
! kaggle datasets download kongaevans/speaker-recognition-dataset

Dataset URL: https://www.kaggle.com/datasets/kongaevans/speaker-recognition-dataset
License(s): unknown
Downloading speaker-recognition-dataset.zip to /content
 97% 223M/231M [00:01<00:00, 167MB/s]
100% 231M/231M [00:01<00:00, 141MB/s]


In [None]:
! unzip /content/speaker-recognition-dataset.zip

In [1]:
import os
import numpy as np
import librosa
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

import time
from concurrent.futures import ThreadPoolExecutor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
def load_data(data_dir):
    data = []
    labels = []
    speakers = ['Benjamin_Netanyau', 'Jens_Stoltenberg', 'Julia_Gillard', 'Magaret_Tarcher', 'Nelson_Mandela']

    for speaker in speakers:
        speaker_dir = os.path.join(data_dir, speaker)
        for filename in os.listdir(speaker_dir):
            if filename.endswith('.wav'):
                file_path = os.path.join(speaker_dir, filename)
                data.append(file_path)
                labels.append(speaker)

    return data, labels

In [3]:
data_dir = '/content/16000_pcm_speeches/'
data, labels = load_data(data_dir)

In [4]:
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=16000)
    n_mfcc = 13
    mfcc = librosa.feature.mfcc(y=y, n_mfcc= n_mfcc)
    mfcc_mean = np.mean(mfcc, axis=1)

    return mfcc_mean

In [5]:
def normalize_features(features):
    scaler = StandardScaler()
    return scaler.fit_transform(features)

In [6]:
start_time = time.time()
features = [extract_features(file) for file in data]
end_time = time.time()
execution_time = end_time - start_time
print(f"time taken: {execution_time} s")


time taken: 65.98510026931763 s


In [7]:
# Timing feature extraction in parallel
start_time_parallel = time.time()

with ThreadPoolExecutor() as executor:
    features_parallel = executor.map(extract_features, data)


end_time_parallel = time.time()
execution_time_parallel = end_time_parallel - start_time_parallel
print(f"time taken for parallel: {execution_time_parallel} s")


time taken for parallel: 65.09306764602661 s


In [8]:
features = normalize_features(features)

In [9]:


# Convert labels to numeric form
label_mapping = {label: idx for idx, label in enumerate(set(labels))}
numeric_labels = [label_mapping[label] for label in labels]





In [13]:
# Split the data into training and testing sets randomly
X_train, X_test, y_train, y_test = train_test_split(features, numeric_labels, test_size=0.2)

# Ensure balanced data split
print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in testing set: {len(X_test)}")

Number of samples in training set: 6000
Number of samples in testing set: 1501


In [14]:

from sklearn.metrics import confusion_matrix, classification_report

# Define the SGDClassifier with logistic regression loss function
model = SGDClassifier(loss='log_loss', max_iter=1000, learning_rate='constant', eta0=0.01)

# Train the model
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

In [15]:




# Calculate accuracy
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate classification report (F1 score, recall, precision)
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)


Accuracy: 0.9653564290473018
Confusion Matrix:
[[267   7   3   1   4]
 [ 10 301   0   0   3]
 [  2   1 310   4   4]
 [  0   0   5 290   0]
 [  5   2   1   0 281]]
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.95      0.94       282
           1       0.97      0.96      0.96       314
           2       0.97      0.97      0.97       321
           3       0.98      0.98      0.98       295
           4       0.96      0.97      0.97       289

    accuracy                           0.97      1501
   macro avg       0.97      0.97      0.97      1501
weighted avg       0.97      0.97      0.97      1501



In [16]:
# Define the Logistic Regression model with gradient descent approach
model = LogisticRegression(max_iter=1000, solver='sag', )

# Train the model
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

In [17]:




# Calculate accuracy
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate classification report (F1 score, recall, precision)
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)


Accuracy: 0.9766822118587608
Confusion Matrix:
[[270   9   0   0   3]
 [  9 300   1   0   4]
 [  1   0 319   1   0]
 [  0   0   0 295   0]
 [  4   2   1   0 282]]
Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.96      0.95       282
           1       0.96      0.96      0.96       314
           2       0.99      0.99      0.99       321
           3       1.00      1.00      1.00       295
           4       0.98      0.98      0.98       289

    accuracy                           0.98      1501
   macro avg       0.98      0.98      0.98      1501
weighted avg       0.98      0.98      0.98      1501

