In [1]:
!pip install -q kaggle

In [2]:
!mkdir -p ~/.kaggle
!cp /content/kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

cp: cannot stat '/content/kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory


In [3]:
! kaggle datasets download kongaevans/speaker-recognition-dataset

Dataset URL: https://www.kaggle.com/datasets/kongaevans/speaker-recognition-dataset
License(s): unknown
Downloading speaker-recognition-dataset.zip to /content
 95% 219M/231M [00:02<00:00, 123MB/s] 
100% 231M/231M [00:02<00:00, 87.9MB/s]


In [None]:
! unzip /content/speaker-recognition-dataset.zip

In [5]:
import os
import numpy as np
import librosa
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

import time
from concurrent.futures import ThreadPoolExecutor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, classification_report

In [6]:
def load_data(data_dir):
    data = []
    labels = []
    speakers = ['Benjamin_Netanyau', 'Jens_Stoltenberg', 'Julia_Gillard', 'Magaret_Tarcher', 'Nelson_Mandela']

    for speaker in speakers:
        speaker_dir = os.path.join(data_dir, speaker)
        for filename in os.listdir(speaker_dir):
            if filename.endswith('.wav'):
                file_path = os.path.join(speaker_dir, filename)
                data.append(file_path)
                labels.append(speaker)

    return data, labels

In [7]:
data_dir = '/content/16000_pcm_speeches/'
data, labels = load_data(data_dir)

In [8]:
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=16000)

    #  Spectral Centroid
    spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))

    #  Zero Crossing Rate
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

    #  Root Mean Square Energy
    rmse = np.mean(librosa.feature.rms(y=y))

    return [spectral_centroid, zero_crossing_rate, rmse]

In [9]:
def normalize_features(features):
    scaler = StandardScaler()
    return scaler.fit_transform(features)

In [10]:
start_time = time.time()
features = [extract_features(file) for file in data]
end_time = time.time()
execution_time = end_time - start_time
print(f"time taken: {execution_time} s")


time taken: 69.52728605270386 s


In [11]:
from multiprocessing import cpu_count

cpu_count()

2

In [12]:
# Timing feature extraction in parallel
start_time_parallel = time.time()

with ThreadPoolExecutor() as executor:
    features_parallel = executor.map(extract_features, data)


end_time_parallel = time.time()
execution_time_parallel = end_time_parallel - start_time_parallel
print(f"time taken for parallel: {execution_time_parallel} s")


time taken for parallel: 37.71994400024414 s


In [15]:
features = normalize_features(features)

In [16]:


# Convert labels to numeric form
label_mapping = {label: idx for idx, label in enumerate(set(labels))}
numeric_labels = [label_mapping[label] for label in labels]




In [24]:
# Split the data into training and testing sets randomly
X_train, X_test, y_train, y_test = train_test_split(features, numeric_labels, test_size=0.2, random_state=42)

# Ensure balanced data split
print(f"Number of samples in training set: {len(X_train)}")
print(f"Number of samples in testing set: {len(X_test)}")

Number of samples in training set: 6000
Number of samples in testing set: 1501


In [25]:

from sklearn.metrics import confusion_matrix, classification_report

# Define the SGDClassifier with logistic regression loss function
model = SGDClassifier(loss='log_loss', max_iter=1000, learning_rate='constant', eta0=0.01)

# Train the model
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

In [26]:




# Calculate accuracy
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate classification report (F1 score, recall, precision)
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)


Accuracy: 0.664890073284477
Confusion Matrix:
[[202  73  15  17   3]
 [ 74 132  63  41   0]
 [  5  46 230   2   0]
 [ 14  10  16 170  73]
 [  0   0   0  51 264]]
Classification Report:
              precision    recall  f1-score   support

           0       0.68      0.65      0.67       310
           1       0.51      0.43      0.46       310
           2       0.71      0.81      0.76       283
           3       0.60      0.60      0.60       283
           4       0.78      0.84      0.81       315

    accuracy                           0.66      1501
   macro avg       0.66      0.67      0.66      1501
weighted avg       0.66      0.66      0.66      1501



In [27]:
# Define the Logistic Regression model with gradient descent approach
model = LogisticRegression(max_iter=1000, solver='sag', )

# Train the model
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

In [28]:




# Calculate accuracy
accuracy = model.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate classification report (F1 score, recall, precision)
report = classification_report(y_test, y_pred)
print("Classification Report:")
print(report)


Accuracy: 0.715522984676882
Confusion Matrix:
[[190  80  15  25   0]
 [ 42 208  49  11   0]
 [  4  44 224  11   0]
 [ 14   5   5 206  53]
 [  0   0   0  69 246]]
Classification Report:
              precision    recall  f1-score   support

           0       0.76      0.61      0.68       310
           1       0.62      0.67      0.64       310
           2       0.76      0.79      0.78       283
           3       0.64      0.73      0.68       283
           4       0.82      0.78      0.80       315

    accuracy                           0.72      1501
   macro avg       0.72      0.72      0.72      1501
weighted avg       0.72      0.72      0.72      1501

