#### Mount Drive &  Git clone Repo

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!git clone https://github.com/choudhary-dinesh/speaker_verification

Cloning into 'speaker_verification'...
remote: Enumerating objects: 24, done.[K
remote: Counting objects: 100% (24/24), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 24 (delta 6), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (24/24), 9.29 KiB | 9.29 MiB/s, done.
Resolving deltas: 100% (6/6), done.


#### Install requirements.txt & Import

In [3]:
%cd /content/speaker_verification

/content/speaker_verification


In [4]:
!pip install -r requirements.txt

Collecting python_speech_features==0.6 (from -r requirements.txt (line 1))
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: python_speech_features
  Building wheel for python_speech_features (setup.py) ... [?25l[?25hdone
  Created wheel for python_speech_features: filename=python_speech_features-0.6-py3-none-any.whl size=5870 sha256=8f1c170c1f3d391fee5178974d9c5d895888194fc2cbff85fdabfdb18a54d44b
  Stored in directory: /root/.cache/pip/wheels/5a/9e/68/30bad9462b3926c29e315df16b562216d12bdc215f4d240294
Successfully built python_speech_features
Installing collected packages: python_speech_features
Successfully installed python_speech_features-0.6


In [5]:
import numpy as np
import pandas as pd
from voxceleb_datasat import prepare_test_train_file_list
from train import train_gmm
from inference import predict_speaker
from evaluation import calculate_metrics, calculate_far_frr_multiclass

#### Put sample voxceleb data (wav files) into Repo/sample_data

In [6]:
!cp /content/drive/MyDrive/Classroom/sample_data.zip -d /content/speaker_verification

In [None]:
!unzip /content/speaker_verification/sample_data.zip

In [8]:
voxceleb_path = '/content/speaker_verification/sample_data/'

#### Prepare dataset & split into train test using voxceleb_datasat.py

In [9]:
train_ratio = 0.8
all_file_paths_train, all_file_paths_test = prepare_test_train_file_list(voxceleb_path,  train_ratio)

Train data size: 776
Test data size: 194


In [10]:
dest = "/content/speaker_verification/speaker_models/"
!mkdir {dest}

#### Train GMM for each speaker using train.py (it will also use feature.py)

In [11]:
train_gmm(all_file_paths_train,dest)

Training model for id10001
GMM trainned for speaker: id10001.gmm
Training model for id10002
GMM trainned for speaker: id10002.gmm
Training model for id10003
GMM trainned for speaker: id10003.gmm
Training model for id10004
GMM trainned for speaker: id10004.gmm
Training model for id10005
GMM trainned for speaker: id10005.gmm
Training model for id10006
GMM trainned for speaker: id10006.gmm
Training model for id10007
GMM trainned for speaker: id10007.gmm
Training model for id10008
GMM trainned for speaker: id10008.gmm
Training model for id10009
GMM trainned for speaker: id10009.gmm
Training model for id10010
GMM trainned for speaker: id10010.gmm


#### Predict on test set, store result in df

In [12]:
results,speakers = predict_speaker(all_file_paths_test,dest)

Total Speakers :  10
Total GMM models :  10


100%|██████████| 194/194 [00:12<00:00, 16.01it/s]


In [13]:
df  = pd.DataFrame(results, columns = ['wav_file','log_liklihood', 'pred'])
df['label'] = df.wav_file.apply(lambda x : speakers.index(x.split('/')[-2]))
df.head()

Unnamed: 0,wav_file,log_liklihood,pred,label
0,/content/speaker_verification/sample_data/id10...,"[-28.10063235592232, -29.23912615425182, -27.8...",2,2
1,/content/speaker_verification/sample_data/id10...,"[-28.09221164315901, -27.797266508310955, -27....",2,2
2,/content/speaker_verification/sample_data/id10...,"[-27.084397735861835, -28.234816077714385, -26...",2,2
3,/content/speaker_verification/sample_data/id10...,"[-26.185709216517715, -26.948760302056286, -26...",2,2
4,/content/speaker_verification/sample_data/id10...,"[-26.892155401604054, -27.970491474491116, -26...",2,2


#### Caculate evaluation metrices

In [14]:
accuracy, precision, recall,cm = calculate_metrics(df.label, df.pred)
print("Accuracy :", accuracy)
print("Precision :", precision)
print("Recall:", recall)
print("Confusion Matrix :\n", cm)

for i in range(len(speakers)):
  print(f"\nFAR(%), FRR(%) for class {i} : ", calculate_far_frr_multiclass(cm, i))

Accuracy : 0.8969072164948454
Precision : 0.8813367003367004
Recall: 0.8723591127901473
Confusion Matrix :
 [[ 8  0  0  1  0  1  0  0  0  1]
 [ 0 25  0  0  0  0  0  0  0  0]
 [ 0  1 10  1  0  0  0  0  0  0]
 [ 0  0  1  9  0  0  0  0  0  0]
 [ 0  0  0  0 13  0  0  0  0  0]
 [ 0  0  0  0  0 26  1  0  0  0]
 [ 1  0  1  0  0  0 35  0  0  2]
 [ 0  4  0  0  0  0  0 24  0  1]
 [ 1  0  0  0  0  0  0  0  5  2]
 [ 0  0  0  0  0  0  0  0  1 19]]

FAR(%), FRR(%) for class 0 :  (1.63, 20.0)

FAR(%), FRR(%) for class 1 :  (0.0, 16.67)

FAR(%), FRR(%) for class 2 :  (1.1, 16.67)

FAR(%), FRR(%) for class 3 :  (0.55, 18.18)

FAR(%), FRR(%) for class 4 :  (0.0, 0.0)

FAR(%), FRR(%) for class 5 :  (0.6, 3.7)

FAR(%), FRR(%) for class 6 :  (2.53, 2.78)

FAR(%), FRR(%) for class 7 :  (2.94, 0.0)

FAR(%), FRR(%) for class 8 :  (1.6, 16.67)

FAR(%), FRR(%) for class 9 :  (0.59, 24.0)
