In [None]:
# Mount Google Drive at /content/gdrive (Colab only); the data, feature and
# model paths configured further down all live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# joblib is used below to save (joblib.dump) and reload (joblib.load) the trained speaker models.
!pip install joblib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import warnings
warnings.filterwarnings('ignore')
import glob
import os
import shutil

import joblib
import librosa
import matplotlib.pyplot as plt
import numpy as np
import scipy.io.wavfile as wave
from scipy.fftpack import fft, ifft, fftshift
from sklearn import mixture

In [None]:
def traininig(train_data_path,feat_train_path,trained_model_path):
    """Extract MFCC features and train one GMM per speaker.

    Every sub-directory of ``train_data_path`` is treated as one speaker and
    every ``*.wav`` file inside it as one training utterance.  For each
    speaker the function

    1. loads each clip, resamples it to the global ``sr`` and computes
       13 MFCCs plus their delta and delta-delta (39 values per frame);
    2. stacks the frames of all clips and writes them, comma separated, to
       ``<feat_train_path>/<speaker>_all_features.txt``;
    3. fits a diagonal-covariance ``GaussianMixture`` and stores it with
       joblib as ``<trained_model_path>/<speaker>.pkl``.

    Reads the notebook-level globals ``sr``, ``hop_length``, ``n_mixtures``
    and ``max_iterations``; they must be defined before this is called.

    Args:
        train_data_path: Folder holding one sub-folder per speaker.
        feat_train_path: Output folder for the feature text files (created
            if missing).
        trained_model_path: Output folder for the ``.pkl`` models (created
            if missing).

    Returns:
        None.  Progress and per-speaker errors are reported with ``print``;
        a speaker whose model cannot be trained is skipped, never saved.
    """
    os.makedirs(feat_train_path, exist_ok=True)
    os.makedirs(trained_model_path, exist_ok=True)

    # Only directories are speakers; stray files next to them are ignored.
    # Sorting makes the processing order reproducible across runs.
    speaker_dirs = sorted(
        p for p in glob.glob(os.path.join(train_data_path, '*')) if os.path.isdir(p)
    )

    for speaker_dir in speaker_dirs:
        spk = os.path.basename(speaker_dir)
        wavs = sorted(glob.glob(os.path.join(speaker_dir, '*.wav')))

        per_file_feats = []
        for wav_path in wavs:
            y, native_sr = librosa.load(wav_path)
            # Keyword arguments: recent librosa releases reject positional rates.
            y = librosa.resample(y, orig_sr=native_sr, target_sr=sr)

            # win_length matches testing() so both stages frame the audio identically.
            mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13,
                                        win_length=int(0.025*sr))
            mfcc_delta = librosa.feature.delta(mfcc)
            mfcc_ddelta = librosa.feature.delta(mfcc_delta)
            # Rows become frames, columns the 39 coefficients.
            per_file_feats.append(
                np.concatenate((mfcc, mfcc_delta, mfcc_ddelta), axis=0).transpose()
            )

        # Single concatenation instead of growing the array clip by clip; the
        # empty float64 seed keeps the (n_frames, 39) shape and dtype of the
        # original accumulator.
        final_feat = np.concatenate([np.empty([0, 39])] + per_file_feats, axis=0)

        if final_feat.shape[0] == 0:
            print("ERROR : No feature frames extracted for "+spk+"; speaker skipped")
            continue

        print(spk)
        np.savetxt(os.path.join(feat_train_path, spk+"_all_features.txt"), final_feat, delimiter=",")

        try:
            gmm = mixture.GaussianMixture(n_components=n_mixtures, covariance_type='diag',
                                          max_iter=max_iterations).fit(final_feat)
        except Exception as exc:
            print("ERROR : Error while training model for file "+spk+" : "+repr(exc))
            # Skip saving: otherwise the previous speaker's model would be
            # written under this speaker's name.
            continue

        try:
            joblib.dump(gmm, os.path.join(trained_model_path, spk+'.pkl'))
        except Exception as exc:
            print("ERROR : Error while saving model for "+spk+" : "+repr(exc))

    print("Training Completed")


In [None]:
def testing(test_data_path,feat_test,trained_model_path):
    # train feature extraction
  all_speakers=glob.glob(test_data_path+'*')

  import os
  directory=feat_test
  if not os.path.exists(directory):
      os.makedirs(directory)

  speakers = { all_speakers[k]:k for k in range(len(all_speakers)) }

  num_test_cases={}
  tct={}
  for e in speakers:
      num_test_cases[e.replace(test_data_path,'')]=len(os.listdir(e))-1
      tct[e.replace(test_data_path,'')]=0

  print(num_test_cases)

  spk_names = { all_speakers[k].replace(test_data_path,''):k for k in range(len(all_speakers)) }

  total_speakers=len(num_test_cases)

  confusion_matrix = np.zeros((total_speakers,total_speakers))


  for itr1 in range(0,len(all_speakers)):
      
      wavs=glob.glob(all_speakers[itr1]+'/*.wav')
      
      spk=(all_speakers[itr1]).split("/")[-1]
      
      if not os.path.exists(directory):
          os.makedirs(directory)
      
      final_feat=np.empty([0, 39])
      
      for itr2 in range(0,len(wavs)):
          #print(wavs[itr2])
          
          y, srr = librosa.load(wavs[itr2])
          y = librosa.resample(y, srr, sr)
          # sr=8000

          mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13,win_length=int(0.025*sr))
          mfcc_delta = librosa.feature.delta(mfcc)
          mfcc_ddelta = librosa.feature.delta(mfcc_delta)
          feat=np.concatenate((mfcc,mfcc_delta,mfcc_ddelta),axis=0)
          feat=feat.transpose()
          
          final_feat=np.concatenate((final_feat,feat),axis=0)

          #print(final_feat.shape)
          max_score=-np.inf
          max_spk_name=""
          
          for modelfile in sorted(glob.glob(trained_model_path+'*.pkl')):
              gmm = joblib.load(modelfile) 
              score=gmm.score(feat)
              #print score
              if score>max_score:
                  max_score,max_spk_name=score,modelfile.replace(trained_model_path,'').replace('.pkl','')

          print(spk+" -> "+max_spk_name+(" Y" if spk==max_spk_name  else " N"))

          confusion_matrix[ spk_names[spk] ][spk_names[max_spk_name]]+=1
          tct[spk]+=1

          
      #print(spk)
      np.savetxt(feat_test+spk+"_all_features.txt", feat, delimiter=",")
  return tct,confusion_matrix,total_speakers
      

In [None]:
# All paths should be changed according to your file locations.
# Only BASE_DIR needs editing when the project folder moves; every other
# path is derived from it. Each path ends with '/' so it can be used
# directly as a prefix when building file names.

BASE_DIR='/content/gdrive/MyDrive/speaker_recognition/sir_spk_rec/Spk_Rec/'

feat=BASE_DIR+'feat/'                        # root folder of all extracted features
feat_train=feat+'train/'                     # per-speaker training feature files
feat_test=feat+'test/'                       # per-speaker test feature files
trained_model=BASE_DIR+'train_models/'       # saved <speaker>.pkl models
train_data=BASE_DIR+'SPK_DATA/traindata/'    # one sub-folder of wav files per speaker
test_data=BASE_DIR+'SPK_DATA/testdata/'      # one sub-folder of wav files per speaker

In [None]:
# Remove feature folders and models left over from a previous run.
# The configured `feat` / `trained_model` paths are used (rather than repeating
# the literals) so this cell always cleans exactly the folders the rest of the
# notebook writes to, and shutil keeps it independent of a shell.
for stale_dir in (feat, trained_model):
  if os.path.isdir(stale_dir):
    shutil.rmtree(stale_dir)

In [None]:

# Settings read as globals by traininig() and testing(); run this cell before calling them.
n_mixtures = 32  # number of Gaussian components per speaker model (passed as n_components)
max_iterations = 200  # upper bound on fitting iterations per model (passed as max_iter)
calc_deltas=True  # NOTE(review): not referenced anywhere in the visible notebook - confirm whether it is still needed
sr=8000  # sampling rate every clip is resampled to before MFCC extraction
hop_length=int(0.005*sr)  # MFCC hop in samples: 0.005 s at `sr`, i.e. 40 samples

In [None]:
# Extract training features and fit/save one GMM per speaker folder under train_data.
traininig(train_data,feat_train,trained_model)

fjlg0
falk0
fgcs0
fgrw0
fdjh0
fcmg0
fjlr0
fdfb0
fcke0
feme0
Training Completed


In [None]:
# Score every test clip against the saved models; returns the per-speaker
# test counts, the confusion matrix and the number of test speakers.
tt,conf_mat,tot_spek=testing(test_data,feat_test,trained_model)

{'fgcs0': 1, 'falk0': 1, 'fjlg0': 1, 'fcmg0': 1, 'fdjh0': 1, 'fgrw0': 1, 'fcke0': 1, 'fjlr0': 1, 'fdfb0': 1, 'feme0': 1}
fgcs0 -> fgcs0 Y
fgcs0 -> fgcs0 Y
falk0 -> falk0 Y
falk0 -> falk0 Y
fjlg0 -> fjlg0 Y
fjlg0 -> fjlg0 Y
fcmg0 -> fcmg0 Y
fcmg0 -> fcmg0 Y
fdjh0 -> fdjh0 Y
fdjh0 -> fdjh0 Y
fgrw0 -> fgrw0 Y
fgrw0 -> fgrw0 Y
fcke0 -> fcke0 Y
fcke0 -> fcke0 Y
fjlr0 -> fjlr0 Y
fjlr0 -> fjlr0 Y
fdfb0 -> fdfb0 Y
fdfb0 -> fdfb0 Y
feme0 -> feme0 Y
feme0 -> feme0 Y


In [None]:
# Report per-speaker test counts, the confusion matrix and overall accuracy.
print(tt)
print("Confusion Matrix:\n",conf_mat)

n_tested = sum(tt.values())
# Correct identifications sit on the diagonal (true speaker == predicted speaker).
n_correct = np.trace(conf_mat)
if n_tested:
  print("Accuracy: ",(n_correct*100)/float(n_tested))
else:
  # Avoid a division by zero when no test clip was scored.
  print("Accuracy:  n/a (no test utterances were scored)")

{'fgcs0': 2, 'falk0': 2, 'fjlg0': 2, 'fcmg0': 2, 'fdjh0': 2, 'fgrw0': 2, 'fcke0': 2, 'fjlr0': 2, 'fdfb0': 2, 'feme0': 2}
Confusion Matrix:
 [[2. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 2. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 2. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 2. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 2. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 2. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 2. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 2. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 2.]]
Accuracy:  100.0
