In [1]:
import numpy as np
from sklearn import preprocessing
import python_speech_features as mfcc

def calculate_delta(array):
    """Calculate and returns the delta of given feature vector matrix"""

    rows,cols = array.shape
    deltas = np.zeros((rows,20))
    N = 2
    for i in range(rows):
        index = []
        j = 1
        while j <= N:
            if i-j < 0:
              first =0
            else:
              first = i-j
            if i+j > rows-1:
                second = rows-1
            else:
                second = i+j 
            index.append((second,first))
            j+=1
        deltas[i] = ( array[index[0][0]]-array[index[0][1]] + (2 * (array[index[1][0]]-array[index[1][1]])) ) / 10
    return deltas

def extract_features(audio,rate):
    """extract 20 dim mfcc features from an audio, performs CMS and combines 
    delta to make it 40 dim feature vector"""    
    
    mfcc_feature = mfcc.mfcc(audio,rate, 0.025, 0.01,20,nfft = 1200, appendEnergy = True)    
    mfcc_feature = preprocessing.scale(mfcc_feature)
    delta = calculate_delta(mfcc_feature)
    combined = np.hstack((mfcc_feature,delta)) 
    return combined

In [4]:
import librosa
import _pickle as cPickle
import numpy as np
from scipy.io.wavfile import read
from sklearn import mixture
model = mixture.GaussianMixture(n_components=3, covariance_type='full',n_init = 3)
import sklearn.feature_extraction
#from speakerfeatures import extract_features
import warnings
warnings.filterwarnings("ignore")

#path to training data
# source   = "development_set/"
source   = "TestData/"   

#path where training speakers will be saved

# dest = "speaker_models/"
# train_file = "development_set_enroll.txt"

dest = "Speakers_models/"
train_file = "trainingDataPath.txt"        
file_paths = open(train_file,'r')

count = 1
# Extracting features for each speaker (5 files per speakers)
features = np.asarray(())
for path in file_paths:    
    path = path.strip()   
    print(path)
    
    # read the audio
    sr,audio = read(source + path)
    
    # extract 40 dimensional MFCC & delta MFCC features
    vector   = extract_features(audio,sr)
    
    if features.size == 0:
        features = vector
    else:
        features = np.vstack((features, vector))
    # when features of 5 files of speaker are concatenated, then do model training
	# -> if count == 5: --> edited below
    if count == 5:    
        gmm = model
        gmm.fit(features)
        
        # dumping the trained gaussian model
        picklefile = path.split("-")[0]+".gmm"
        cPickle.dump(gmm,open(dest + picklefile,'wb'))
        print ('+ modeling completed for speaker:',picklefile," with data point = ",features.shape)    
        features = np.asarray(())
        count = 0
    count = count + 1

Aashna Hegde/AH1(1).wav
Aashna Hegde/AH1(2).wav
Aashna Hegde/AH2(1).wav
Aashna Hegde/AH2(2).wav
Aashna Hegde/AH3(1).wav
+ modeling completed for speaker: Aashna Hegde/AH3(1).wav.gmm  with data point =  (61321, 40)
Ashish/Ashish1(1).wav
Ashish/Ashish1(2).wav
Ashish/Ashish2(1).wav
Ashish/Ashish2(2).wav
Ashish/Ashish3(1).wav
+ modeling completed for speaker: Ashish/Ashish3(1).wav.gmm  with data point =  (60010, 40)
BB Ki Vines/BB1(1).wav
BB Ki Vines/BB1(2).wav
BB Ki Vines/BB2(1).wav
BB Ki Vines/BB2(2).wav
BB Ki Vines/BB3(1).wav
+ modeling completed for speaker: BB Ki Vines/BB3(1).wav.gmm  with data point =  (60010, 40)
Beer Biceps/Beer1(1).wav
Beer Biceps/Beer1(2).wav
Beer Biceps/Beer2(1).wav
Beer Biceps/Beer2(2).wav
Beer Biceps/Beer3(1).wav
+ modeling completed for speaker: Beer Biceps/Beer3(1).wav.gmm  with data point =  (61979, 40)
Carryminati/Carryminati1(1).wav
Carryminati/Carryminati1(2).wav
Carryminati/Carryminati2(1).wav
Carryminati/Carryminati2(2).wav
Carryminati/Carryminati3(1).

Aamir_Khan/AamirKhan2.wav
Aamir_Khan/AamirKhan3.wav
Aamir_Khan/AamirKhan4.wav
Aamir_Khan/AamirKhan5.wav
+ modeling completed for speaker: Aamir_Khan/AamirKhan5.wav.gmm  with data point =  (60034, 40)
Katrina_Kaif/KatrinaKaif1.wav
Katrina_Kaif/KatrinaKaif2.wav
Katrina_Kaif/KatrinaKaif3.wav
Katrina_Kaif/KatrinaKaif4.wav
Katrina_Kaif/KatrinaKaif5.wav
+ modeling completed for speaker: Katrina_Kaif/KatrinaKaif5.wav.gmm  with data point =  (60034, 40)
Priyanka_Chopra/PriyankaChopra1.wav
Priyanka_Chopra/PriyankaChopra2.wav
Priyanka_Chopra/PriyankaChopra3.wav
Priyanka_Chopra/PriyankaChopra4.wav
Priyanka_Chopra/PriyankaChopra5.wav
+ modeling completed for speaker: Priyanka_Chopra/PriyankaChopra5.wav.gmm  with data point =  (60034, 40)
Ranveer_Singh/RanveerSingh1.wav
Ranveer_Singh/RanveerSingh2.wav
Ranveer_Singh/RanveerSingh3.wav
Ranveer_Singh/RanveerSingh4.wav
Ranveer_Singh/RanveerSingh5.wav
+ modeling completed for speaker: Ranveer_Singh/RanveerSingh5.wav.gmm  with data point =  (60034, 40)
Ra

PermissionError: [Errno 13] Permission denied: 'TestData/'

In [5]:
import os
import _pickle as cPickle
import numpy as np
from scipy.io.wavfile import read
import sklearn.feature_extraction 
#from speakerfeatures import extract_features
import warnings
warnings.filterwarnings("ignore")
import time

"""
#path to training data
source   = "development_set/"   
modelpath = "speaker_models/"
test_file = "development_set_test.txt"        
file_paths = open(test_file,'r')
"""
#path to training data
source   = "TestData/" 

#path where training speakers will be saved
modelpath = "Speakers_models/"
gmm_files=[]
for fname in os.listdir(modelpath):
    file=os.path.join(modelpath,fname)
    file+="/"
    for j in os.listdir(file):
        if(j.endswith('.gmm')):
            gmm_files.append(os.path.join(file,j))
#gmm_files = [os.path.join(modelpath,fname) for fname in 
              #os.listdir(modelpath) if fname.endswith('.gmm')]
#Load the Gaussian gender Models
models    = [cPickle.load(open(fname,'rb')) for fname in gmm_files]
speakers   = [fname.split("/")[-1].split(".gmm")[0] for fname 
              in gmm_files]

error = 0
total_sample = 0.0


print ("Do you want to Test a Single Audio: Press '1' or The complete Test Audio Sample: Press '0' ?")
take = int(input().strip())
if take == 1:
    print ("Enter the File name from Test Audio Sample Collection :")
    path = input().strip()   
    print ("Testing Audio : ", path)
    sr,audio = read(source + path)
    vector   = extract_features(audio,sr)
    log_likelihood = np.zeros(len(models))

    for i in range(len(models)):
        gmm    = models[i]  #checking with each model one by one
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()
    winner = np.argmax(log_likelihood)
    print ("\tdetected as - ", speakers[winner])

    time.sleep(1.0)
elif take == 0:
    test_file = "testSamplePath.txt"        
    file_paths = open(test_file,'r')


    # Read the test directory and get the list of test audio files 
    for path in file_paths:   
        total_sample += 1.0
        path = path.strip()   
        print("Testing Audio : ",path)
        sr,audio = read(source + path)
        vector   = extract_features(audio,sr)

        log_likelihood = np.zeros(len(models)) 

        for i in range(len(models)):
            gmm    = models[i]  #checking with each model one by one
            scores = np.array(gmm.score(vector))
            log_likelihood[i] = scores.sum()
        winner = np.argmax(log_likelihood)
        print ("\tdetected as - ", speakers[winner][0:-4])

        checker_name = path.split("_")[0]
        


print ("Hurray ! Speaker identified. Mission Accomplished Successfully. ")

Do you want to Test a Single Audio: Press '1' or The complete Test Audio Sample: Press '0' ?
0
Testing Audio :  Yuzi/Yuzi1.wav
	detected as -  Yuzi5
Testing Audio :  Aakash_chopra/Aakash_Chopra1.wav
	detected as -  Aakash_Chopra5
Testing Audio :  ABD/ABD5.wav
	detected as -  AB_deviliers
Testing Audio :  Ashwin/Ashwin4.wav
	detected as -  Ravi_Ashwin
Testing Audio :  Ben_stokes/Ben_stokes2.wav
	detected as -  Ben_stokes5
Testing Audio :  Bhuvi/Bhuvi4.wav
	detected as -  Bhuvneshwar_kumar
Testing Audio :  Bindra/Abhinav_Bindra3.wav
	detected as -  Abhinav_Bindra
Testing Audio :  Ganguly/Saurav_Ganguly5.wav
	detected as -  Saurav_Ganguly5
Testing Audio :  Kohli/Virat_kohli4.wav
	detected as -  Virat_kohli
Testing Audio :  Raina/Suresh_Raina5.wav
	detected as -  Suresh_Raina5
Testing Audio :  Sachin/Sachin4.wav
	detected as -  Sachin5
Testing Audio :  Tim_Paine/Tim_Paine5.wav
	detected as -  Tim_Paine
Testing Audio :  Tim_Paine/Tim_Paine2.wav
	detected as -  Tim_Paine
Testing Audio :  Bhu