In [None]:
!pip install python_speech_features



In [None]:
import numpy as np
from sklearn import preprocessing
import python_speech_features as mfcc

def calculate_delta(array, window=2):
    """Compute delta (first-order regression) coefficients of a feature matrix.

    For every frame ``i`` the result is::

        sum_{j=1..window} j * (array[i + j] - array[i - j])
        ---------------------------------------------------
                   2 * sum_{j=1..window} j ** 2

    Frame indices that fall outside ``[0, rows - 1]`` are clamped to the
    nearest valid frame, so edge frames reuse the first/last row. With the
    default ``window=2`` the denominator is 10.

    Args:
        array: 2-D array-like of shape ``(frames, features)``. Any number of
            feature columns is accepted.
        window: number of neighbouring frames used on each side (>= 1).

    Returns:
        ``float64`` ndarray with the same shape as ``array``.

    Raises:
        ValueError: if ``array`` is not 2-D or ``window`` is smaller than 1.
    """
    # Work in float64 so integer (especially unsigned) inputs cannot wrap
    # around when neighbouring frames are subtracted.
    array = np.asarray(array, dtype=float)
    if array.ndim != 2:
        raise ValueError("calculate_delta expects a 2-D (frames, features) array")
    if window < 1:
        raise ValueError("window must be >= 1")

    rows, cols = array.shape
    deltas = np.zeros((rows, cols))
    frame_idx = np.arange(rows)
    last = rows - 1

    for j in range(1, window + 1):
        # Clamp look-ahead / look-behind indices at the matrix edges.
        ahead = np.minimum(frame_idx + j, last)
        behind = np.maximum(frame_idx - j, 0)
        deltas += j * (array[ahead] - array[behind])

    return deltas / (2 * sum(j * j for j in range(1, window + 1)))

def extract_features(audio,rate):
    """Build a 40-dimensional feature matrix (one row per frame) for an audio signal.

    Steps:
      1. Extract 20 MFCCs per frame (25 ms window, 10 ms step, 1200-point
         FFT, energy appended).
      2. Standardise the coefficients with ``preprocessing.scale``.
      3. Compute delta coefficients of the standardised MFCCs.
      4. Stack MFCCs and deltas side by side.

    Args:
        audio: audio samples as accepted by ``python_speech_features.mfcc``.
        rate: sampling rate of ``audio`` in Hz.

    Returns:
        2-D array whose columns are the 20 scaled MFCCs followed by their
        20 delta coefficients.
    """
    cepstra = mfcc.mfcc(
        audio,
        rate,
        winlen=0.025,
        winstep=0.01,
        numcep=20,
        nfft=1200,
        appendEnergy=True,
    )
    scaled = preprocessing.scale(cepstra)
    return np.hstack((scaled, calculate_delta(scaled)))

In [None]:
import os
import pickle
import numpy as np
from scipy.io.wavfile import read
from sklearn import mixture
import warnings
warnings.filterwarnings("ignore")
pathname="record/user2/"
arr = os.listdir(pathname)
print(arr)

# Only .wav recordings are used; sorting makes the row order of the training
# matrix (and therefore the fitted model) reproducible, since os.listdir
# returns entries in arbitrary order.
wav_files = sorted(name for name in arr if name.lower().endswith(".wav"))
if not wav_files:
    raise FileNotFoundError("no .wav files found in " + pathname)

# Extract features from every recording of the speaker.
feature_blocks = []
for path in wav_files:
    print (path)

    # read the audio
    sr,audio = read(os.path.join(pathname, path))

    # extract 40 dimensional MFCC & delta MFCC features
    vector   = extract_features(audio,sr)
    feature_blocks.append(vector)

# Stack once after the loop instead of re-allocating on every iteration.
features = np.vstack(feature_blocks)

# Train a single model on the features of *all* recordings. Fitting inside the
# loop retrained and rewrote the model after every file, and only the last fit
# was ever kept.
gmm = mixture.GaussianMixture(n_components=16, random_state=0)
gmm.fit(features)

# dumping the trained gaussian model (context manager closes the file handle)
picklefile ="user2.gmm"
with open(picklefile,'wb') as model_file:
    pickle.dump(gmm,model_file)
print ('+ modeling completed for speaker:',picklefile," with data point = ",features.shape )

['user26.wav', 'user23.wav', 'user22.wav', 'user214.wav', 'user210.wav', 'user21.wav', 'user29.wav', 'user25.wav', 'user212.wav', 'user213.wav', 'user211.wav', 'user28.wav', 'user215.wav', 'user24.wav', 'user27.wav']
user26.wav
+ modeling completed for speaker: user2.gmm  with data point =  (599, 40)
user23.wav
+ modeling completed for speaker: user2.gmm  with data point =  (1198, 40)
user22.wav
+ modeling completed for speaker: user2.gmm  with data point =  (1797, 40)
user214.wav
+ modeling completed for speaker: user2.gmm  with data point =  (2396, 40)
user210.wav
+ modeling completed for speaker: user2.gmm  with data point =  (2995, 40)
user21.wav
+ modeling completed for speaker: user2.gmm  with data point =  (3594, 40)
user29.wav
+ modeling completed for speaker: user2.gmm  with data point =  (4193, 40)
user25.wav
+ modeling completed for speaker: user2.gmm  with data point =  (4792, 40)
user212.wav
+ modeling completed for speaker: user2.gmm  with data point =  (5391, 40)
user213

In [None]:
import os
import pickle
import numpy as np
from scipy.io.wavfile import read
import warnings
warnings.filterwarnings("ignore")
import time

gmm_files = ["user2.gmm"]

# Load the trained speaker models (one Gaussian mixture per speaker).
# NOTE: pickle.load can execute arbitrary code -- only load model files that
# were produced by this notebook or another trusted source.
models = []
for fname in gmm_files:
    with open(fname,'rb') as model_file:
        models.append(pickle.load(model_file))
speakers   = [fname.split("/")[-1].split(".gmm")[0] for fname
              in gmm_files]
print(models)
print(speakers)

# Not used in this cell; kept in case later cells accumulate accuracy
# statistics with them.
error = 0
total_sample = 0.0

sr,audio = read("record/user2/user25.wav")
vector   = extract_features(audio,sr)

# GaussianMixture.score returns the average per-frame log-likelihood of the
# utterance under the model (a scalar); the best-scoring model wins.
# With a single entry in gmm_files the winner is necessarily that speaker.
log_likelihood = np.array([model.score(vector) for model in models])

winner = np.argmax(log_likelihood)
print ("detected as - ", speakers[winner])

[GaussianMixture(n_components=16, random_state=0)]
['user2']
detected as -  user2
