In [4]:
import os
import librosa
from sklearn.mixture import GaussianMixture
import numpy as np

In [None]:
sr = 8000

# Generating GMM

In [104]:
def make_GMM(path):

    list_files = []
    mfcc_con=np.empty([0, 13])
    hop_length = int(0.005*sr)
    win_length=int(0.025*sr)
    
    for file in os.listdir(path):
        
        if file.endswith('.wav'):   
            file = os.path.join(path, file)
        
            new,srr = librosa.load(file)
            new = librosa.resample(new, orig_sr=srr, target_sr=sr)
            list_files.append(new)
        
            mfcc = librosa.feature.mfcc(y=new, sr=sr, hop_length=hop_length, n_mfcc=13,win_length=win_length).T
            mfcc_con = np.concatenate((mfcc_con,mfcc),axis=0)

    gmm = GaussianMixture(n_components=64, covariance_type='diag' , max_iter = 200 ).fit(mfcc_con)
    
    return gmm

# Training

In [105]:
gmm_assamese = make_GMM('../train/assamese')
gmm_bengali = make_GMM('../train/bengali')
gmm_gujarathi = make_GMM('../train/gujarathi')
gmm_manipuri = make_GMM('../train/manipuri')
gmm_marathi = make_GMM('../train/marathi')
gmm_odia = make_GMM('../train/odia')
gmm_telugu = make_GMM('../train/telugu')

  return f(*args, **kwargs)


# Testing

In [106]:
def test():
    
    correct = 0
    langarr = ['Assa','Beng','Guja','Mani','Mara','Odia','Telu']
    hop_length = int(0.005*sr)
    win_length=int(0.025*sr)
    
    for file in os.listdir('../test'):
        
        if file.endswith('.wav'):  
            for i in range(7):
                if(file[0:4] == langarr[i]):
                    lang = i
    
            file = os.path.join('../test', file)
        
            new,srr = librosa.load(file)
            new = librosa.resample(new, orig_sr=srr, target_sr=sr)
        
            mfcc = librosa.feature.mfcc(y=new, sr=sr, hop_length=hop_length, n_mfcc=13,win_length=win_length).T
            
            s1 =gmm_assamese.score(mfcc)
            s2 =gmm_bengali.score(mfcc)
            s3 =gmm_gujarathi.score(mfcc)
            s4 =gmm_manipuri.score(mfcc)
            s5 =gmm_marathi.score(mfcc)
            s6 =gmm_odia.score(mfcc)
            s7 =gmm_telugu.score(mfcc)
            
            scores = np.array((s1,s2,s3,s4,s5,s6))

            res = np.argmax(scores)
            
            if(res == lang):
                correct += 1
            else:
                print(res,lang)
                
    return correct

In [107]:
correct = test()
print(correct)

0 6
2 6
1 6
5 6
2 6
1 6
4 6
0 6
4 6
2 6
4 6
0 6
1 6
2 6
0 6
5 6
0 6
5 6
5 6
0 6
120
