Gaussian Mixture Model Hidden Markov Models for Speech Recognition 

Problem 1: Sampling from a GMMHMM

In [9]:
import numpy as np
import gmmhmm as GH
import MFCC
from scipy.io.wavfile import read as wread
import glob
import pickle

def sample_gmmhmm(gmmhmm, n_sim):
    """
    Simulate sampling from a GMMHMM.

    Parameters
    ----------
    gmmhmm : sequence
        The model parameters in the order [A, weights, means, covars, pi].
        A is treated as a column-stochastic transition matrix, i.e. A[i, j]
        is the probability of moving from state j to state i. weights[s, :]
        are the mixture weights of state s; means[s, c, :] and
        covars[s, c, :, :] are the mean vector and covariance matrix of
        mixture component c in state s. pi (the last entry) is the initial
        state distribution.
    n_sim : int
        The number of time steps to simulate.

    Returns
    -------
    states : ndarray of shape (n_sim,)
        The sequence of states
    obs : ndarray of shape (n_sim, K)
        The generated observations (one length-K vector per row)
    """
    A, weights = gmmhmm[0], gmmhmm[1]
    means, covars = gmmhmm[2], gmmhmm[3]
    state_dist = gmmhmm[-1]
    K = means[0, 0].size

    states = np.zeros(n_sim, dtype=int)
    obs = np.zeros((n_sim, K))
    for i in range(n_sim):
        # multinomial(1, p) returns a one-hot count vector; argmax recovers
        # the index that was drawn (taking element [0] would only ever
        # yield 0 or 1).
        s = np.argmax(np.random.multinomial(1, state_dist))
        states[i] = s
        component = np.argmax(np.random.multinomial(1, weights[s, :]))
        mu, sig = means[s, component, :], covars[s, component, :, :]
        obs[i, :] = np.random.multivariate_normal(mu, sig)
        # Condition the next draw on the state actually visited: column s of
        # A holds the transition probabilities out of state s.
        state_dist = A[:, s]
    return states, obs

We load the WAV files and extract their mel-frequency cepstral coefficients (MFCCs) with the function below.

In [10]:
def load():
    """
    Load the speech samples for every word and extract MFCC features.

    For each word, every file matching "Samples/<word> *" is read as a WAV
    file and passed through MFCC.extract; only the first 30 entries of the
    result are kept.

    Returns
    -------
    names : list of str
        The words, in a fixed order.
    sampledict : dict
        Maps each word to the list of feature arrays of its samples, ordered
        by file name. The list is empty when no file matches.
    """
    names = ["Mathematics", "Biology", "PoliticalScience", "Statistics", "Psychology"]
    sampledict = {}
    for name in names:
        features = []
        # glob returns matches in arbitrary order; sorting keeps any
        # position-based train/test split of the samples reproducible.
        for fname in sorted(glob.glob("Samples/" + name + " *")):
            # wread returns (sample_rate, data); only the data is needed.
            signal = wread(fname)[1]
            features.append(MFCC.extract(signal)[:30])
        sampledict[name] = features
    return names, sampledict

Now that we've loaded the data, we can train the model with the handy function below.

In [11]:
def initialize(n):
    """
    Draw a random initial state distribution and transition matrix.

    Parameters
    ----------
    n : int
        The number of states.

    Returns
    -------
    pi : ndarray of shape (n,)
        Uniform random draws scaled to sum to 1.
    T : ndarray of shape (n, n)
        Uniform random draws with every row scaled to sum to 1.
    """
    # The matrix is drawn before the vector so the random stream is consumed
    # in a fixed order.
    transmat = np.random.rand(n, n)
    startprob = np.random.rand(n)
    startprob = startprob / startprob.sum()
    transmat = transmat / transmat.sum(axis=1, keepdims=True)
    return startprob, transmat

def load_and_train():
    """
    Train one GMMHMM per word and pickle the best model found for each.

    The samples come from load(). For every word, ten models are fitted from
    random initial parameters on the first 20 samples of that word, and the
    one with the highest log probability is kept. Progress is printed after
    every fit. The resulting dictionary of models is pickled to the file
    "gmmmodels" in the working directory.

    Returns
    -------
    best_models : dict
        Maps each word to its best fitted model.
    names : list of str
        The words, as returned by load().
    sampledict : dict
        The feature arrays for every word, as returned by load().
    """
    names, sampledict = load()
    best_models = {}
    for name in names:
        bestprob = -np.inf
        bestmodel = None
        # Random restarts: the fit depends on the starting point, so several
        # are tried and only the most likely model is kept.
        for _ in range(10):
            startprob, transmat = initialize(5)
            model = GH.GMMHMM(n_components=5, n_mix=3, transmat=transmat,
                              startprob=startprob, cvtype='diag')
            # these values for covars_prior and var should work well for this problem
            model.covars_prior = 0.01
            model.fit(sampledict[name][:20], init_params='mc', var=0.1)
            # Single-argument print calls emit the same text under Python 2
            # and Python 3.
            print("Trained Again on " + name + ", new prob:  " + str(model.logprob))
            if model.logprob > bestprob:
                bestprob = model.logprob
                bestmodel = model
                print("New Best Prob for " + name + " " + str(bestprob))
        best_models[name] = bestmodel
    # save; the with-block closes the file even if pickling raises
    with open("gmmmodels", "wb") as f:
        pickle.dump(best_models, f)
    return best_models, names, sampledict

The above code trains in only 2 minutes, since we were smart and used only 30 cepstral coefficients. We then validate the accuracy of our model on the test data.

In [12]:
def test():
    """
    Train the models and measure classification accuracy per word.

    Calls load_and_train(), then classifies every sample after the first 20
    of each word by scoring it under each word's model and picking the
    highest-scoring word.

    Returns
    -------
    accuracies : dict
        Maps each word to the percentage of its evaluated samples that were
        assigned to the correct word.
    """
    models, names, sampledict = load_and_train()
    accuracies = {}
    for true_name in names:
        n_evaluated = len(sampledict[true_name]) - 20
        n_correct = 0.
        for sample in sampledict[true_name][20:]:
            # Keep the first candidate whose score strictly beats the best
            # one seen so far.
            best_score, predicted = -np.inf, None
            for candidate in names:
                candidate_score = models[candidate].score(sample)
                if candidate_score > best_score:
                    best_score, predicted = candidate_score, candidate
            if predicted == true_name:
                n_correct += 1.
        accuracies[true_name] = 100*n_correct/n_evaluated
    return accuracies
# Train, evaluate, and report the per-word accuracies and their mean.
adict = test()
print("Accuracy Breakdown:  " + str(adict))
print("Overall Accuracy:  " + str(sum(adict.values())/len(adict)))

Trained Again on Mathematics, new prob:  -4040.77268791
New Best Prob for Mathematics -4040.77268791
Trained Again on Mathematics, new prob:  -3953.42508
New Best Prob for Mathematics -3953.42508
Trained Again on Mathematics, new prob:  -3971.93788922
Trained Again on Mathematics, new prob:  -3899.3927969
New Best Prob for Mathematics -3899.3927969
Trained Again on Mathematics, new prob:  -4134.07959978
Trained Again on Mathematics, new prob:  -4333.75254349
Trained Again on Mathematics, new prob:  -3882.54045267
New Best Prob for Mathematics -3882.54045267
Trained Again on Mathematics, new prob:  -4025.89108343
Trained Again on Mathematics, new prob:  -4317.11607219
Trained Again on Mathematics, new prob:  -4319.64135
Trained Again on Biology, new prob:  -5004.84692867
New Best Prob for Biology -5004.84692867
Trained Again on Biology, new prob:  -4524.3939161
New Best Prob for Biology -4524.3939161
Trained Again on Biology, new prob:  -4462.23374134
New Best Prob for Biology -4462.233

It should be noted that accuracy is highly dependent on the initial conditions. On a different run, I obtained the following output.

In [14]:
# Reprint the transcript of an earlier run, kept here as a string literal
# (nothing is retrained by this cell).
print """
Trained Again on Mathematics, new prob:  -3912.2283613
New Best Prob for Mathematics -3912.2283613
Trained Again on Mathematics, new prob:  -4088.19268306
Trained Again on Mathematics, new prob:  -3984.41664154
Trained Again on Mathematics, new prob:  -4024.96627592
Trained Again on Mathematics, new prob:  -4030.07805858
Trained Again on Mathematics, new prob:  -4085.71020813
Trained Again on Mathematics, new prob:  -4169.75890684
Trained Again on Mathematics, new prob:  -4057.68004774
Trained Again on Mathematics, new prob:  -3956.42304341
Trained Again on Mathematics, new prob:  -4049.61447941
Trained Again on Biology, new prob:  -4478.17131295
New Best Prob for Biology -4478.17131295
Trained Again on Biology, new prob:  -4297.7317647
New Best Prob for Biology -4297.7317647
Trained Again on Biology, new prob:  -4540.24990046
Trained Again on Biology, new prob:  -4155.51881087
New Best Prob for Biology -4155.51881087
Trained Again on Biology, new prob:  -4277.78063657
Trained Again on Biology, new prob:  -4923.82994236
Trained Again on Biology, new prob:  -4417.30835921
Trained Again on Biology, new prob:  -4827.63211316
Trained Again on Biology, new prob:  -4500.40941922
Trained Again on Biology, new prob:  -4384.06583096
Trained Again on PoliticalScience, new prob:  -2313.91853051
New Best Prob for PoliticalScience -2313.91853051
Trained Again on PoliticalScience, new prob:  -2241.76953245
New Best Prob for PoliticalScience -2241.76953245
Trained Again on PoliticalScience, new prob:  -2164.98729171
New Best Prob for PoliticalScience -2164.98729171
Trained Again on PoliticalScience, new prob:  -2218.73028128
Trained Again on PoliticalScience, new prob:  -2131.45507306
New Best Prob for PoliticalScience -2131.45507306
Trained Again on PoliticalScience, new prob:  -1803.70656584
New Best Prob for PoliticalScience -1803.70656584
Trained Again on PoliticalScience, new prob:  -2021.76269025
Trained Again on PoliticalScience, new prob:  -2212.87686714
Trained Again on PoliticalScience, new prob:  -1787.27665936
New Best Prob for PoliticalScience -1787.27665936
Trained Again on PoliticalScience, new prob:  -2413.09640232
Trained Again on Statistics, new prob:  -3130.06366036
New Best Prob for Statistics -3130.06366036
Trained Again on Statistics, new prob:  -2695.19321107
New Best Prob for Statistics -2695.19321107
Trained Again on Statistics, new prob:  -2710.00080068
Trained Again on Statistics, new prob:  -2343.90802172
New Best Prob for Statistics -2343.90802172
Trained Again on Statistics, new prob:  -2571.58609146
Trained Again on Statistics, new prob:  -3041.35933206
Trained Again on Statistics, new prob:  -3104.50978636
Trained Again on Statistics, new prob:  -2590.50602515
Trained Again on Statistics, new prob:  -2879.63760985
Trained Again on Statistics, new prob:  -2977.88081599
Trained Again on Psychology, new prob:  -3861.90855544
New Best Prob for Psychology -3861.90855544
Trained Again on Psychology, new prob:  -3546.42568755
New Best Prob for Psychology -3546.42568755
Trained Again on Psychology, new prob:  -3994.22072117
Trained Again on Psychology, new prob:  -3545.0881218
New Best Prob for Psychology -3545.0881218
Trained Again on Psychology, new prob:  -3602.94127203
Trained Again on Psychology, new prob:  -3716.33163344
Trained Again on Psychology, new prob:  -3560.04022452
Trained Again on Psychology, new prob:  -3577.94841488
Trained Again on Psychology, new prob:  -3669.19155851
Trained Again on Psychology, new prob:  -3657.24364314
Accuracy Breakdown:  {'Mathematics': 100.0, 'Biology': 90.0, 'Statistics': 100.0, 'Psychology': 100.0, 'PoliticalScience': 100.0}
Overall Accuracy:  98.0"""




Trained Again on Mathematics, new prob:  -3912.2283613
New Best Prob for Mathematics -3912.2283613
Trained Again on Mathematics, new prob:  -4088.19268306
Trained Again on Mathematics, new prob:  -3984.41664154
Trained Again on Mathematics, new prob:  -4024.96627592
Trained Again on Mathematics, new prob:  -4030.07805858
Trained Again on Mathematics, new prob:  -4085.71020813
Trained Again on Mathematics, new prob:  -4169.75890684
Trained Again on Mathematics, new prob:  -4057.68004774
Trained Again on Mathematics, new prob:  -3956.42304341
Trained Again on Mathematics, new prob:  -4049.61447941
Trained Again on Biology, new prob:  -4478.17131295
New Best Prob for Biology -4478.17131295
Trained Again on Biology, new prob:  -4297.7317647
New Best Prob for Biology -4297.7317647
Trained Again on Biology, new prob:  -4540.24990046
Trained Again on Biology, new prob:  -4155.51881087
New Best Prob for Biology -4155.51881087
Trained Again on Biology, new prob:  -4277.78063657
Trained Again o

Code ran for 2 minutes, got 98% accuracy. YEEEHAW!!!