## Speech and Speaker Recognition - DT2119 VT19-1 

### HMM - Lab 2

In [167]:
from __future__ import print_function

import numpy as np
import math

from lab2_tools import *
import prondict as prondict

from matplotlib import pyplot as plt
import seaborn as sns

%matplotlib inline

In [171]:
# Pronunciation dictionary: digit label -> list of phoneme names.
# Note: this rebinds the name `prondict`, which the import cell also uses
# for the imported module.
prondict = {
    'o': ['ow'],
    'z': ['z', 'iy', 'r', 'ow'],
    '1': ['w', 'ah', 'n'],
    '2': ['t', 'uw'],
    '3': ['th', 'r', 'iy'],
    '4': ['f', 'ao', 'r'],
    '5': ['f', 'ay', 'v'],
    '6': ['s', 'ih', 'k', 's'],
    '7': ['s', 'eh', 'v', 'ah', 'n'],
    '8': ['ey', 't'],
    '9': ['n', 'ay', 'n'],
}

In [27]:
# Load the worked example shipped with the lab and list its fields.
# The 'example' entry is a pickled Python object (it is unwrapped with .item()),
# so allow_pickle=True is required on NumPy >= 1.16.3, where the default is False.
# NOTE: unpickling can execute arbitrary code -- only load trusted files this way.
# The context manager closes the underlying .npz file handle once the entry is read.
with np.load('lab2_example.npz', allow_pickle=True) as archive:
    example = archive['example'].item()
for x in example.keys(): print(x, end = '\t')

digit	obsloglik	logalpha	logbeta	gender	filename	samplingrate	vloglik	speaker	samples	loglik	lmfcc	repetition	loggamma	vpath	

In [28]:
# Load the utterance data set and list the fields of the first entry.
# The 'data' entry is an object array (its elements are iterated by field name
# below), so allow_pickle=True is required on NumPy >= 1.16.3.
# NOTE: unpickling can execute arbitrary code -- only load trusted files this way.
# The context manager closes the underlying .npz file handle once the entry is read.
with np.load('lab2_data.npz', allow_pickle=True) as archive:
    data = archive['data']
for x in data[0]: print(x, end = '\t')

digit	gender	filename	samplingrate	speaker	samples	lmfcc	repetition	

In [29]:
# Load the phone HMMs stored in 'lab2_models_onespkr.npz' and list the phone names.
# The 'phoneHMMs' entry is a pickled Python object (unwrapped with .item()),
# so allow_pickle=True is required on NumPy >= 1.16.3.
# NOTE: unpickling can execute arbitrary code -- only load trusted files this way.
# The context manager closes the underlying .npz file handle once the entry is read.
with np.load('lab2_models_onespkr.npz', allow_pickle=True) as archive:
    phoneHMMs_one = archive['phoneHMMs'].item()
for x in phoneHMMs_one.keys(): print(x, end = '\t')

iy	eh	f	ah	k	sp	th	ow	ao	n	ih	s	r	ey	t	w	v	ay	z	uw	sil	

In [30]:
# Load the phone HMMs stored in 'lab2_models_all.npz' and list the phone names.
# The 'phoneHMMs' entry is a pickled Python object (unwrapped with .item()),
# so allow_pickle=True is required on NumPy >= 1.16.3.
# NOTE: unpickling can execute arbitrary code -- only load trusted files this way.
# The context manager closes the underlying .npz file handle once the entry is read.
with np.load('lab2_models_all.npz', allow_pickle=True) as archive:
    phoneHMMs_all = archive['phoneHMMs'].item()
for x in phoneHMMs_all.keys(): print(x, end = '\t')

iy	eh	f	ah	k	sp	th	ow	ao	n	ih	s	r	ey	t	w	v	ay	z	uw	sil	

In [52]:
# Show which fields a single phone model (here 'ah') carries.
print(phoneHMMs_one['ah'].keys())

[u'transmat', u'covars', u'startprob', u'name', u'means']


In [53]:
def concatHMMs(hmmmodels, namelist):
    """ Chain several HMMs, left to right, into one combined model.

    Args:
       hmmmodels: dictionary of models indexed by model name.
       hmmmodels[name] is a dictionary with the following keys:
           name: phonetic or word symbol corresponding to the model
           startprob: M+1 array with a priori probability of state
           transmat: (M+1)x(M+1) transition matrix
           means: MxD array of mean vectors
           covars: MxD array of variances
       namelist: list of model names, in the order they are to be chained

    D is the dimension of the feature vectors
    M is the number of emitting states in each HMM model (could be
      different in each model)

    Output
       combinedhmm: dictionary with the same keys as the input models,
                    holding the combined model:
         startprob: K+1 array with a priori probability of state
          transmat: (K+1)x(K+1) transition matrix
             means: KxD array of mean vectors
            covars: KxD array of variances

    K is the sum of the number of emitting states from the input models

    Note: when namelist holds a single name, the stored model object itself
    is returned (no copy is made).

    Example:
       wordHMMs['o'] = concatHMMs(phoneHMMs, ['sil', 'ow', 'sil'])
    """
    # Seed the result with the first model, then fold the rest in pairwise.
    combinedhmm = hmmmodels[namelist[0]]
    for name in namelist[1:]:
        combinedhmm = concatTwoHMMs(combinedhmm, hmmmodels[name])
    return combinedhmm

In [268]:
def concatTwoHMMs(hmm1, hmm2):
    """ Concatenates 2 HMM models

    Args:
       hmm1, hmm2: two dictionaries with the following keys:
           name: phonetic or word symbol corresponding to the model
           startprob: M+1 array with priori probability of state
                      (an (M+1)x1 column is accepted as well)
           transmat: (M+1)x(M+1) transition matrix
           means: MxD array of mean vectors
           covars: MxD array of variances

    D is the dimension of the feature vectors
    M is the number of emitting states in each HMM model (could be different for each)

    Output
       dictionary with the same keys as the input but concatenated models:
               name: hmm1['name'] + hmm2['name']
          startprob: K+1 array (1-D) with priori probability of state
           transmat: (K+1)x(K+1) transition matrix
              means: KxD array of mean vectors
             covars: KxD array of variances

    K is the sum of the number of emitting states from the input models

    The last entry of startprob and the last column of transmat of hmm1 hold the
    probability of leaving hmm1; that mass is redistributed over the states of
    hmm2 according to hmm2's startprob.

    Example:
       twoHMMs = concatTwoHMMs(phoneHMMs['sil'], phoneHMMs['ow'])

    See also: the concatenating_hmms.pdf document in the lab package
    """
    # Flatten so that both 1-D start vectors and (M+1, 1) columns are handled
    # uniformly, and so the result is always the documented 1-D K+1 array.
    start1 = np.asarray(hmm1['startprob']).reshape(-1)
    start2 = np.asarray(hmm2['startprob']).reshape(-1)
    trans1 = np.asarray(hmm1['transmat'])
    trans2 = np.asarray(hmm2['transmat'])

    # M1, M2: emitting states of each input model; K: emitting states of the result.
    M1 = hmm1['means'].shape[0]
    M2 = hmm2['means'].shape[0]
    K = M1 + M2

    # Start in hmm1's emitting states directly, or skip hmm1 entirely
    # (probability start1[-1]) and start according to hmm2.
    startprob = np.zeros(K + 1)
    startprob[:M1] = start1[:-1]
    startprob[M1:] = start1[-1] * start2

    transmat = np.zeros((K + 1, K + 1))
    # Transitions among hmm1's emitting states are kept as they are.
    transmat[:M1, :M1] = trans1[:-1, :-1]
    # Exit probability of each hmm1 state, spread over hmm2 via its start vector
    # (outer product of the exit column with start2).
    transmat[:M1, M1:] = trans1[:-1, -1, np.newaxis] * start2
    # hmm2's own transitions, including its final non-emitting state.
    transmat[M1:, M1:] = trans2

    concatedHMM = {}
    concatedHMM['name'] = hmm1['name'] + hmm2['name']
    concatedHMM['startprob'] = startprob
    concatedHMM['transmat'] = transmat
    concatedHMM['means'] = np.vstack((hmm1['means'], hmm2['means']))
    concatedHMM['covars'] = np.vstack((hmm1['covars'], hmm2['covars']))
    return concatedHMM

In [272]:
# Isolated-word pronunciations: each digit's phoneme list wrapped in 'sil' on both sides.
isolated = {}

for digit in prondict.keys():
    isolated[digit] = ['sil'] + prondict[digit] + ['sil']

# Build the word-level HMM for the digit '8'.
# The loading cells define only `phoneHMMs_one` and `phoneHMMs_all`; the bare
# name `phoneHMMs` used here before is never assigned in the visible notebook
# and fails on a fresh kernel.
# NOTE(review): the single-speaker models are assumed to be the intended set --
# switch to `phoneHMMs_all` if the all-speaker models were meant.
wordHMMs = {}
wordHMMs['8'] = concatHMMs(phoneHMMs_one, isolated['8'])

In [271]:
# Scratch value: a (3, 1) column of ones, used in the cells below to check np.dot with a scalar.
a = np.ones((3,1))

In [253]:
# Display the scratch column vector.
a

array([[1.],
       [1.],
       [1.]])

In [254]:
# Scratch check: np.dot of an array with a scalar scales every element (same as a * 3).
np.dot(a,3)

array([[3.],
       [3.],
       [3.]])