# GMMHMM

In [1]:
# mixture model class
class GMMInfo:
    def __init__(self):
        self.weight=[] #gmm weight
        self.mean=[] #gmm mean
        self.var=[] # gmm diagonal covariance
        self.num=0 # number of gmms
# hmm class
class HMMInfo:
    def __init__(self):
        self.init=[]
        self.edge_cost=[]
        self.mix=[]
        self.num=0

In [2]:
def log_Gaussian(m,C,x):
    C=np.array(C)
    left=0.5*np.sum(np.log((2*np.pi)*C))
    right=0.5*np.sum(np.square((x-m))/C)
    return left+right

def mixture_log_Gaussian(mix,x):
    mu=mix.mean
    var=mix.var
    w=mix.weight
    cost=log_Gaussian(mu,var,x)
    # print(w,cost)
    w=np.array(w)
    total_cost=np.sum(w*cost)
    return total_cost

def Gaussian(m,C,x):
    C=np.array(C)
    part1=np.sqrt((2*np.pi)*C)
    part2=0.5*np.sum(np.square(x-m)/C)
    prob=(1/part1)*np.exp(-part2)
    return prob


def mix_Gaussian(mix,x):
    total_prob=0.0
    for i in range(mix.num):
        m=mix.mean[i][:]
        var=mix.var[i][:]
        w=mix.weight[i]
        prob=Gaussian(m,var,x)
        total_prob+=w*prob
    return total_prob

In [131]:
import numpy as np
def GMMHMM_DTW(HMM,data):
    # matrix that records the edge costs
    T=HMM.edge_cost
    zeros=np.zeros([39])
    ones=np.zeros([39])+1
    mixture_models=[]
    #create a GMM for the initial state
    init_GMM=GMMInfo()
    init_GMM.weight=[1]
    init_GMM.num=1
    init_GMM.mean.append(zeros)
    init_GMM.var.append(ones)
    mixture_models.append(init_GMM)
    for mix_model in HMM.mix:
        mixture_models.append(mix_model)
    data=np.vstack([zeros,data])
    s=len(data)
    t=len(mixture_models)
    #Matrix that stores the costs
    P=np.zeros([t,s])
    #dynamic time programming algo
    for j in range(0,s):
        for i in range(t):
            #node score
            # print((i,j))
            Cij=mixture_log_Gaussian(mixture_models[i],data[j])
            
                
            if i>=2:
                P[i][j]=min(P[i][j-1]+T[i][i],P[i-1][j-1]+T[i-1][i],
                            P[i-2][j-1]+T[i-2][i])+Cij
            elif i-1>=0:
                P[i][j]=min(P[i][j-1]+T[i][i],P[i-1][j-1]+T[i-1][i])+Cij
            else:
                P[i][j]=P[i][j]+Cij
    P=P/s
    total_cost=P[-1][-1]
    return total_cost,get_states(P)

In [4]:
# Get the list that records which state each frame belongs to
def get_states(P):
    current_state,current_frame=np.array(P.shape)-1
    states=[current_state]
    while current_state>0 and current_frame>1:
      
      current_frame-=1
      if current_state>2:
          to_check=[P[current_state][current_frame-1],P[current_state-1][current_frame-1],P[current_state-2][current_frame-1]]
          track=np.argmin(to_check)
      elif current_state>1:
          to_check=[P[current_state][current_frame-1],P[current_state-1][current_frame-1]]
          track=np.argmin(to_check)
      else:
          track=0
      if track==0:
          states.insert(0,current_state)
      elif track==1:
          current_state-=1
          states.insert(0,current_state)
      else:
          current_state-=2
          states.insert(0,current_state)
    return states
    

In [5]:
def seperate_templates(templates, num):
    states_info=[]
    for i in range(len(templates)):
        # For the ith template, the number of nodes in each state is initialized evenly
        nodes_num=len(templates[i])//num
        states_info.extend([[]])
        for state in range(1,num+1):
            for node in range(nodes_num):
                states_info[i].append(state)
        # assign the unassigned nodes to the last state
        unassigned_num=len(templates[i])-len(states_info[i])
        if unassigned_num>0:
            for m in range(unassigned_num):
                states_info[i].append(num)
    return states_info

In [6]:
def get_node_in_each_state(templates, state_num, states_info):
    node_in_each_state=[]
    for state in range(state_num+1):
        node_in_each_state.append([])
    for j in range(len(templates)):
        # print('len:',len(states_info[j]))
        for m in range(len(states_info[j])):
            k=int(states_info[j][m])
            node_in_each_state[k].append(templates[j][m])
    return node_in_each_state

In [7]:
def get_edge_cost(states_info,state_num):

    shift_prob=np.zeros((state_num+1,state_num+1))
    num_nodes_in_state=np.zeros(state_num+1)
    for i in range(len(states_info)):
        shift_prob[0][states_info[i][0]]+=1
    # count the state trainsition
    for i in range(len(states_info)):
        for j in range(len(states_info[i])-1):
            current_node=states_info[i][j]
            next_node=states_info[i][j+1]
            shift_prob[current_node][next_node]+=1
            num_nodes_in_state[current_node]+=1
        shift_prob[states_info[i][-2]][states_info[i][-1]]+=1
        num_nodes_in_state[states_info[i][-1]]+=1
    # get the probabilities of going from initial state to 1~state_num states 
    for j in range(state_num+1):
        N=len(states_info)
        N_0j=shift_prob[0][j]
        shift_prob[0][j]=N_0j/N
        if N_0j==0:
            shift_prob[0][j]=np.inf
        else:
            shift_prob[0][j]=-np.log(shift_prob[0][j])
    for i in range(1,state_num+1):
        for j in range(i,state_num+1):
            
            shift_prob[i][j]=shift_prob[i][j]/num_nodes_in_state[i]
            if shift_prob[i][j]==0:
                shift_prob[i][j]=np.inf
            else:
                shift_prob[i][j]=-np.log(shift_prob[i][j])
    return np.array(shift_prob)
        

In [124]:
from sklearn.cluster import KMeans
def Kmeans(nodes,Gaussian_num):

    if np.shape(nodes)[0]==0:
        mixture_model=GMMInfo()
        return mixture_model
    # initialize kmeans by using sklearn package   
    else:
        if np.shape(nodes)[0]>=4:
            kmeans=KMeans(n_clusters=Gaussian_num,random_state=0).fit(np.array(nodes))
        else:
            kmeans=KMeans(n_clusters=np.shape(nodes)[0],random_state=0).fit(np.array(nodes))
        mixture_model=GMMInfo()
        cov=[]
        mean=[]
        weight=np.zeros((Gaussian_num,1))
        for j in range(Gaussian_num):
            index=[]
            i=0
            w=0
            for k in kmeans.labels_:
                if j==k:
                    index.append(i)
                    w+=i
                i+=1
            weight[j]=w
            template=[nodes[m][:]for m in index]
            curr_mean=np.mean(template,axis=0)
            curr_cov=np.cov(np.array(template).T)
            diagonal_cov=np.diagonal(curr_cov,offset=0,axis1=0,axis2=1)
            cov.append(diagonal_cov)
            mean.append(curr_mean)
        #update the hmm model
        weight=weight/weight.sum()
        mixture_model.mean=mean
        mixture_model.var=cov
        mixture_model.weight=weight
        mixture_model.num=Gaussian_num

    return mixture_model
        
    

In [9]:
def initialize_HMM(templates, state_num, Gaussian_num):
    hmm=HMMInfo()
    hmm.init=np.zeros((state_num,1))
    hmm.init[0]=1
    hmm.num=state_num

    states_info=seperate_templates(templates, state_num)
    node_in_each_state=get_node_in_each_state(templates, state_num, states_info)
    hmm.edge_cost=get_edge_cost(states_info,state_num)
    
    mix_models=[]
    for i in range(state_num):
        node_in_curr_state=node_in_each_state[i+1]
        curr_state_mix_model=Kmeans(node_in_curr_state, Gaussian_num[i])
        mix_models.append(curr_state_mix_model)
    hmm.mix=mix_models
    
    return hmm,states_info

In [133]:
# train hmm model
def trainhmm(templates,state_num,Gaussian_num):
    #initialize hmm model
    hmm,states_info=initialize_HMM(templates,state_num,Gaussian_num)
    best_dist=-np.inf
    curr_dist=0
    # use at most 99 iterations to train the model
    for i in range(1,100):
        for j in range(len(templates)):
            # use dtw to get the score and update alignment of the templates
            dist,states_info[j]=GMMHMM_DTW(hmm,templates[j])
            
            curr_dist+=dist
        hmm.edge_cost=get_edge_cost(states_info,state_num)
        # according to the alignment, classify the vectors of templates into different states
        node_in_each_state=get_node_in_each_state(templates,state_num,states_info)
        GMMs=[]
        for state in range(state_num):
            curr_state_node=node_in_each_state[state+1]
            # kmeans these vectors into 4 clusters to get weight, mean, var
            curr_mixture=Kmeans(curr_state_node,Gaussian_num[state])
            GMMs.append(curr_mixture)
        hmm.mix=GMMs
        # if it converges, break the iteration
        if abs(best_dist-curr_dist)<0.0015:
            break
        # update the best score
        best_dist=curr_dist
        # print(best_dist)
        curr_dist=0
    return hmm

In [11]:
# get MFCC of length 39
def getMFCC(wavename):
    import numpy as np
    import scipy.io.wavfile as wav
    from python_speech_features import mfcc
    fs, audio = wav.read(wavename)
    feature_mfcc = mfcc(audio, samplerate=fs)
    mfcc=[]
    mfcc.append(np.hstack([feature_mfcc[0],feature_mfcc[0],feature_mfcc[0]]))
    for i in range(1,len(feature_mfcc)-1):
        delta=np.zeros(13)
        for j in range(13):
            delta[j]=feature_mfcc[i+1][j]-feature_mfcc[i-1][j]
        mfcc.append(np.hstack([feature_mfcc[i],delta]))
    mfcc.append(np.hstack([feature_mfcc[-1],feature_mfcc[-1],feature_mfcc[-1]]))

    for i in range(1,len(mfcc)-1):
        acc=np.zeros(13)
        for j in range(13):
            acc[j]=mfcc[i+1][13+j]-mfcc[i-1][13+j]
        mfcc[i]=np.hstack([mfcc[i],acc])
    mfccs=np.array(mfcc)
    std=np.std(mfccs)
    var=np.var(mfccs,1)
    for i in range(len(mfccs)):
        for j in range(39):
            mfccs[i][j]=mfccs[i][j]/var[i]
    return mfccs

In [116]:
def GMMHMM(folder):
    models=[]
    for digit in range(0,10):
        templates=[]
        # for 2,6,8 we only use two states, since if we use more states there will be some states that have no nodes and thus cannot perofrom kmeans
        if digit in [2,6,8]:
            Gaussian_num=[4,4]
        # for other digits we use three states, the reason is similar to the previous annotation
        else: 
            Gaussian_num=[4,4,4]
        
        state_num=len(Gaussian_num)
        # 5 templates
        for i in range(1,6):
            mfcc=getMFCC(folder+'/'+str(digit)+'_training_'+str(i)+'.wav')
            templates.append(mfcc)
        # call trainhmm function to get the hmm model for each digit
        hmm=trainhmm(templates,state_num,Gaussian_num)
        models.append(hmm)
    
    return models

In [134]:
folder='digit_record'
GMMHMM(folder)

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super().

[<__main__.HMMInfo at 0x18f7b48e490>,
 <__main__.HMMInfo at 0x18f77837710>,
 <__main__.HMMInfo at 0x18f7b314890>,
 <__main__.HMMInfo at 0x18f7787f9d0>,
 <__main__.HMMInfo at 0x18f7787f150>,
 <__main__.HMMInfo at 0x18f77759110>,
 <__main__.HMMInfo at 0x18f7b371f10>,
 <__main__.HMMInfo at 0x18f7775a550>,
 <__main__.HMMInfo at 0x18f7b332890>,
 <__main__.HMMInfo at 0x18f7b372210>]