In [134]:
import os
import sys
import numpy as np
from random import randint

class HMM(object): 
    # base class for different HMM models
    def __init__(self, T, O, pi):
        # model is (T, O, pi) where T = Transition probs(hidden_states*hidden_states), 
        # O = Emission Probs(hidden_states*states), pi = initial distribution(hidden_states)               
        if T is None:
            print "Error: You should provide the transition matrix"
            sys.exit() # Read in parameters from the file of model_name
        if O is None:
            print "Error: You should provide the emission matrix"
            sys.exit() # Read in parameters from the file of model_name
        if pi is None:
            print "Error: You should provide the initial probability"
            sys.exit() # Read in parameters from the file of model_name           
        self.pi=pi
        self.T=T
        self.O=O
        self.M=T.shape[1]  # M:number of hidden states of the model
        self.N=O.shape[1]  # N:number of states of the model

        
    def backward(self, obs):
        # This function is for backward algorithm, suppose that T, O, pi 
        #are given, and it calculates a bwk matrix (obs*states).
        # The backward algorithm can be used to calculate the likelihood 
        #of the probability P(Y_{k+1}, ... , Y_n|t_k=C)
        #=sum_q P(Y_{k+2}, ... , Y_n|t_{k+1}=q)P(q|C)P(x_{k+1}|q)
        #The backward probability b is the probability of seeing the observations from 
        #time t + 1 to the end, given that we are in state i at time t
        self.bwk = np.zeros(shape=(len(obs),self.M))
        # Initalize bwk to be empty matrix T*M
        # Initialize base cases (t == T)
        self.bwk[len(obs)-1,:]=np.ones(self.M)      
        for t in reversed(range(len(obs)-1)):
            self.bwk[t,:] = np.sum(self.bwk[t+1,:] * self.T[:,:] * self.O[:,obs[t+1]],axis=1)        
                #beta_k(C)=\sum_q beta_{k+1}(q)P(q|C)P(w_{k+1}|q)
        prob = np.sum(self.pi[0,:]* self.O[:,obs[0]] * self.bwk[0,:]) 
        return prob 
        #This prob is the likelihood of the input obs   
 

    def forward(self, obs):
        # This function is for forward algorithm, suppose that A, B, pi are given, 
        #and it calculates a fwd matrix (obs*states).
        # The forward algorithm can be used to calculate the likelihood of the model
        #P(Y1, ... , Yn)=sum_t(\prod_i P(Y[i]|t[i])P(t[i]|t[i-1])
        self.fwd = np.zeros(shape=(len(obs),self.M)) 
        #Initalize fwk to be empty matrix, and finally fwd is T*M
        # Initialize base cases (t == 0)
        self.fwd[0,:]=self.pi[0,:] * self.O[:,obs[0]] 
            #alpha_1(q)=p(w1,t1=q)=P(t1=q|t0)*p(w1|t1=q)
        # Run Forward algorithm for t > 0            
        for t in range(1, len(obs)):
            self.fwd[t,:] = np.sum(self.fwd[t-1,:] * self.T[:,:] * self.O[:,obs[t]],axis=1) 
                #alpha_k(q)=\sum_q1 alpha_{k-1}(q1)P(t_k=q|t_{k-1}=q1)P(w_k|t+k=q)
        prob = np.sum(self.fwd[len(obs) - 1,:]) 
        # The likelihood of input equals to the summation of fwd[N][t]
        return prob

    def viterbi(self, obs):
    #the task of determining which sequence of variables is the underlying source 
    #of some sequence of observations is called the decoding task
    #Decoding: Given as input an HMM = (A, B, pi) and a sequence of observations 
    #O = Y_1, ... Y_N, find the most probable sequence of states Q = X_1, ... X_T.
    # Goal: find the best path!
    # argmax_t P(Y1, ... Y_N, X_1, ..., X_T|A, B, pi)
        vit = np.zeros(shape=(len(obs),self.M))
        #[[0 for x in range(self.M)] for y in range(len(obs))] 
        # matrix
        path = {} 
        # path
        # Initialize base cases (t == 0)
        vit[0,:] = self.pi[0,:] * self.O[:,obs[0]]
        for y in range(self.M):
            path[y] = [y]           
        #for y in range(self.M):
            #vit[0][y] = self.pi[0][y] * self.O[y][obs[0]]
            #path[y] = [y]
        
        # Run Viterbi for t > 0
        
        for t in range(1, len(obs)):
            newpath = {}
            for y in range(self.M):
                (prob, state) = max((vit[t-1][y0] * self.T[y0][y] * self.O[y][obs[t]], y0) 
                                    for y0 in range(self.M))
                vit[t][y] = prob
                newpath[y] = path[state] + [y]
            # Don't need to remember the old paths
            path = newpath
        n = 0                      
        # if only one element is observed max is sought in the initialization values
        if len(obs)!=1:
            n = t
        (prob, state) = max((vit[n][y], y) for y in range(self.M))
        return (prob, path[state])
    
    def viterbi(self, obs):
    #the task of determining which sequence of variables is the underlying source 
    #of some sequence of observations is called the decoding task
    #Decoding: Given as input an HMM = (A, B, pi) and a sequence of observations 
    #O = Y_1, ... Y_N, find the most probable sequence of states Q = X_1, ... X_T.
    # Goal: find the best path!
    # argmax_t P(Y1, ... Y_N, X_1, ..., X_T|A, B, pi)
        vit = np.zeros(shape=(len(obs),self.M))
        #[[0 for x in range(self.M)] for y in range(len(obs))] 
        # matrix
        path=np.zeros(shape=(len(obs),self.M))
        path[0,:]=range(self.M)
        # path
        # Initialize base cases (t == 0)
        vit[0,:] = self.pi[0,:] * self.O[:,obs[0]]    
        # Run Viterbi for t > 0      
        for t in range(1, len(obs)):
            vit[t,:]=np.max(vit[t-1,:] * self.T[:,:] * self.O[:,obs[t]],axis=1)
            path[t,:]=np.argmax(vit[t-1,:] * self.T[:,:] * self.O[:,obs[t]],axis=1)
        prob=np.max(vit[len(obs)-1,:])
        ind=np.argmax(vit[len(obs)-1,:])
        state=path[:,ind]
        #(prob, state) = max((vit[n][y], y) for y in range(self.M))
        return (prob, state)

    
    
    def forward_backward(self, obs): 
        #Output matrix gamma: gamma[t][y]=P(q_t=j|Y_1, ..., Y_N,A,B,pi)
        #and tensor zi: zi[t][i][j]=P(q_t=i,q_{t+1}=j|Y_1, ..., Y_N,A,B,pi)
        gamma = np.zeros(shape=(len(obs),self.M))
        # this is needed to keep track of finding a state i at a time t for all i and all t
        zi= np.zeros(shape=(len(obs),self.M,self.M))
        #zi = [[[0 for x in range(self.M)] for y in range(self.M)] for z in range(len(obs))]  
        # this is needed to keep track of finding a state i at a time t and j at a time (t+1) 
        #for all i and all j and all t
        # get alpha and beta tables computes
        p_obs = self.forward(obs)
        self.backward(obs)
        # compute gamma values
        for t in range(len(obs)):
            gamma[t,:] = (self.fwd[t,:] * self.bwk[t,:]) / p_obs
            if t == 0:
                self.pi[0,:] = gamma[t,:]
                #gamma[t][y]=P(q_t=j|Y_1, ..., Y_N,A,B,pi)
                #=P(q_t=j,Y_1, ..., Y_N|A,B,pi)/P(Y_1, ..., Y_N|A,B,pi)
                #=alpha_t(j)beta_t(j)/P(Y_1, ..., Y_N|A,B,pi)
                #compute zi values up to T - 1
            if t == len(obs) - 1:
                continue
            for y1 in range(self.M):
                zi[t,:,:] = self.fwd[t,:] * self.T[:,:] * self.O[:,obs[t + 1]] * self.bwk[t + 1,:] / p_obs
        #zi[t][i][j]=P(q_t=i,q_{t+1}=j|Y_1, ..., Y_N,A,B,pi)       
        #=P(q_t=i,q_{t+1}=j,Y_1, ..., Y_N|A,B,pi)/P(Y_1, ..., Y_N|A,B,pi)
        #=alpha_t(i)a_{ij}b_j(O_{t+1})beta_{t+1}(j)/apha_t(X_T)
        return (gamma,zi)
    
    
    def baum_welch(self,obs):
        #returns model given the initial model and observations  
        #The Baum-Welch algorithm iteratively estimate the counts.
        #We will start with an estimate for the transition and observation probabilities and 
        #then use these estimated probabilities to derive better and better probabilities. 
        #We get our estimated probabilities by computing the forward probability for 
        #an observation and then dividing that probability mass among all the different 
        #paths that contributed to this forward probability.
        gamma = np.zeros(shape=(len(obs),self.M))
        zi =  np.zeros(shape=(len(obs),self.M,self.M))
        # now that we have gamma and zi let us re-estimate
        (gamma,zi)=self.forward_backward(obs)
        
        #Update T
        #T_{ij)=expected number of transitions from state i to state j/expected number 
        #of transitions from state i
        a=np.sum(zi,axis=(0,2))
        self.T=np.sum(zi,axis=0)/np.array([a,]*self.M).transpose()
        
        for y in range(self.M):
            for k in range(self.N): 
                # for all symbols vk
                val = 0.0
                for t in range(len(obs)):
                    if obs[t] == k :
                        val += gamma[t][y]
                val /= np.sum(gamma[:,y])
                self.O[y][k] = val
                #O_j(v_k)=expected number of times in state j and observing symbol vk/expected 
                #number of times in state j
        return

In [135]:
M=randint(1,10)
N=randint(1,10)   
T_raw = np.random.random((M, M)) 
row_sums_T= T_raw.sum(axis=1)
T = T_raw / row_sums_T[:, np.newaxis]
# Get transition probability
O_raw = np.random.random((M, N))
row_sums_O = O_raw.sum(axis=1)
O = O_raw / row_sums_O[:, np.newaxis]
# Get emission probability
pi_raw = np.random.random((1, M)) 
row_sums_pi = pi_raw.sum(axis=1)
pi = pi_raw / row_sums_pi[:, np.newaxis]
# Get initial probability
hmm=HMM(T,O,pi)

In [136]:
#M=randint(0,10)
#N=randint(0,10)
#hmm=HMM(M,N)
T=randint(3,10)
observations = []
for i in xrange(0,T):
    observations.append(randint(0,N-1))
#observations=[1,0,1,1]
print "M=", M, "N=", N, "Observations = ", observations

M= 5 N= 10 Observations =  [1, 2, 9, 5, 2, 9, 6, 5, 7, 7]


In [137]:
p1=hmm.backward(observations)
print " Bwk Prob = ", p1

 Bwk Prob =  1.26124358265e-10


In [138]:

p2=hmm.forward(observations)
print " Fwd Prob = ", p2

 Fwd Prob =  8.73219878763e-11


In [139]:
prob, hidden_states = hmm.viterbi(observations)
print "Max Probability = ", prob, " Hidden State Sequence = ", hidden_states

Max Probability =  2.83094538061e-14  Hidden State Sequence =  [ 3.  3.  4.  2.  3.  3.  2.  2.  4.  2.]


In [140]:
(gamma,zi)=hmm.forward_backward(observations)
print "zi= ", zi
print "gamma=", gamma

zi=  [[[ 0.0725164   0.00858647  0.0464077   0.06412959  0.0085393 ]
  [ 0.08538378  0.02822664  0.00101909  0.0533444   0.01601303]
  [ 0.03156583  0.0468396   0.03712087  0.01980767  0.01254935]
  [ 0.02656295  0.00190302  0.04867907  0.08901764  0.01119171]
  [ 0.07169858  0.05187529  0.048508    0.01609206  0.0043998 ]]

 [[ 0.03372423  0.00586407  0.00913866  0.04542717  0.07937953]
  [ 0.0397083   0.01927719  0.00020068  0.03778731  0.14885372]
  [ 0.0146799   0.03198878  0.00730989  0.01403106  0.11665613]
  [ 0.01235328  0.00129965  0.00958594  0.063057    0.10403576]
  [ 0.0333439   0.03542787  0.00955225  0.01139906  0.04089964]]

 [[ 0.03055526  0.01359399  0.08448225  0.02668307  0.018911  ]
  [ 0.03597702  0.04468805  0.00185519  0.02219556  0.03546219]
  [ 0.01330047  0.07415586  0.06757617  0.00824158  0.02779159]
  [ 0.01119248  0.00301284  0.08861714  0.0370385   0.02478498]
  [ 0.03021067  0.08212829  0.08830571  0.00669559  0.00974373]]

 [[ 0.06364758  0.01028348  0

In [141]:
hmm.baum_welch(observations)
print "The new model parameters after 1 iteration are: "
print "T = ", hmm.T
print "O = ", hmm.O
print "pi = ", hmm.pi

The new model parameters after 1 iteration are: 
T =  [[ 0.20751546  0.04502675  0.3039349   0.2470225   0.19650038]
 [ 0.25112024  0.15212734  0.00685955  0.21118298  0.37870989]
 [ 0.09567416  0.26015516  0.25749683  0.08081177  0.30586207]
 [ 0.07561809  0.00992738  0.31715239  0.34110595  0.25619619]
 [ 0.21414219  0.28391871  0.33157462  0.06469448  0.10567001]]
O =  [[ 0.          0.13958937  0.19558282  0.          0.          0.20469175
   0.08538272  0.21431611  0.          0.16043723]
 [ 0.          0.02217813  0.31005716  0.          0.          0.16129387
   0.06786475  0.22573829  0.          0.2128678 ]
 [ 0.          0.07024183  0.15333482  0.          0.          0.2168669
   0.1239185   0.23543576  0.          0.20020219]
 [ 0.          0.09714309  0.19638149  0.          0.          0.21302628
   0.06152159  0.28089757  0.          0.15102997]
 [ 0.          0.39997558  0.08868542  0.          0.          0.16323246
   0.11526723  0.11387683  0.          0.11896249]]
