### Problem 22

Given: A string x, followed by the alphabet Σ from which x was constructed, followed by the states States, transition matrix Transition, and emission matrix Emission of an HMM (Σ, States, Transition, Emission).

Return: The probability Pr(x) that the HMM emits x.

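Formula for the forward algorithm:
$forward_{k,i}=\sum \limits _{all\;states\;l}forward_{l,i-1}\cdot weight_{i}(l,k)$, where l ranges over the states in the previous column (i-1), k is the current state in column i,
and $weight_{i}(l,k)=transition_{l,k}\cdot emission_{k}(x_{i})$ is the probability of moving from state l to state k and emitting the i-th symbol of x from k.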

In [51]:
import numpy as np
class ProbString():
    """
    Compute the probability that an HMM emits a given string (forward algorithm).

    The HMM and the observed string are read from a text file whose lines are
    laid out as follows (0-based line numbers, n = number of states):

        0            the observed string
        2            the alphabet, whitespace separated
        4            the state names, whitespace separated
        6            header row of the transition matrix (ignored)
        7 .. 6+n     transition rows: state label followed by n probabilities
        8+n          header row of the emission matrix (ignored)
        9+n .. 8+2n  emission rows: state label followed by one probability
                     per alphabet symbol

    Lines 1, 3, 5 and 7+n are never read (separator lines in the input).
    """

    def __init__(self, infile):
        '''
        constructor: saves attributes

        Parameters
        ----------
            infile : file name

        '''
        self.file=infile

    @staticmethod
    def _parseMatrix(rows, nRows):
        """
        Convert labelled text rows into a float matrix.

        Parameters
        ----------
        rows:list of str
            one line per matrix row; the first whitespace-separated field
            is the row label and is dropped
        nRows:int
            number of rows the matrix must have

        Returns
        ----------
        ndarray
            float matrix with nRows rows

        Raises
        ----------
        ValueError
            if fewer than nRows lines are available, if the rows have
            different lengths, or if a field is not a number
        """
        if len(rows) != nRows:
            raise ValueError(
                'expected %d matrix rows but found %d' % (nRows, len(rows)))
        # split() with no argument accepts tabs as well as runs of spaces
        return np.array([[float(value) for value in row.split()[1:]] for row in rows],
                        dtype=float)

    def readHMM(self):
        """
        read HMM file

        Return
        ----------
        String:list
            the index (into the alphabet) of every symbol of the observed string
        state:list
            all the states
        emission:list
            all the emission symbols (the alphabet)
        transitionProb:ndarray
            transition probability, transitionProb[l, k] = Pr(l -> k)
        emissionProb:ndarray
            emission matrix, emissionProb[k, s] = Pr(state k emits symbol s)

        Raises
        ----------
        ValueError
            if the observed string contains a symbol that is not in the
            alphabet, or if a matrix is malformed
        """
        with open(self.file) as rawData:
            data=rawData.readlines()
        string=data[0].strip() #the first row is the observable string
        emission=data[2].split() #emission list
        state=data[4].split() #state list
        nStates=len(state)

        #transition rows start on line 7, right after their header on line 6
        transitionStart=7
        #the emission rows follow a separator line and a header line
        emissionStart=transitionStart+nStates+2
        transitionProb=self._parseMatrix(data[transitionStart:transitionStart+nStates],nStates)
        emissionProb=self._parseMatrix(data[emissionStart:emissionStart+nStates],nStates)

        #map every symbol to its column in the emission matrix; setdefault keeps
        #the first occurrence, which is what list.index() would return
        symbolIndex={}
        for idx,symbol in enumerate(emission):
            symbolIndex.setdefault(symbol,idx)
        String=[] #stores the index of every observed symbol
        for symbol in string:
            if symbol not in symbolIndex:
                raise ValueError('symbol %r is not in the alphabet %r' % (symbol, emission))
            String.append(symbolIndex[symbol])
        return String,state,emission,transitionProb,emissionProb

    def forward(self,string,state,emission,transitionProb,emissionProb):
        """
        Calculate The probability Pr(x) that the HMM emits x.

        Every state is given the same initial probability 1/len(state).
        Only the current column of the forward table is kept, because each
        column depends on the previous one alone.

        Parameters
        ----------
        string:list
            the index of observable string the path emits
        state:list
            all the states
        emission:
            all the emission (not used by the computation; kept so that the
            signature matches the values returned by readHMM)
        transitionProb:ndarray
            transition probability, indexed [from state, to state]
        emissionProb:ndarray
            emission matrix, indexed [state, symbol index]

        Returns
        ----------
        stringProb:float
            the probability Pr(x) that the HMM emits x; 1.0 for an empty
            string, since the empty string is emitted with certainty.
        """
        if len(string)==0:
            return 1.0
        transitionProb=np.asarray(transitionProb,dtype=float)
        emissionProb=np.asarray(emissionProb,dtype=float)
        #first column: uniform initial probability times the emission of the first symbol
        column=np.multiply(1/len(state),emissionProb[:,string[0]])
        for symbol in string[1:]:
            #(column @ transitionProb)[k] = sum over l of column[l]*transitionProb[l,k]
            column=(column @ transitionProb)*emissionProb[:,symbol]
        stringProb=np.sum(column) #the overall prob is the sum of the last column
        return stringProb
        

### Main

In [53]:
def main(infile):
    '''
    Print the probability that the HMM stored in a file emits its string.

    Parameters
        ----------
        infile : str
            the filename

        Returns
        -------
        STDOUT
    '''
    model=ProbString(infile) #wrap the input file
    #readHMM yields (string, state, emission, transitionProb, emissionProb),
    #which is exactly the positional argument order forward expects
    parsed=model.readHMM()
    print(model.forward(*parsed)) #write the emission probability to STDOUT

### Run the program here

In [54]:
# Run only when executed as a script/notebook cell, not when imported.
if __name__ == "__main__":
    main(infile='rosalind_ba10d.txt')  # input file is looked up in the working directory

1.5056333776177484e-49
