### Problem 19
Probability of a Hidden Path Problem

Given: A hidden path π followed by the states States and transition matrix Transition of an HMM (Σ, States, Transition, Emission).

Return: The probability of this path, Pr(π). You may assume that initial probabilities are equal.

In [134]:
import numpy as np
import itertools
class ProbHMM():
    """
    A class to calculate the probability of a hidden Markov path

    ...

    Attributes
    ----------
    file : str
        name of the file describing the hidden path, the states and the
        transition matrix

    Methods
    -------
    readHMM():
        read the file which stores the information of the HMM
    probPath(path,state,transition):
        calculate the probability of getting such a hidden path
    """
    def __init__(self, infile):
        '''
        constructor: saves attributes

        Parameters
        ----------
            infile : str
                name of the HMM description file
        '''
        self.file = infile

    def readHMM(self):
        """
        read the HMM file

        The layout relied on (0-based line indices) is: line 0 holds the
        hidden path, line 2 the state names, line 4 the column header of
        the matrix, and lines 5 onwards one matrix row per state, each row
        starting with its row label. Lines 1 and 3 are not read. Fields
        may be separated by tabs or by any other run of whitespace.

        Return
        ----------
        path:str
            the hidden path
        state:list
            all the states
        transition:ndarray
            transition probability as a float matrix, one row per state

        Raises
        ----------
        ValueError
            if the file has no state line, no states, or fewer matrix rows
            than states
        """
        with open(self.file) as rawData:
            data = rawData.readlines()
        if len(data) < 3:
            raise ValueError("HMM file is too short: no state line found")
        path = data[0].rstrip()  # the first row is the path
        state = data[2].split()
        if not state:
            raise ValueError("HMM file lists no states")
        # one row per state, starting right below the column header
        rows = data[5:5 + len(state)]
        if len(rows) < len(state):
            raise ValueError(
                "expected %d transition rows, found %d" % (len(state), len(rows))
            )
        # [1:] drops the row label so only the probabilities remain
        transition = np.array([row.split()[1:] for row in rows])
        return path, state, transition.astype(float)

    def probPath(self, path, state, transition):
        """
        calculate the probability of a hidden path

        The result is the initial probability 1/len(state) multiplied by
        the transition probability of every pair of consecutive states in
        the path.

        Parameters
        ----------
            path:str
                the hidden path, one character per state
            state:list
                all the states; row/column k of transition belongs to
                state[k]
            transition:ndarray
                transition probability

        Return
        ----------
        probability:float
            the probability of the hidden path

        Raises
        ----------
        ValueError
            if the path is empty or contains a symbol that is not a state
        """
        if not path:
            raise ValueError("the hidden path is empty")
        unknown = sorted(set(path) - set(state))
        if unknown:
            raise ValueError("path contains unknown states: %s" % ", ".join(unknown))

        # count how often every pair of consecutive states occurs in the path
        pairCount = {}
        for pair in map(''.join, zip(path, path[1:])):
            pairCount[pair] = pairCount.get(pair, 0) + 1

        probability = 1 / len(state)  # equal initial probabilities
        # walk the matrix row by row; pairs that never occur contribute nothing
        for (i, src), (j, dst) in itertools.product(enumerate(state), repeat=2):
            seen = pairCount.get(src + dst, 0)
            if seen:
                probability *= transition[i, j] ** seen
        return probability

### Main

In [135]:
def main(infile):
    '''
    Print the probability of the hidden path described in a file

    Parameters
    ----------
    infile : str
        name of the HMM description file handed to ProbHMM

    Returns
    -------
    None
        the probability is written to STDOUT
    '''
    model = ProbHMM(infile)
    # readHMM yields (path, state, transition), exactly the arguments probPath takes
    parsed = model.readHMM()
    result = model.probPath(*parsed)
    print(result)

### Run the program here

In [138]:
if __name__ == "__main__":
    # input file for this run
    input_file = 'rosalind_ba10a.txt'
    main(input_file)

3.9760184304989196e-18
