### Problem 20
Probability of an Outcome Given a Hidden Path Problem

Given: A string x, followed by the alphabet Σ from which x was constructed, followed by a hidden path π, followed by the states States and emission matrix Emission of an HMM (Σ, States, Transition, Emission).

Return: The conditional probability Pr(x|π) that string x will be emitted by the HMM given the hidden path π.

In [17]:
import numpy as np
class ProbString():
    '''
    Probability of an outcome given a hidden path (Rosalind BA10B).

    readHMM indexes the input file by fixed, 0-based line positions:

        0    emitted string x
        2    alphabet symbols
        4    hidden path
        6    state names
        9+   one emission row per state: a row label followed by probabilities

    Lines 1, 3, 5, 7 and 8 are never read (in the Rosalind layout they are
    the separators and the emission-matrix header).
    '''
    def __init__(self, infile):
        '''
        Constructor: stores the input file name.

        Parameters
        ----------
            infile : str
                path of the HMM input file

        '''
        self.file=infile

    def readHMM(self):
        """
        Read the HMM input file.

        Fields are split on any run of whitespace, so both tab-separated and
        space-separated files are accepted.

        Return
        ----------
        string:str
            the observable string the path emits
        path:str
            the hidden path
        state:list
            all the state names
        emission:list
            all the emitted symbols (the alphabet)
        emissionProb:ndarray
            float matrix of emission probabilities; row i belongs to
            state[i] and column j to emission[j] (rows are taken in file
            order, the row labels are not checked)

        Raises
        ----------
        ValueError
            if the emission matrix read from the file does not have one row
            per state and one column per symbol
        """
        with open(self.file) as rawData:
            data=rawData.read().splitlines()
        string=data[0].rstrip() #line 0 is the observable string
        path=data[4].rstrip() #line 4 is the hidden path
        emission=data[2].split() #alphabet symbols
        state=data[6].split() #state names
        #emission rows start at line 9; the first field of each row is the state label
        rows=[[float(value) for value in line.split()[1:]]
              for line in data[9:9+len(state)]]
        #build the matrix once instead of growing it with np.append
        emissionProb=np.array(rows,dtype=float)
        if emissionProb.shape!=(len(state),len(emission)):
            raise ValueError(
                'emission matrix has shape %s, expected %s'
                % (emissionProb.shape,(len(state),len(emission))))
        return string,path,state,emission,emissionProb

    def probString(self,string,path,state,emission,emissionProb):
        """
        Calculate the conditional probability of the string given the hidden path

        The result is the product, over every position i, of the probability
        that state path[i] emits symbol string[i].

        Parameters
        ----------
        string:str
            the observable string the path emits
        path:str
            the hidden path, one state per symbol of string
        state:list
            all the state names, in the row order of emissionProb
        emission:list
            all the emitted symbols, in the column order of emissionProb
        emissionProb:ndarray
            emission probabilities indexed as [state, symbol]

        Return
        ----------
        prob:float
            the probability of the string (1.0 for an empty string)

        Raises
        ----------
        ValueError
            if string and path differ in length, or if a symbol or state is
            missing from emission or state
        """
        if len(string)!=len(path):
            #Pr(x|path) is only defined when every symbol has its own state
            raise ValueError(
                'string and path must have the same length (%d != %d)'
                % (len(string),len(path)))
        prob=1.0 #initialize the overall probability
        for symbol,hiddenState in zip(string,path):
            emissionIndex=emission.index(symbol) #column index of the numpy array
            stateIndex=state.index(hiddenState) #row index of the numpy array
            prob*=emissionProb[stateIndex,emissionIndex] #emission prob of this position
        return prob

### Main

In [18]:
def main(infile):
    '''
    Print the probability of the emitted string given the hidden path.

    Parameters
        ----------
        infile : str
            name of the input file handed to ProbString

        Returns
        -------
        None; the probability is written to STDOUT
    '''
    solver=ProbString(infile) #wrap the input file
    parsed=solver.readHMM() #(string, path, state, emission, emissionProb)
    print(solver.probString(*parsed)) #arguments go in the order readHMM returns them


### Run the program here

In [19]:
if __name__ == "__main__":
    # Run the solver on the dataset file; the relative path is resolved
    # against the current working directory.
    main('rosalind_ba10b.txt')

2.2502074775148954e-30
