### Problem 21
Given: A string x, followed by the alphabet Σ from which x was constructed, followed by the states States, transition matrix Transition, and emission matrix Emission of an HMM (Σ, States, Transition, Emission).

Return: A path that maximizes the (unconditional) probability Pr(x, π) over all possible paths π.

In [2]:
import numpy as np
class ProbString():
    """
    Most-likely hidden path decoder for an HMM stored in a Rosalind BA10C file.

    ``readHMM`` parses the file whose name was given to the constructor and
    ``Vertibi`` runs the Viterbi algorithm on the parsed pieces.
    """

    def __init__(self, infile):
        '''
        Constructor: saves the input file name.

        Parameters
        ----------
            infile : str
                name of the HMM description file; it is opened only by readHMM

        '''
        self.file=infile

    def readHMM(self):
        """
        Read the HMM file.

        Expected layout (0-based line numbers, n = number of states):
        line 0 observed string, line 2 alphabet, line 4 states, line 6
        transition header, lines 7..6+n transition rows, line 8+n emission
        header, lines 9+n..8+2n emission rows. Lines 1, 3, 5 and 7+n are
        separators and are ignored. Fields may be separated by tabs or spaces.

        Return
        ----------
        String:list
            for every observed symbol, its index in the alphabet
            (i.e. its column in the emission matrix)
        state:list
            all the state names
        emission:list
            all the emitted symbols (the alphabet)
        transitionProb:ndarray
            float matrix, transitionProb[i, j] = Pr(state i -> state j)
        emissionProb:ndarray
            float matrix, emissionProb[i, k] = Pr(state i emits symbol k)

        Raises
        ----------
        ValueError
            if the file has too few lines for the layout above, or the
            observed string contains a symbol that is not in the alphabet
        """
        with open(self.file) as rawData:
            data=rawData.readlines()
        if len(data) < 5:
            raise ValueError('HMM file is too short: ' + str(self.file))

        observed=data[0].strip() #the first row is the observable string
        # split() without an argument accepts tabs as well as spaces
        emission=data[2].split() #emission list
        state=data[4].split() #state list
        nStates=len(state)
        if len(data) < 9 + 2*nStates:
            raise ValueError('HMM file is missing matrix rows: ' + str(self.file))

        #-------------------rows 7 .. 7+n-1 hold the transition matrix-------------------#
        # the first field of every row is the state label, hence the [1:]
        transitionProb=np.array(
            [[float(value) for value in row.split()[1:]]
             for row in data[7:7+nStates]])

        #------------------rows 9+n .. 9+2n-1 hold the emission matrix-------------------#
        emissionProb=np.array(
            [[float(value) for value in row.split()[1:]]
             for row in data[9+nStates:9+2*nStates]])

        # translate the symbols to column indices of the emission matrix
        String=[emission.index(symbol) for symbol in observed]
        return String,state,emission,transitionProb,emissionProb

    def Vertibi(self,string,state,emission,transitionProb,emissionProb):
        """
        Find the hidden path that maximizes Pr(x, path) (Viterbi algorithm).

        Every state is given the same initial probability 1/len(state).
        Scores are accumulated as log-probabilities: multiplying the raw
        probabilities underflows to 0.0 on long strings, after which every
        argmax would degenerate to the first state.

        Parameters
        ----------
        string:list
            the index of observable string the path emits
        state:list
            all the states
        emission:
            all the emission (not used by the computation, kept for the
            existing call signature)
        transitionProb:ndarray
            transition probability, indexed [from state, to state]
        emissionProb:ndarray
            emission matrix, indexed [state, symbol index]

        Returns
        ----------
        hiddenPath:str
            the most likely hidden path that emits x; '' for an empty string.
            Ties are resolved in favour of the state listed first.
        """
        nObserved=len(string)
        if nObserved == 0:
            return '' #nothing was emitted, so the path is empty
        nStates=len(state)

        # log(0) = -inf is the wanted score of an impossible step, so the
        # divide-by-zero warning numpy would emit for it is silenced
        with np.errstate(divide='ignore'):
            logTransition=np.log(np.asarray(transitionProb, dtype=float))
            logEmission=np.log(np.asarray(emissionProb, dtype=float))
            # first column: uniform start probability times emission
            score=logEmission[:, string[0]] - np.log(nStates)

        # backPointer[col-1, s] = best predecessor of state s at position col
        backPointer=np.zeros((nObserved-1, nStates), dtype=np.intp)
        columns=np.arange(nStates)
        for col in range(1, nObserved):
            # candidate[p, s] = score of reaching s at col through p at col-1
            candidate=score[:, np.newaxis] + logTransition
            best=np.argmax(candidate, axis=0)
            backPointer[col-1]=best
            score=candidate[best, columns] + logEmission[:, string[col]]

        #----------------------------------------Backtracking--------------------------------------#
        path=[int(np.argmax(score))] #best final state
        for col in range(nObserved-2, -1, -1): #walk the pointers right to left
            path.append(int(backPointer[col, path[-1]]))
        path.reverse()
        #----------------------------------------Backtracking--------------------------------------#
        return ''.join(state[index] for index in path)

### Main

In [3]:
def main(infile):
    '''
    Decode the HMM described in a file and print its most likely hidden path.

    Parameters
        ----------
        infile : str
            the filename

        Returns
        -------
        STDOUT
            the hidden path, printed on one line
    '''
    decoder=ProbString(infile) #the decoder only remembers the file name here
    parsed=decoder.readHMM() #(string, state, emission, transitionProb, emissionProb)
    hiddenPath=decoder.Vertibi(*parsed) #same order as the Vertibi parameters
    print(hiddenPath)

### Run the program here

In [4]:
# Script entry point: decode the dataset file named below (looked up relative
# to the current working directory) and print the resulting hidden path.
if __name__ == "__main__":
    main('rosalind_ba10c.txt')

CCCCCCCBAACBACBACCCCBACCCBAACCCCCBACBAACBAAAAACBAAACCBAAAAACCBAAACBAACBACBAAACCCCCCBBAAAAAAAAACCCBAC
