### Problem 24
Soft Decoding Problem

Given: A string x, followed by the alphabet Σ from which x was constructed, followed by the states States, transition matrix Transition, and emission matrix Emission of an HMM (Σ, States, Transition, Emission).

Return: The probability $\Pr(\pi_i = k \mid x)$ that the HMM was in state $k$ at step $i$ (for each state $k$ and each step $i$).

In [65]:
import numpy as np
class SoftDecoding():
    """
    Soft decoding of an HMM described in a text file.

    The posterior probability of being in state k at step i is computed as
    forward[k, i] * backward[k, i] / Pr(x), using a uniform initial state
    distribution (1 / number of states).

    NOTE: probabilities are not rescaled, so very long observation strings
    may underflow to 0.
    """

    def __init__(self, infile):
        '''
        Constructor: saves the input file name.

        Parameters
        ----------
            infile : str
                name of the file describing the HMM
        '''
        self.file = infile

    def readHMM(self):
        """
        Read the HMM description file.

        Expected layout (0-based line indices, n = number of states):
            0        observed string
            2        alphabet symbols
            4        state names
            6        transition header row (ignored)
            7..6+n   transition rows, each starting with a state label
            8+n      emission header row (ignored)
            9+n..    n emission rows, each starting with a state label
        Fields may be separated by tabs or by any other run of whitespace.

        Returns
        ----------
        String : list of int
            index (into the alphabet) of every symbol of the observed string
        state : list of str
            all the states
        emission : list of str
            all the symbols of the alphabet
        transitionProb : ndarray of float, shape (n, n)
            transition probabilities, row = source state, column = target state
        emissionProb : ndarray of float, shape (n, len(emission))
            emission probabilities, row = state, column = symbol

        Raises
        ----------
        ValueError
            if a matrix does not have the expected shape, if a probability is
            not a number, or if the observed string contains a symbol that is
            not in the alphabet
        """
        with open(self.file) as rawData:
            data = rawData.readlines()

        observed = data[0].strip()  # the first row is the observable string
        # split() without argument accepts tabs as well as spaces
        emission = data[2].split()
        state = data[4].split()
        nStates = len(state)

        # Each matrix row starts with the state label, which is dropped.
        transitionRows = [row.split()[1:] for row in data[7:7 + nStates]]
        emissionRows = [row.split()[1:]
                        for row in data[9 + nStates:9 + 2 * nStates]]
        transitionProb = np.array(transitionRows, dtype=float)
        emissionProb = np.array(emissionRows, dtype=float)

        if transitionProb.shape != (nStates, nStates):
            raise ValueError('transition matrix must have shape '
                             '(%d, %d)' % (nStates, nStates))
        if emissionProb.shape != (nStates, len(emission)):
            raise ValueError('emission matrix must have shape '
                             '(%d, %d)' % (nStates, len(emission)))

        # Translate symbols to column indices of the emission matrix.
        String = [emission.index(symbol) for symbol in observed]
        return String, state, emission, transitionProb, emissionProb

    def forward(self, string, state, emission, transitionProb, emissionProb):
        """
        Forward algorithm.

        Parameters
        ----------
        string : list of int
            the index of each symbol of the observed string
        state : list
            all the states
        emission : list
            all the symbols of the alphabet (unused, kept for a uniform
            signature)
        transitionProb : ndarray
            transition probability
        emissionProb : ndarray
            emission matrix

        Returns
        ----------
        forwardMatrix : ndarray, shape (len(state), len(string))
            forwardMatrix[k, i] is the probability of emitting the first
            i + 1 symbols and being in state k at step i
        stringProb : float
            the probability Pr(x) that the HMM emits x
        """
        transitionProb = np.asarray(transitionProb, dtype=float)
        emissionProb = np.asarray(emissionProb, dtype=float)
        forwardMatrix = np.empty((len(state), len(string)))
        # first column: uniform initial distribution times the emission
        forwardMatrix[:, 0] = (1 / len(state)) * emissionProb[:, string[0]]
        for col in range(1, len(string)):
            # (prev @ T)[k] = sum_l prev[l] * T[l, k]
            reached = forwardMatrix[:, col - 1] @ transitionProb
            forwardMatrix[:, col] = reached * emissionProb[:, string[col]]
        return forwardMatrix, forwardMatrix[:, -1].sum()

    def backward(self, string, state, emission, transitionProb, emissionProb):
        """
        Backward algorithm.

        Parameters
        ----------
        string : list of int
            the index of each symbol of the observed string
        state : list
            all the states
        emission : list
            all the symbols of the alphabet (unused, kept for a uniform
            signature)
        transitionProb : ndarray
            transition probability
        emissionProb : ndarray
            emission matrix

        Returns
        ----------
        backwardMatrix : ndarray, shape (len(state), len(string))
            backwardMatrix[k, i] is the probability of emitting the symbols
            after step i given that the HMM is in state k at step i
        """
        transitionProb = np.asarray(transitionProb, dtype=float)
        emissionProb = np.asarray(emissionProb, dtype=float)
        backwardMatrix = np.empty((len(state), len(string)))
        backwardMatrix[:, -1] = 1  # last column: nothing left to emit
        for col in range(len(string) - 2, -1, -1):
            # (T @ v)[k] = sum_l T[k, l] * v[l]
            emitted = backwardMatrix[:, col + 1] * emissionProb[:, string[col + 1]]
            backwardMatrix[:, col] = transitionProb @ emitted
        return backwardMatrix

    def softDecoding(self, string, state, emission, transitionProb, emissionProb):
        """
        Soft decoding: posterior state probabilities.

        Parameters
        ----------
        string : list of int
            the index of each symbol of the observed string
        state : list
            all the states
        emission : list
            all the symbols of the alphabet
        transitionProb : ndarray
            transition probability
        emissionProb : ndarray
            emission matrix

        Returns
        ----------
        ndarray, shape (len(string), len(state))
            entry [i, k] is Pr(pi_i = k | x), the probability that the HMM
            was in state k at step i, rounded to 4 decimals
        """
        forwardMatrix, Px = self.forward(string, state, emission,
                                         transitionProb, emissionProb)
        backwardMatrix = self.backward(string, state, emission,
                                       transitionProb, emissionProb)
        # Pr(pi_i = k | x) = forward[k, i] * backward[k, i] / Pr(x)
        decodeMatrix = forwardMatrix * backwardMatrix / Px
        return np.around(np.transpose(decodeMatrix), 4)

### Main

In [66]:
def main(infile):
    '''
    Run soft decoding on an HMM file and print the result.

    Parameters
    ----------
    infile : str
        the filename

    Returns
    -------
    None
        a tab-separated header line with the state names is written to
        STDOUT, followed by one tab-separated line per step
    '''
    decoder = SoftDecoding(infile)
    parsed = decoder.readHMM()  # (string, state, emission, transition, emission matrix)
    stateNames = parsed[1]
    print('\t'.join(stateNames))  # header line
    posterior = decoder.softDecoding(*parsed)
    for stepRow in posterior:
        print(*stepRow, sep='\t')

### Run the program here

In [68]:
# Script entry point: run main() on the input file 'rosalind_ba10j.txt'
# only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main('rosalind_ba10j.txt')

A	B
0.5799	0.4201
0.2331	0.7669
0.2502	0.7498
0.3902	0.6098
0.2186	0.7814
0.3515	0.6485
0.3393	0.6607
0.2392	0.7608
0.3331	0.6669
0.1413	0.8587
