**Probability of a Hidden Path Problem**

**Given**: A hidden path π followed by the states States and transition matrix Transition of an HMM (Σ, States, Transition, Emission).

**Return**: The probability of this path, Pr(π). You may assume that initial probabilities are equal.

In [22]:
import numpy as np
path = 'AABBBAABABAAAABBBBAABBABABBBAABBAAAABABAABBABABBAB'
states = ["A", "B"]
transition_mat = {'A':{'A':0.194, 'B':0.806},
                 'B':{'A': 0.273, 'B':0.727}}
transition_matrix = np.array([[0.194, 0.806], 
                             [0.273, 0.727]])
ans = 5.01732865318e-19

In [19]:
table = 'AB'.maketrans('AB', '01')
path.translate(table)

'00111001010000111100110101110011000010100110101101'

In [31]:
def prob_hidden_path(path, transition_mat):
    #initial probability
    prob_path = 0.5
    
    for idx in range(1, len(path)):
        prob_path *= transition_mat[path[idx-1]][path[idx]]
#     for idx in range(1, len(path)):
#         prob_path *= transition_mat[int(path[idx-1])][int(path[idx])]
    
    return prob_path

In [32]:
prob_hidden_path(path, transition_mat)
#prob_hidden_path(path.translate(table), transition_matrix)

1.0870338089802573e-18

In [33]:
# 10a probability of a hidden path
file = 'rosalind_ba10a.txt'
with open(file, 'r') as line:
    tmp = line.read().splitlines()

hidden_path = tmp[0]
states = tmp[2].split(' ')

col_syms = tmp[4].split('\t')[1:]
transition_matrix = {}
for i in range(5, len(tmp)):
    current_line = tmp[i].rstrip().split('\t')
    row_sym = current_line[0]
    transition_matrix[row_sym] = {}
    for j in range(1, len(current_line)):
        transition_matrix[row_sym][col_syms[j - 1]] = float(current_line[j])

In [34]:
prob_hidden_path(hidden_path, transition_matrix)

3.2649170591326406e-18

In [37]:
string = 'xxyzyxzzxzxyxyyzxxzzxxyyxxyxyzzxxyzyzxzxxyxyyzxxzx'
path = 'BBBAAABABABBBBBBAAAAAABAAAABABABBBBBABAABABABABBBB'
emission_mat = {'A':{'x': 0.612, 'y': 0.314, 'z': 0.074},
               'B': {'x': 0.346, 'y': 0.317, 'z': 0.335}}
ans = 1.93157070893e-28

In [38]:
def prob_outcome_given_path(x, hidden_path, emission_mat):
    emission_prob = 1
    for i in range(len(x)):
        emission_prob *= emission_mat[hidden_path[i]][x[i]]
    return emission_prob

In [39]:
prob_outcome_given_path(string, path, emission_mat)

1.9029976816712556e-28

In [40]:
# 10b probability of outcome given a hidden path
file = 'rosalind_ba10b.txt'

with open(file, 'r') as line:
    input_lines = line.read().splitlines()

x = input_lines[0]
alphabet = input_lines[2].split(' ')
hidden_path = input_lines[4]

col_syms = input_lines[8].split()
emission_matrix = {}
for line in input_lines[9:]:
    current_line = line.split()
    row_sym = current_line[0]
    emission_matrix[row_sym] = {}
    for j in range(1, len(current_line)):
        emission_matrix[row_sym][col_syms[j - 1]] = float(current_line[j])

In [41]:
prob_outcome_given_path(x, hidden_path, emission_matrix)

1.1740169496231375e-27

In [57]:
# 10c viterbi algorithm
string = 'xyxzzxyxyy'
transition_mat = {'A':{'A': 0.641, 'B': 0.359},
                 'B':{'A': 0.729, 'B': 0.271}}
emission_mat = {'A':{'x': 0.117, 'y': 0.691, 'z': 0.192},
               'B': {'x': 0.097, 'y': 0.42, 'z': 0.483}}

transition_mat = np.array([[0.641, 0.359],
                          [0.729, 0.271]])
emission_mat = np.array([[0.117, 0.691, 0.192],
                       [0.097, 0.42, 0.483]])
pi = [0.5, 0.5]
table = string.maketrans('xyz', '012')
obs = string.translate(table)
obs_arr = []
for i in obs:
    obs_arr.append(int(i))
obs_arr = np.array(obs_arr)
obs_arr

array([0, 1, 0, 2, 2, 0, 1, 0, 1, 1])

In [89]:
def viterbi(pi, transition_mat, emission_mat, string):
    num_states = np.shape(emission_mat)[0]
    T = np.shape(string)[0]
    
    # init blank path
    path = np.zeros(T)
    
    # delta: highest prob of any path that reaches state i
    delta = np.zeros((num_states, T))
    
    # phi: argmax by time step for each state
    phi = np.zeros((num_states, T))
    
    # init delta and phi
    delta[:, 0] = pi * emission_mat[:, string[0]]
    phi[:, 0] = 0
    
    print('\n Start Walk Forward \n')
    # forward algo
    for t in range(1, T):
        for s in range(num_states):
            delta[s, t] = np.max(delta[:, t-1] * transition_mat[:, s])* emission_mat[s, string[t]]
            phi[s, t] = np.argmax(delta[:, t-1] * transition_mat[:, s])
            print('s={s} and t={t}: phi[{s}, {t}] = {phi}'.format(s=s, t=t,
                                                                 phi=phi[s, t]))
    print(phi)
    # find optimal path
    print('-'*50)
    print('Start Backtrace\n')
    path[T-1] = np.argmax(delta[:, T-1])
    for t in range(T-2, -1, -1):
        path[t] = phi[int(path[t+1]), [int(t+1)]]
        print('path[{}] = {}'.format(t, path[t]))
    
    return path, delta, phi

In [91]:
path, delta, phi = viterbi(pi, transition_mat, emission_mat, obs_arr)


 Start Walk Forward 

s=0 and t=1: phi[0, 1] = 0.0
s=1 and t=1: phi[1, 1] = 0.0
s=0 and t=2: phi[0, 2] = 0.0
s=1 and t=2: phi[1, 2] = 0.0
s=0 and t=3: phi[0, 3] = 0.0
s=1 and t=3: phi[1, 3] = 0.0
s=0 and t=4: phi[0, 4] = 1.0
s=1 and t=4: phi[1, 4] = 1.0
s=0 and t=5: phi[0, 5] = 1.0
s=1 and t=5: phi[1, 5] = 0.0
s=0 and t=6: phi[0, 6] = 0.0
s=1 and t=6: phi[1, 6] = 0.0
s=0 and t=7: phi[0, 7] = 0.0
s=1 and t=7: phi[1, 7] = 0.0
s=0 and t=8: phi[0, 8] = 0.0
s=1 and t=8: phi[1, 8] = 0.0
s=0 and t=9: phi[0, 9] = 0.0
s=1 and t=9: phi[1, 9] = 0.0
[[0. 0. 0. 0. 1. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]
--------------------------------------------------
Start Backtrace

path[8] = 0.0
path[7] = 0.0
path[6] = 0.0
path[5] = 0.0
path[4] = 1.0
path[3] = 1.0
path[2] = 0.0
path[1] = 0.0
path[0] = 0.0


In [94]:
state_map = {0:'A', 1:'B'}
''.join([state_map[i] for i in path.tolist()])

'AAABBAAAAA'

In [96]:
# 10c viterbi algorithm
string = 'zyzxxyxxxyxzzxyyxzyxzxzxxzxxzyzyzzxzzzxyyzzxyyyyyzxzzxxxyzzzzzyyyzzyyzyzxzyyyyyyyyxxxzyyzxzxzxxyzzxz'

transition_mat = np.array([[0.346, 0.654],
                          [0.462, 0.538]])
emission_mat = np.array([[0.767, 0.213, 0.02],
                       [0.803, 0.004, 0.193]])
pi = [0.5, 0.5]
table = string.maketrans('xyz', '012')
obs = string.translate(table)
obs_arr = []
for i in obs:
    obs_arr.append(int(i))
obs_arr = np.array(obs_arr)
obs_arr

array([2, 1, 2, 0, 0, 1, 0, 0, 0, 1, 0, 2, 2, 0, 1, 1, 0, 2, 1, 0, 2, 0,
       2, 0, 0, 2, 0, 0, 2, 1, 2, 1, 2, 2, 0, 2, 2, 2, 0, 1, 1, 2, 2, 0,
       1, 1, 1, 1, 1, 2, 0, 2, 2, 0, 0, 0, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2,
       2, 1, 1, 2, 1, 2, 0, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 2, 1, 1,
       2, 0, 2, 0, 2, 0, 0, 1, 2, 2, 0, 2])

In [97]:
path, delta, phi = viterbi(pi, transition_mat, emission_mat, obs_arr)


 Start Walk Forward 

s=0 and t=1: phi[0, 1] = 1.0
s=1 and t=1: phi[1, 1] = 1.0
s=0 and t=2: phi[0, 2] = 0.0
s=1 and t=2: phi[1, 2] = 0.0
s=0 and t=3: phi[0, 3] = 1.0
s=1 and t=3: phi[1, 3] = 1.0
s=0 and t=4: phi[0, 4] = 1.0
s=1 and t=4: phi[1, 4] = 1.0
s=0 and t=5: phi[0, 5] = 1.0
s=1 and t=5: phi[1, 5] = 1.0
s=0 and t=6: phi[0, 6] = 0.0
s=1 and t=6: phi[1, 6] = 0.0
s=0 and t=7: phi[0, 7] = 1.0
s=1 and t=7: phi[1, 7] = 1.0
s=0 and t=8: phi[0, 8] = 1.0
s=1 and t=8: phi[1, 8] = 1.0
s=0 and t=9: phi[0, 9] = 1.0
s=1 and t=9: phi[1, 9] = 1.0
s=0 and t=10: phi[0, 10] = 0.0
s=1 and t=10: phi[1, 10] = 0.0
s=0 and t=11: phi[0, 11] = 1.0
s=1 and t=11: phi[1, 11] = 1.0
s=0 and t=12: phi[0, 12] = 1.0
s=1 and t=12: phi[1, 12] = 1.0
s=0 and t=13: phi[0, 13] = 1.0
s=1 and t=13: phi[1, 13] = 1.0
s=0 and t=14: phi[0, 14] = 1.0
s=1 and t=14: phi[1, 14] = 1.0
s=0 and t=15: phi[0, 15] = 0.0
s=1 and t=15: phi[1, 15] = 0.0
s=0 and t=16: phi[0, 16] = 0.0
s=1 and t=16: phi[1, 16] = 0.0
s=0 and t=17: phi[0, 

In [98]:
state_map = {0:'A', 1:'B'}
''.join([state_map[i] for i in path.tolist()])

'BABBBABBBABBBBAABBABBBBBBBBBBABABBBBBBBAABBBAAAAABBBBBBBABBBBBAAABBAABABBBAAAAAAAABBBBAABBBBBBBABBBB'

In [99]:
# # https://github.com/WuLC/ViterbiAlgorithm/blob/master/Viterbi.py
# ###########################################################################################################
# # Viterbi Algorithm for HMM
# # dp, time complexity O(mn^2), m is the length of sequence of observation, n is the number of hidden states
# ##########################################################################################################


# # five elements for HMM
# states = ('Healthy', 'Fever')
 
# observations = ('normal', 'cold', 'dizzy')
 
# start_probability = {'Healthy': 0.6, 'Fever': 0.4}
 
# transition_probability = {
#    'Healthy' : {'Healthy': 0.7, 'Fever': 0.3},
#    'Fever' :   {'Healthy': 0.4, 'Fever': 0.6},
#    }
 
# emission_probability = {
#    'Healthy' : {'normal': 0.5, 'cold': 0.4, 'dizzy': 0.1},
#    'Fever'   : {'normal': 0.1, 'cold': 0.3, 'dizzy': 0.6},
#    }



# def Viterbit(obs, states, s_pro, t_pro, e_pro):
# 	path = { s:[] for s in states} # init path: path[s] represents the path ends with s
# 	curr_pro = {}
# 	for s in states:
# 		curr_pro[s] = s_pro[s]*e_pro[s][obs[0]]
# 	for i in xrange(1, len(obs)):
# 		last_pro = curr_pro
# 		curr_pro = {}
# 		for curr_state in states:
# 			max_pro, last_sta = max(((last_pro[last_state]*t_pro[last_state][curr_state]*e_pro[curr_state][obs[i]], last_state) 
# 				                       for last_state in states))
# 			curr_pro[curr_state] = max_pro
# 			path[curr_state].append(last_sta)

# 	# find the final largest probability
# 	max_pro = -1
# 	max_path = None
# 	for s in states:
# 		path[s].append(s)
# 		if curr_pro[s] > max_pro:
# 			max_path = path[s]
# 			max_pro = curr_pro[s]
# 		# print '%s: %s'%(curr_pro[s], path[s]) # different path and their probability
# 	return max_path


# if __name__ == '__main__':
# 	obs = ['normal', 'cold', 'dizzy']
# 	print Viterbit(obs, states, start_probability, transition_probability, emission_probability)

SyntaxError: invalid syntax (<ipython-input-99-af7c070b72dd>, line 55)