In [1]:
import math

In [2]:
def log(x):
    return -math.inf if x == 0 else math.log(x)

def get_log_prob_of_a_given_path(path: str, seq: str) -> float:
    if len(path) != len(seq):
        raise ValueError("Path and sequence are of different lenghts.")

    prob = 0.0
    for i in range(len(seq)):
        p1 = path[i]
        s1 = seq[i]
        if i == 0:
            prob += log(start_prob[p1])
        else:
            prob += log(trans_prob[path[i-1]][p1])
        prob += log(emit_prob[p1][s1])

    # Transition to End (only possible from I)
    last_state = path[-1]
    if last_state == 'I':
        prob += log(trans_prob['I']['end'])  # I → End

    return prob

# Defining parameters as in Nature Primer
states = ['E', '5', 'I']
start_prob = {'E': 1.0, '5': 0.0, 'I': 0.0}

trans_prob = {
    'E': {'E': 0.9, '5': 0.1},
    '5': {'I': 1.0},
    'I': {'I': 0.9, 'end': 0.1},
}

emit_prob = {
    'E': {'A': 0.25, 'C': 0.25, 'G': 0.25, 'T': 0.25},
    '5': {'A': 0.05, 'C': 0.0, 'G': 0.95, 'T': 0.0},
    'I': {'A': 0.4, 'C': 0.1, 'G': 0.1, 'T': 0.4}
}

path = "EEEEEEEEEEEEEEEEEE5IIIIIII"
sequence = "CTTCATGTGAAAGCAGACGTAAGTCA"

print("Log probability of given path:", round(get_log_prob_of_a_given_path(path, sequence),2))


Log probability of given path: -41.22


In [3]:
V = [{}]
path = {}

for s in states:
    V[0][s] = log(start_prob[s]) + log(emit_prob[s].get(sequence[0], 0))
    path[s] = [s]

# using a recursive approach
for i in range(1, len(sequence)):
    V.append({})
    newpath = {}

    for curr_state in states:
        max_prob = -math.inf
        best_prev = None

        for prev_state in states:
            # transition probability from prev_state to curr_state
            trans_p = trans_prob.get(prev_state, {}).get(curr_state, 0)

            if trans_p > 0:
                # emission probability of current state emitting current observation
                emit_p = emit_prob[curr_state].get(sequence[i], 0)

                if emit_p > 0:
                    prob = V[i-1][prev_state] + math.log(trans_p) + math.log(emit_p)

                    if prob > max_prob:
                        max_prob = prob
                        best_prev = prev_state

        # Set the max probability for this current state at time i
        V[i][curr_state] = max_prob

        # Buidling path
        if best_prev is not None:
            newpath[curr_state] = path[best_prev] + [curr_state]
        else:
            newpath[curr_state] = [curr_state]  # fallback in case of no valid path

    # Update path for next iteration
    path = newpath

# Termination
n = len(sequence) - 1
(prob, state) = max((V[n][s], s) for s in states)

logp = round(prob, 2)
best_path = ''.join(path[state])


print("Viterbi best log probability:", logp)
print("Most likely path:", best_path)


Viterbi best log probability: -38.68
Most likely path: EEEEEEEEEEEEEEEEEEEEEEEEEE
