In [2]:
from operator import itemgetter
from math import log2

Viterbi algorithm
=========

In [3]:
start_p = {"F": 0.5, "L": 0.5}
E = {("F", "o"): 0.5, ("F", "-"): 0.5, ("L", "o"): 0.9, ("L", "-"): 0.1}
T = {("F", "F"): 0.8, ("F", "L"): 0.2, 
     ("L", "F"): 0.1, ("L", "L"): 0.9}
states = start_p.keys()
x = "-o-o-oo--oooo-o"

Viterbi in probability space
--------------------------

In [4]:
V = []  # probability of the most probable path ending in state k with observation xi
P = []  # a previous state for which transition was most probable

for xi in x:
    prev = {l: max((V[-1][k] * T[k, l], k) for k in states) if V else (start_p[l], "0")
            for l in states}
    V.append({l: E[l, xi] * prev[l][0] for l in states})
    P.append({l: prev[l][1] for l in states})


In [5]:
V, P

([{'F': 0.25, 'L': 0.05},
  {'F': 0.1, 'L': 0.045000000000000005},
  {'F': 0.04000000000000001, 'L': 0.004050000000000001},
  {'F': 0.016000000000000004, 'L': 0.0072000000000000015},
  {'F': 0.006400000000000002, 'L': 0.0006480000000000001},
  {'F': 0.002560000000000001, 'L': 0.0011520000000000005},
  {'F': 0.0010240000000000004, 'L': 0.0009331200000000005},
  {'F': 0.0004096000000000002, 'L': 8.398080000000005e-05},
  {'F': 0.00016384000000000008, 'L': 8.192000000000004e-06},
  {'F': 6.553600000000003e-05, 'L': 2.9491200000000013e-05},
  {'F': 2.6214400000000015e-05, 'L': 2.3887872000000012e-05},
  {'F': 1.0485760000000006e-05, 'L': 1.934917632000001e-05},
  {'F': 4.1943040000000025e-06, 'L': 1.5672832819200008e-05},
  {'F': 1.6777216000000011e-06, 'L': 1.410554953728001e-06},
  {'F': 6.710886400000005e-07, 'L': 1.1425495125196809e-06}],
 [{'F': '0', 'L': '0'},
  {'F': 'F', 'L': 'F'},
  {'F': 'F', 'L': 'L'},
  {'F': 'F', 'L': 'F'},
  {'F': 'F', 'L': 'L'},
  {'F': 'F', 'L': 'F'},
  {'F

In [6]:
end_state, prob = max(V[-1].items(), key=itemgetter(1))
print("End state: {}. P={:.4f}".format(end_state, prob))

End state: L. P=0.0000


In [7]:
path = [end_state]
for prev in reversed(P[1:]):
    path.append(prev[path[-1]])
path.reverse()
print("Path: {}".format("".join(path)))

Path: FFFFFFFFFLLLLLL


Viterbi in log space
-------------------

In [8]:
V = []  # probability of the most probable path ending in state k with observation xi
P = []  # a previous state for which transition was most probable

TL = {k: log2(v) for k, v in T.items()}
EL = {k: log2(v) for k, v in E.items()}
SL = {k: log2(v) for k, v in start_p.items()}
    
for xi in x:
    prev = {l: max((V[-1][k] + TL[k, l], k) for k in states) if V else (SL[l], "0")
            for l in states}
    V.append({l: EL[l, xi] + prev[l][0] for l in states})
    P.append({l: prev[l][1] for l in states})

In [9]:
path = [end_state]
for prev in reversed(P[1:]):
    path.append(prev[path[-1]])
path.reverse()
print("Path: {}".format("".join(path)))

Path: FFFFFFFFFLLLLLL


Viterbi in a nice function
-------------------------

In [10]:
def viterbi(s, T, E, P0):
    """
    Return most probable state path given sequence,
    transition and emission matrix
    """
    T = {k: log2(v) for k, v in T.items()}
    E = {k: log2(v) for k, v in E.items()}
    P0 = {k: log2(v) for k, v in P0.items()}
    states = start_p.keys()
    
    V, P = [], []
    for si in s:
        prev = {l: max((V[-1][k] + T[k, l], k) for k in states) if V else (P0[l], "0")
                for l in states}
        V.append({l: E[l, si] + prev[l][0] for l in states})
        P.append({l: prev[l][1] for l in states})

    end_state, prob = max(V[-1].items(), key=itemgetter(1))
    path = [end_state]
    for prev in reversed(P[1:]):
        path.append(prev[path[-1]])
    path.reverse()
    return path, prob, V, P

Try a function on a loaded coin case
---

In [96]:
P0 = {"F": 0.5, "L": 0.5}
E = {("F", "o"): 0.5, ("F", "-"): 0.5, ("L", "o"): 0.9, ("L", "-"): 0.1}
T = {("F", "F"): 0.8, ("F", "L"): 0.2, 
     ("L", "F"): 0.1, ("L", "L"): 0.9}
x = "-o-o-oo--oooo-o"

In [97]:
path, p, dpt, P = viterbi(x, T, E, P0)
print("Path: {}".format("".join(path)))
print("Probability: {:.3e}".format(2**p))

Path: FFFFFFFFFLLLLLL
Probability: 1.143e-06


Joint probability of observed sequence x and a path
---

In [116]:
from operator import mul
from functools import reduce

def pair_walk(s):
    """Generate k-mers from a sequence"""
    for i in range(len(s)-1):
        yield s[i], s[i+1]

product = lambda x: reduce(mul, x, 1)

def joint_probability(x, path, T, E, P0):
    p = P0[path[0]]
    p *= product(E[s1, xi] * T[s1, s2] for (xi, (s1, s2)) in zip(x, pair_walk(path)))
    p *= E[path[-1], x[-1]]
    return p

In [117]:
jp = joint_probability(x, path, T, E, P0)
print("{:.3e} {}".format(jp, "".join(path)))

2.856e-08 LFFFFFFFFLLLLLL


In [118]:
path[-1] = "F"
jp = joint_probability(x, path, T, E, P0)
print("{:.3e} {}".format(jp, "".join(path)))

1.763e-09 LFFFFFFFFLLLLLF


In [119]:
path[-1] = "L"; path[0] = "L"
jp = joint_probability(x, path, T, E, P0)
print("{:.3e} {}".format(jp, "".join(path)))

2.856e-08 LFFFFFFFFLLLLLL


Going crazy
-----------
Construct a random path, compute joint probability, report best path every 1000 paths. Assume the same probability for the states and multinomial model. (This is of course a wrong solution to the problem and is rather an academic exercise -- consider increasing the problem to longer paths).

In [138]:
import random
random.seed(42)
states = tuple(P0.keys())
best_jp, best_path = 0, None
for i in range(20000):
    r_path = "".join(random.choice(states) for _ in x)
    jp = joint_probability(x, r_path, T, E, P0)
    if jp > best_jp:
        best_jp, best_path = jp, r_path
    if not (i % 1000):
        print("{:10d} {:.3e} {}".format(i, best_jp, "".join(best_path)))
    

         0 6.754e-10 LLFLLLLLFLLLLLL
      1000 1.376e-07 FFFFFFFFFFFFLLL
      2000 1.376e-07 FFFFFFFFFFFFLLL
      3000 3.020e-07 FFFFFFFFFFFFFFL
      4000 3.020e-07 FFFFFFFFFFFFFFL
      5000 3.020e-07 FFFFFFFFFFFFFFL
      6000 3.020e-07 FFFFFFFFFFFFFFL
      7000 3.020e-07 FFFFFFFFFFFFFFL
      8000 3.020e-07 FFFFFFFFFFFFFFL
      9000 3.020e-07 FFFFFFFFFFFFFFL
     10000 3.020e-07 FFFFFFFFFFFFFFL
     11000 3.020e-07 FFFFFFFFFFFFFFL
     12000 3.020e-07 FFFFFFFFFFFFFFL
     13000 3.020e-07 FFFFFFFFFFFFFFL
     14000 3.020e-07 FFFFFFFFFFFFFFL
     15000 3.020e-07 FFFFFFFFFFFFFFL
     16000 3.020e-07 FFFFFFFFFFFFFFL
     17000 3.020e-07 FFFFFFFFFFFFFFL
     18000 3.020e-07 FFFFFFFFFFFFFFL
     19000 1.143e-06 FFFFFFFFFLLLLLL
