# In [3]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
######################
# Speech recognition #
######################
# 1. Work through the following resources:
#     Viterbi algorithm. (n.d.). In Wikipedia. Retrieved November 9, 2016, from
#     https://en.wikipedia.org/wiki/Viterbi_algorithm
#     The 44 Phonemes in English. (n.d.). Retrieved November 9, 2016, from
#     http://www.dyslexia-reading-well.com/44-phonemes-in-english.html
# 2. Read and run the code given below.
# 3. Answer the following questions:
#   a. What does the transition_probability table describe?

# The Markov chain over letters: transition_probability[x][y] is the
# probability that letter x is followed by letter y.

#   b. What does the emission_probability table describe?

# The probability that a given letter (hidden state) is heard as a given
# sound (observation): emission_probability[letter][sound].

#   c. What does the start_probability table describe?

# The probability that the sequence starts with a given letter.

#   d. What does the Viterbi algorithm do with these tables?
#   e. Describe the optimal substructure found in this problem.
#   f. How should one interpret the output of the Viterbi algorithm?




def viterbi(obs, states, start_p, trans_p, emit_p):
    """Decode the most probable hidden-state path for a sequence of observations.

    Fills the Viterbi trellis one observation at a time, prints the trellis
    with ``dptable``, prints a one-line summary of the best path, and returns
    that path together with its probability.

    Args:
        obs: Non-empty sequence of observations (here: phonemes).
        states: Iterable of hidden states (here: letters). It is iterated
            several times, so it must be re-iterable (not a one-shot iterator).
        start_p: ``start_p[st]`` is the probability of starting in ``st``.
        trans_p: ``trans_p[prev][st]`` is the probability of moving from
            ``prev`` to ``st``.
        emit_p: ``emit_p[st][o]`` is the probability that ``st`` emits ``o``.

    Returns:
        A tuple ``(path, prob)``: ``path`` is the list of states, one per
        observation, with the highest joint probability, and ``prob`` is that
        probability.

    Raises:
        ValueError: If ``obs`` or ``states`` is empty.
    """
    if len(obs) == 0:
        raise ValueError("viterbi() requires at least one observation")

    # V[t][st] records the probability of the best path that ends in state
    # `st` after the first t + 1 observations ("prob") and the state that
    # precedes `st` on that path ("prev").
    V = [{st: {"prob": start_p[st] * emit_p[st][obs[0]], "prev": None}
          for st in states}]
    if not V[0]:
        raise ValueError("viterbi() requires at least one state")

    # Run Viterbi when t > 0
    for t in range(1, len(obs)):
        prev_column = V[t - 1]
        column = {}
        for st in states:
            # Probability of reaching `st` through each possible predecessor.
            candidates = [
                (prev_column[prev_st]["prob"] * trans_p[prev_st][st], prev_st)
                for prev_st in states
            ]
            # max() returns the first maximal item, so ties resolve to the
            # earliest state in `states`.
            best_tr_prob, best_prev = max(candidates, key=lambda c: c[0])
            column[st] = {"prob": best_tr_prob * emit_p[st][obs[t]],
                          "prev": best_prev}
        V.append(column)

    for line in dptable(V):
        print(line)

    # The most probable final state (first one on ties) ends the best path.
    last_column = V[-1]
    best_last = max(last_column, key=lambda st: last_column[st]["prob"])
    max_prob = last_column[best_last]["prob"]

    # Follow the "prev" links back to the first observation, then restore
    # chronological order.
    path = [best_last]
    for t in range(len(V) - 1, 0, -1):
        path.append(V[t][path[-1]]["prev"])
    path.reverse()

    print('The steps of states are ' + ' '.join(map(str, path)) +
          ' with highest probability of %s' % max_prob)
    return path, max_prob


def dptable(V):
    """Yield the lines of a text table of the Viterbi trellis ``V``.

    Args:
        V: List of columns, one per time step. Each column maps a state to a
            dict whose ``"prob"`` entry is the probability to display.

    Yields:
        First a header line with the step indices, each right-aligned in 9
        characters, then one line per state of ``V[0]``: the state label
        (cut to 9 characters) followed by that state's probability at every
        step in ``%.3E`` notation. An empty ``V`` yields only the (empty)
        header line.
    """
    yield " ".join("%9s" % step for step in range(len(V)))
    # An empty trellis has no first column, hence no state rows.
    first_column = V[0] if V else {}
    for state in first_column:
        # Values are deliberately not truncated: cutting "%.3E" output to 9
        # characters would turn 1.000E-100 into 1.000E-10.
        cells = ("%.3E" % column[state]["prob"] for column in V)
        yield "%.9s: " % state + " ".join(cells)


# Hidden states: the letters that may have produced the sounds.
states = 'abcdr'

# The observed phoneme sequence to decode.
observations = (
    '/a/', '/b/', '/r/', '/ã/', '/k/', '/a/',
    '/d/', '/d/', '/b/', '/r/', '/ã/',
)

# start_probability[letter]: probability that the sequence starts with letter.
start_probability = {'a': 0.4, 'b': 0.1, 'c': 0.1, 'd': 0.3, 'r': 0.1}

# transition_probability[x][y]: probability that letter x is followed by y.
# NOTE(review): the 'c' row sums to 0.9 rather than 1.0 - confirm the
# intended values.
transition_probability = {
    'a': {'a': 0, 'b': 0.3, 'c': 0.3, 'd': 0.3, 'r': 0.1},
    'b': {'a': 0.2, 'b': 0, 'c': 0.2, 'd': 0.1, 'r': 0.5},
    'c': {'a': 0.5, 'b': 0.1, 'c': 0.1, 'd': 0.1, 'r': 0.1},
    'd': {'a': 0.5, 'b': 0.2, 'c': 0.2, 'd': 0.0, 'r': 0.1},
    'r': {'a': 0.7, 'b': 0.1, 'c': 0.1, 'd': 0.1, 'r': 0},
}

# emission_probability[letter][sound]: probability that letter is heard as
# sound.
# NOTE(review): the 'b' row sums to 1.05 rather than 1.0 - confirm the
# intended values.
emission_probability = {
    'a': {'/a/': 0.4, '/ã/': 0.3, '/b/': 0.05,
          '/r/': 0.05, '/d/': 0.15, '/k/': 0.05},
    'b': {'/a/': 0.05, '/ã/': 0.05, '/b/': 0.65,
          '/r/': 0.05, '/d/': 0.2, '/k/': 0.05},
    'c': {'/a/': 0.05, '/ã/': 0.05, '/b/': 0.05,
          '/r/': 0.05, '/d/': 0.05, '/k/': 0.75},
    'd': {'/a/': 0.05, '/ã/': 0.05, '/b/': 0.2,
          '/r/': 0.05, '/d/': 0.6, '/k/': 0.05},
    'r': {'/a/': 0.05, '/ã/': 0.05, '/b/': 0.05,
          '/r/': 0.7, '/d/': 0.05, '/k/': 0.1},
}

# Decode the observations and print the trellis and the best letter sequence.
viterbi(
    obs=observations,
    states=states,
    start_p=start_probability,
    trans_p=transition_probability,
    emit_p=emission_probability,
)

# Output:
#         0         1         2         3         4         5         6         7         8         9        10
# a: 1.600E-01 3.750E-04 3.120E-04 2.293E-03 1.365E-06 1.032E-04 2.709E-07 1.393E-06 9.287E-09 2.717E-09 1.997E-08
# b: 5.000E-03 3.120E-02 9.600E-05 5.460E-05 3.440E-05 2.580E-06 6.192E-06 7.430E-07 2.717E-07 8.359E-10 4.754E-10
# c: 5.000E-03 2.400E-03 3.120E-04 5.460E-05 5.160E-04 2.580E-06 1.548E-06 1.857E-07 2.090E-08 2.717E-09 4.754E-10
# d: 1.500E-02 9.600E-03 1.560E-04 5.460E-05 3.440E-05 2.580E-06 1.857E-05 3.715E-07 8.359E-08 1.358E-09 4.754E-10
# r: 5.000E-03 8.000E-04 1.092E-02 2.400E-06 2.293E-05 2.580E-06 5.160E-07 1.548E-07 1.857E-08 9.508E-08 2.090E-11
# The steps of states are a b r a c a d a b r a with highest probability of 1.9966878067499996e-08
