In [2]:
import numpy as np

In [54]:
# Toy transition matrix. Row 0 holds the start probabilities; each following
# row holds the outgoing probabilities of one hidden state (this is how
# viterbi indexes it: row 0 at the first step, row s_prime + 1 afterwards).
tran = np.matrix([
    [0.8, 0.2],
    [0.6, 0.3],
    [0.4, 0.5],
])

In [10]:
# Toy observation-likelihood matrix: one row per hidden state, one column per
# vocabulary entry (viterbi reads it as observations[state, event - 1]).
observations = np.matrix([
    [0.2, 0.4, 0.4],
    [0.5, 0.4, 0.1],
])

In [11]:
# Display the transition matrix to check its layout (3 rows x 2 columns).
tran

matrix([[ 0.8,  0.2],
        [ 0.6,  0.3],
        [ 0.4,  0.5]])

In [12]:
# Display the observation matrix to check its layout (2 rows x 3 columns).
observations

matrix([[ 0.2,  0.4,  0.4],
        [ 0.5,  0.4,  0.1]])

In [55]:
# Names of the hidden states, in the order of the state indices that viterbi
# yields (get_sequence uses this list to translate indices into names).
unobserved_states = ["HOT", "COLD"]
# Observed events as 1-based column numbers of `observations`
# (viterbi subtracts 1 before indexing).
events = [1, 3, 1]

In [75]:
# NOTE(review): this cell appears before the cell that defines `viterbi`, and
# its execution count (75) is lower than the definition's (77), so it does not
# run on a fresh kernel in top-to-bottom order.
# NOTE(review): the saved output below shows state names, whereas the `viterbi`
# defined later in this notebook is a generator of state indices -- presumably
# the output comes from an earlier version of the function; confirm and re-run.
viterbi(tran, observations, events)

HOT
HOT
COLD


In [77]:
def viterbi(transition, observations, events):
    """Compute the most likely sequence of hidden states for observed events.

    Arguments:
        transition: 2-D matrix of shape (n_states + 1, n_states). Row 0 holds
            the start probabilities; row i + 1 holds the probabilities of
            moving from state i to each state.
        observations: observation likelihood matrix, with states as rows and
            vocabulary entries as columns.
        events: sequence of observed events, given as 1-based column numbers
            of `observations` (1 is subtracted before indexing).

    Returns:
        generator, which yields the state indices of the best path in
        REVERSE time order (state of the last event first). Nothing is
        yielded when `events` is empty.
    """
    n_states = transition.shape[1]
    n_events = len(events)
    if n_events == 0:
        # Nothing was observed, so there is no path to decode.
        return

    # v[s, t]  : probability of the best path that ends in state s at time t
    # bp[s, t] : state at time t - 1 on that best path (back-pointer)
    v = np.zeros((n_states, n_events))
    bp = np.zeros((n_states, n_events), dtype=int)

    # initialization step: start probability times likelihood of first event
    first_col = events[0] - 1
    for s in range(n_states):
        v[s, 0] = transition[0, s] * observations[s, first_col]

    # induction step
    for t in range(1, n_events):
        col = events[t] - 1
        for s in range(n_states):
            emit = observations[s, col]
            # Row s_prime + 1 because row 0 of `transition` is the start row.
            candidates = [
                v[s_prime, t - 1] * transition[s_prime + 1, s] * emit
                for s_prime in range(n_states)
            ]
            # keep the best score and remember which predecessor produced it
            v[s, t] = max(candidates)
            bp[s, t] = np.argmax(candidates)

    # termination step: most probable state at the final time step
    q = int(np.argmax(v[:, n_events - 1]))

    # back reference step: follow the back-pointers from the last event to the
    # first, emitting one state per time step
    for t in reversed(range(n_events)):
        yield q
        q = int(bp[q, t])

    

In [95]:
def get_sequence(viterbi_gen, names_events):
    """Translate the output of a Viterbi generator into a list of state names.

    Each item produced by `viterbi_gen` is used as an index into
    `names_events`. The returned list is in the opposite order to the one in
    which the generator produced its items.
    """
    names = [names_events[idx] for idx in viterbi_gen]
    names.reverse()
    return names
    

In [100]:
# Decode the toy example for three observations of event 3, then map the
# yielded state indices to their names.
ice_cream = viterbi(transition=tran, observations=observations, events=[3, 3, 3])
get_sequence(viterbi_gen=ice_cream, names_events=unobserved_states)

['HOT', 'HOT', 'HOT']

In [105]:
from numpy import genfromtxt
# Load the part-of-speech test example from CSV. This rebinds the
# notebook-level names `observations`, `tran`, `unobserved_states` and
# `events`, replacing the toy matrices defined earlier.
# NOTE(review): DATA_DIR is an absolute, machine-specific location; edit this
# one constant to run the notebook elsewhere.
DATA_DIR = '/Users/amyburkhardt/Dropbox/NLP Readings/hw 1/test_POS_book_example'
observations = genfromtxt(DATA_DIR + '/test_observations.csv', delimiter=',')
tran = genfromtxt(DATA_DIR + '/test_transitions.csv', delimiter=',')
# Tag names, indexed by the state numbers viterbi yields.
# assumes the rows of test_observations.csv follow this tag order -- TODO confirm
unobserved_states = ['NNP', 'MD','VB','JJ','NN','RB','DT']
# Observed events as 1-based column numbers of `observations`.
events = [1, 2, 3, 4, 5]

In [106]:
# Decode the loaded part-of-speech example and show the tag names.
# (`ice_cream` is the same variable name the toy example used.)
ice_cream = viterbi(transition=tran, observations=observations, events=events)
get_sequence(viterbi_gen=ice_cream, names_events=unobserved_states)

['NNP', 'MD', 'VB', 'DT', 'NN']

In [None]:
# 1. look up by string, instead of number
# 2. divide training and test
# 3. implement baseline most frequent
# 4. next steps for me on my own. 