## Part-of-Speech Tagger

In [3]:
import pandas as pd
import numpy as np

## create transition properties

In [8]:
tran = np.matrix([[.8, .2, 0],[.6, .3, .1], [.4, .5, .1]])

In [9]:
tran

matrix([[ 0.8,  0.2,  0. ],
        [ 0.6,  0.3,  0.1],
        [ 0.4,  0.5,  0.1]])

## Create observations likelihood. Include all, and then index what you want

In [26]:
observations = np.matrix([[.2, .4, .4], [.5, .4, .1]]).transpose()

In [27]:
observations

matrix([[ 0.2,  0.5],
        [ 0.4,  0.4],
        [ 0.4,  0.1]])

## create empty dataframe that is two rows, and three columns (one column per observation)

In [16]:
df = np.zeros((2, 3))

In [17]:
df

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [30]:
unobserved = ['HOT', 'COLD']
events = [1, 3, 1]

In [31]:
probs = []

# best_ind should be the same size as df
# Each column stores the most likely states for time t
# Each row corresponds to a state (HOT, COLD) and stores the most likely previous state (HOT=0, COLD=1)
best_ind = np.zeros((2,3))

# begin in the first column by setting the Viterbi value in each
# cell to the product of the transition probability and
# the observation probability
for i in range(0,len(unobserved)):
    df [i,0] = tran[0,i] * observations[events[0]-1,i]
    index = np.argmax(df[:,0])
    best_ind[i][0] = index

# move on column by column (vocabulary by vocabulary)
for t in range(1,len(events)):    
    for j in range(0,len(unobserved)):
        for k in range (0,len(unobserved)):
            # compute the probability of moving into each state
            # previous Viterbi path probability from previous step (df [j,t-1])
            prev = df[k, t-1]
                
            # the state observation likelihood (observations [k,t])
            event = events[t]-1 # minus 1 because the index into observations should start at 0
            obs_prob = observations[event, j]
            
            # the transition probability
            tran_prob = tran[k+1, j] # k+1 because the first row of tran is the start probabilities
            
            prob = obs_prob * prev * tran_prob
            probs.append(prob)
            
        # select the largest probability for moving into each state to be stored
        # in table
        df[j,t] = max(probs)

        # get the index of which state had the larger probabilty: Hot (0) or Cold (1)
        index = np.argmax(probs)
        best_ind[j][t] = index
        probs = []

# termination step:
T = len(events) - 1
final_prob = []
for k in range(0,len(unobserved)):
    final_prob.append(df[k, T] * tran[k+1, T])

# backtrack:
back = np.argmax(final_prob)
for i in reversed(range(len(best_ind[1]))):
    back = int(best_ind[back, i])
    print(unobserved[back])

print(df)
print(best_ind)

HOT
HOT
HOT
[[ 0.16      0.0384    0.004608]
 [ 0.1       0.005     0.00576 ]]
[[ 0.  0.  0.]
 [ 0.  1.  0.]]
