In [1]:
import pandas as pd
import numpy as np

# Tag set shared by all three tables. Defined once so the row labels of A, B
# and π cannot drift apart.
TAGS = ['DT', 'JJ', 'NN', 'NNS', 'VBZ', 'VBP']

# Transition probabilities (A)
# Each dict key is the PREVIOUS tag (it becomes a DataFrame column) and its list
# is the distribution over the NEXT tag, in TAGS order. Hence
# A_df.loc[next_tag, prev_tag] = P(next_tag | prev_tag) and every COLUMN sums to 1.
A_data = {
    'DT': [0, 0.2, 0.5, 0.3, 0, 0],
    'JJ': [0, 0, 0.8, 0.2, 0, 0],
    'NN': [0, 0, 0, 0.1, 0.9, 0],
    'NNS': [0, 0, 0, 0, 0, 1],
    'VBZ': [0.5, 0, 0.2, 0.3, 0, 0],
    'VBP': [0.4, 0, 0.4, 0.2, 0, 0]
}
A_df = pd.DataFrame(A_data, index=TAGS)

# Emission probabilities (B)
# Each dict key is a word (a DataFrame column) and its list gives P(word | tag)
# for every tag in TAGS order. Hence B_df.loc[tag, word] = P(word | tag) and
# every ROW sums to 1.
B_data = {
    'the': [1, 0, 0, 0, 0, 0],
    'big': [0, 0.8, 0, 0, 0, 0],
    'kid': [0, 0.2, 0.3, 0, 0, 0],
    'fish': [0, 0, 0.4, 0.3, 0, 0.7],
    'time': [0, 0, 0.3, 0, 0, 0.3],
    'fishes': [0, 0, 0, 0.4, 0.6, 0],
    'times': [0, 0, 0, 0.3, 0.4, 0]
}
B_df = pd.DataFrame(B_data, index=TAGS)

# Initial state probabilities (π)
# One value per tag, in TAGS order; the single column sums to 1.
pi_data = {
    'probability': [0.4, 0.2, 0, 0.3, 0, 0.1]
}
pi_df = pd.DataFrame(pi_data, index=TAGS)

# Sanity checks: the tables are typed in by hand, so verify that each one is a
# proper probability distribution before anything downstream relies on it.
assert np.allclose(A_df.sum(axis=0), 1.0), "each column of A must sum to 1"
assert np.allclose(B_df.sum(axis=1), 1.0), "each row of B must sum to 1"
assert np.isclose(pi_df['probability'].sum(), 1.0), "π must sum to 1"

# Display the DataFrames
print("Transition probabilities (A):")
print(A_df)
print("\nEmission probabilities (B):")
print(B_df)
print("\nInitial state probabilities (π):")
print(pi_df)

Transition probabilities (A):
      DT   JJ   NN  NNS  VBZ  VBP
DT   0.0  0.0  0.0    0  0.5  0.4
JJ   0.2  0.0  0.0    0  0.0  0.0
NN   0.5  0.8  0.0    0  0.2  0.4
NNS  0.3  0.2  0.1    0  0.3  0.2
VBZ  0.0  0.0  0.9    0  0.0  0.0
VBP  0.0  0.0  0.0    1  0.0  0.0

Emission probabilities (B):
     the  big  kid  fish  time  fishes  times
DT     1  0.0  0.0   0.0   0.0     0.0    0.0
JJ     0  0.8  0.2   0.0   0.0     0.0    0.0
NN     0  0.0  0.3   0.4   0.3     0.0    0.0
NNS    0  0.0  0.0   0.3   0.0     0.4    0.3
VBZ    0  0.0  0.0   0.0   0.0     0.6    0.4
VBP    0  0.0  0.0   0.7   0.3     0.0    0.0

Initial state probabilities (π):
     probability
DT           0.4
JJ           0.2
NN           0.0
NNS          0.3
VBZ          0.0
VBP          0.1


In [2]:
def viterbi(observations, A, B, pi):
    """Find the most likely hidden-state sequence of an HMM (Viterbi algorithm).

    Parameters
    ----------
    observations : sequence
        Observed symbols in order. Each one must be a column label of ``B``.
    A : pandas.DataFrame
        Transition table, read positionally as ``A.iloc[s, s0]`` = probability
        of moving from state ``s0`` (column) to state ``s`` (row). Its row
        index supplies the state labels; the columns are expected to follow
        the same order as the rows.
    B : pandas.DataFrame
        Emission table, read by label as ``B.loc[state, symbol]``.
    pi : pandas.DataFrame
        Initial state probabilities. Only the first column is used, read
        positionally in the same row order as ``A``.

    Returns
    -------
    viterbi_matrix : numpy.ndarray of shape (N, T)
        ``viterbi_matrix[s, t]`` is the probability of the best state sequence
        that ends in state ``s`` after the first ``t + 1`` observations.
    best_path : list
        State labels of the most likely sequence, one per observation. Empty
        when ``observations`` is empty.

    Raises
    ------
    KeyError
        If an observation is not a column of ``B``.

    Notes
    -----
    Raw probabilities are multiplied, so very long sequences can underflow to
    zero. If no state sequence has non-zero probability, every score is zero
    and the arg-max falls back to the first state at each step.
    """
    states = A.index.tolist()
    N = len(states)
    T = len(observations)

    # Fail early with a readable message instead of a bare pandas KeyError
    # raised from the middle of the recursion.
    unknown = [obs for obs in observations if obs not in B.columns]
    if unknown:
        raise KeyError(f"observations missing from the emission table: {unknown}")

    # Initialize viterbi and backpointer matrices
    viterbi_matrix = np.zeros((N, T))
    backpointer = np.zeros((N, T), dtype=int)
    if T == 0:
        # Nothing to decode: return an empty (N, 0) trellis and an empty path.
        return viterbi_matrix, []

    # Pull the tables out of pandas once; per-cell .iloc/.loc lookups are slow.
    transition = A.iloc[:N, :N].to_numpy(dtype=float)  # [to, from]
    initial = pi.iloc[:N, 0].to_numpy(dtype=float)

    def emission(symbol):
        # P(symbol | state) for every state, aligned to `states` by label.
        return B.loc[states, symbol].to_numpy(dtype=float)

    # Initialize first column of viterbi matrix
    viterbi_matrix[:, 0] = initial * emission(observations[0])

    # Run Viterbi for t > 0
    for t in range(1, T):
        previous = viterbi_matrix[:, t - 1]
        # scores[s, s0] = best score ending in s0 at t-1, times P(s | s0),
        # times P(observation | s). Same multiplication order as the scalar
        # formulation, so results are bit-identical.
        scores = (previous[np.newaxis, :] * transition) * emission(observations[t])[:, np.newaxis]
        viterbi_matrix[:, t] = scores.max(axis=1)
        # argmax returns the first maximum, i.e. ties go to the earliest state.
        backpointer[:, t] = scores.argmax(axis=1)

    # Backtrack: collect the path end-to-start, then flip it once (linear time).
    best_state = int(np.argmax(viterbi_matrix[:, -1]))
    best_path = [states[best_state]]
    for t in range(T - 1, 0, -1):
        best_state = int(backpointer[best_state, t])
        best_path.append(states[best_state])
    best_path.reverse()

    return viterbi_matrix, best_path

# Example usage: decode a four-word sentence with the toy HMM tables built in
# the first cell and report the recovered part-of-speech tags.
observations = ['the', 'kid', 'fishes', 'fish']
viterbi_matrix, best_path = viterbi(
    observations,
    A=A_df,
    B=B_df,
    pi=pi_df,
)
print(f"Observations: {observations}")
print(f"Most likely POS tags: {best_path}")


Observations: ['the', 'kid', 'fishes', 'fish']
Most likely POS tags: ['DT', 'NN', 'VBZ', 'NNS']
