In [1]:
import numpy as np

# Forward algorithm to calculate alpha values
def forward_algorithm(data, w, A, B):
    T = len(data)
    alpha = np.zeros((T, mz))
    
    # Initialization step
    alpha[0, :] = w * B[:, data[0]]

    # Recursion step
    for t in range(1, T):
        for i in range(mz):
            alpha[t, i] = np.sum(alpha[t - 1] * A[:, i]) * B[i, data[t]]
            
    return alpha

# Backward algorithm to calculate beta values
def backward_algorithm(data, A, B):
    T = len(data)
    mz = A.shape[0]
    beta = np.zeros((T, mz))

    # Initialization step
    beta[T - 1, :] = 1

    # Recursion step
    for t in range(T - 2, -1, -1):
        for i in range(mz):
            beta[t, i] = np.sum(A[i, :] * B[:, data[t + 1]] * beta[t + 1, :])

    return beta

# Baum-Welch one step (E-step and M-step)
def BW_onestep(data, w, A, B, mz, mx):
    T = len(data)
    
    # E-step
    alpha = forward_algorithm(data, w, A, B)
    beta = backward_algorithm(data, A, B)
    gamma = np.zeros((T, mz))
    xi = np.zeros((T - 1, mz, mz))

    for t in range(T):
        gamma[t, :] = alpha[t, :] * beta[t, :]
        gamma[t, :] /= np.sum(gamma[t, :])

    for t in range(T - 1):
        denom = np.sum(alpha[t, :] * np.sum(A * B[:, data[t + 1]].reshape(1, -1) * beta[t + 1, :], axis=1))
        for i in range(mz):
            xi[t, i, :] = alpha[t, i] * A[i, :] * B[:, data[t + 1]] * beta[t + 1, :]
            xi[t, i, :] /= denom

    # M-step
    A_new = np.sum(xi, axis=0) / np.sum(gamma[:-1, :], axis=0).reshape(-1, 1)
    B_new = np.zeros((mz, mx))

    for i in range(mz):
        for l in range(mx):
            B_new[i, l] = np.sum(gamma[data == l, i])
        B_new[i, :] /= np.sum(gamma[:, i])

    return A_new, B_new

# Viterbi algorithm to find the most likely sequence of latent states
def myViterbi(data, w, A, B, mz, mx):
    T = len(data)
    mz = len(w)
    delta = np.zeros((T, mz))
    psi = np.zeros((T, mz), dtype=int)

    # Initialization
    delta[0, :] = w * B[:, data[0]]
    psi[0, :] = 0

    # Recursion
    for t in range(1, T):
        for i in range(mz):
            delta[t, i] = np.max(delta[t - 1] * A[:, i]) * B[i, data[t]]
            psi[t, i] = np.argmax(delta[t - 1] * A[:, i])

    # Termination
    Z = np.zeros(T, dtype=int)
    Z[T - 1] = np.argmax(delta[T - 1, :]) + 1

    # Path backtracking
    for t in range(T - 2, -1, -1):
        Z[t] = psi[t + 1, Z[t + 1] - 1] + 1

    return Z

# Example usage with provided data
data = np.loadtxt('Coding4_part2_data.txt', dtype=int) - 1
mz = 2  
mx = 3  
w = np.array([0.5, 0.5])
A = np.array([[0.5, 0.5], [0.5, 0.5]])
B = np.array([[1/9, 3/9, 5/9], [1/6, 2/6, 3/6]])

# Run Baum-Welch for 100 iterations
for _ in range(100):
    A, B = BW_onestep(data, w, A, B, mz, mx)

# Run Viterbi to get the most likely sequence of hidden states
Z = myViterbi(data, w, A, B, mz, mx)

print("\nTesting 1. \n")
print(f"Transition matrix A: \n{A}") 
print(f"Emission matrix B: \n{B}")
print(f"Most likely latent sequence Z: \n{Z}")


# Initialize matrix B with each entry as 1/3, run Baum-Welch for 20 and 100 iterations, and analyze results
B_new = np.array([[1/3, 1/3, 1/3], [1/3, 1/3, 1/3]])

# Run Baum-Welch for 20 iterations
for _ in range(20):
    A, B_new = BW_onestep(data, w, A, B_new, mz, mx)

print("\nTesting 2. \n")
print("Transition matrix A after 20 iterations with uniform B initialization:")
print(A)
print("Emission matrix B after 20 iterations with uniform B initialization:")
print(B)

# Run Baum-Welch for 100 iterations
for _ in range(80):  # Continue for an additional 80 iterations to reach 100
    A, B_new = BW_onestep(data, w, A, B_new, mz, mx)

print("Transition matrix A after 100 iterations with uniform B initialization:")
print(A)
print("Emission matrix B after 100 iterations with uniform B initialization:")
print(B)


Testing 1. 

Transition matrix A: 
[[0.49793938 0.50206062]
 [0.44883431 0.55116569]]
Emission matrix B: 
[[0.22159897 0.20266127 0.57573976]
 [0.34175148 0.17866665 0.47958186]]
Most likely latent sequence Z: 
[1 1 1 1 1 1 1 2 1 1 1 1 1 2 2 1 1 1 1 1 1 1 2 2 2 2 2 1 1 1 1 1 1 1 2 1 1
 1 1 1 1 1 1 2 2 1 1 1 1 1 1 2 2 2 1 1 1 1 2 2 2 2 1 1 1 1 1 1 1 1 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1
 1 1 1 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2
 2 2 2 1 1 1 2 2 2 2 2 2 1 1 1 1 1 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 1 1 1 1 1
 1 1 1 2 2 2 2 2 1 1 1 1 1 1 1]

Testing 2. 

Transition matrix A after 20 iterations with uniform B initialization:
[[0.49735183 0.50264817]
 [0.44820615 0.55179385]]
Emission matrix B after 20 iterations with uniform B initialization:
[[0.22159897 0.20266127 0.57573976]
 [0.34175148 0.17866665 0.47958186]]
Transition matrix A after 100 iterations with uniform B initialization:
[[0.48927883 0.51072117]
 [0.41