The following is a working Hidden Markov Model for a slightly extended case of the problem for HW 2 (Bob telling Alice about what he does all day):

Using either this implementation or your own Hidden Markov Model, optimize the code! The second cell contains a decent starting test case: it fits the HMM to 1000 observation sequences (one fit per loop iteration), which takes about 20 seconds on my laptop.

Profile, vectorize, reduce function calls, numba, repeat to your heart's content!

Once you feel like your code is reasonably optimized, run the final cell, which repeats the test on 50000 observation sequences (50000 loop iterations).

Note that if you choose to do this in a Jupyter notebook, you can still use snakeviz with the `%load_ext snakeviz` and `%%snakeviz -t` magic commands.

Also note that numba is of course useful but not the only way to achieve significant speedups - my optimized code is roughly 3x faster than just adding @njit to everything.

In [None]:
import numpy as np
from tqdm.notebook import tqdm
def forward_algo(A_matrix, B_matrix, pi, O):
    """Compute the forward variables (alphas) for one observation sequence.

    Parameters
    ----------
    A_matrix : ndarray, shape (N, N)
        Transition matrix; ``A_matrix[i, j]`` weights the move from state
        ``i`` at time ``t - 1`` to state ``j`` at time ``t``.
    B_matrix : ndarray, shape (N, K)
        Emission matrix; ``B_matrix[j, k]`` weights observation ``k`` in
        state ``j``.
    pi : ndarray, shape (N,)
        Initial state distribution.
    O : ndarray of int, shape (T,)
        Observation indices into the columns of ``B_matrix``.

    Returns
    -------
    ndarray, shape (N, T)
        ``alphas[j, t]`` for every state ``j`` and time ``t``. No rescaling
        is applied, so the values are raw (unnormalized) probabilities.
    """
    T = O.shape[0]
    alphas = np.zeros((pi.shape[0], T))
    # Gather the emission column for every time step once, instead of
    # indexing B_matrix inside the recursion.
    emissions = B_matrix[:, O]
    alphas[:, 0] = pi * emissions[:, 0]
    for t in range(1, T):
        # alphas[:, t - 1] @ A_matrix == sum_i alphas[i, t - 1] * A_matrix[i, j]
        # for all destination states j at once.
        alphas[:, t] = (alphas[:, t - 1] @ A_matrix) * emissions[:, t]
    return alphas


def backward_algo(A_matrix, B_matrix, pi, O):
    """Compute the backward variables (betas) for one observation sequence.

    Parameters
    ----------
    A_matrix : ndarray, shape (N, N)
        Transition matrix; ``A_matrix[i, j]`` weights the move from state
        ``i`` to state ``j``.
    B_matrix : ndarray, shape (N, K)
        Emission matrix; ``B_matrix[j, k]`` weights observation ``k`` in
        state ``j``.
    pi : ndarray, shape (N,)
        Initial state distribution; only its length (the number of states)
        is used here.
    O : ndarray of int, shape (T,)
        Observation indices into the columns of ``B_matrix``.

    Returns
    -------
    ndarray, shape (N, T)
        ``betas[i, t]`` for every state ``i`` and time ``t``; the last
        column is all ones. No rescaling is applied.
    """
    T = O.shape[0]
    betas = np.zeros((pi.shape[0], T))
    betas[:, -1] = 1
    # Emission column for every time step, gathered once.
    emissions = B_matrix[:, O]
    # Walk backwards from the second-to-last time step to the first.
    for t in range(T - 2, -1, -1):
        # A_matrix @ v == sum_j A_matrix[i, j] * v[j] for all source states i.
        betas[:, t] = A_matrix @ (emissions[:, t + 1] * betas[:, t + 1])
    return betas


def compute_gammas(alphas, betas):
    """Return the per-time-step state posteriors (gammas).

    The element-wise product of ``alphas`` and ``betas`` is normalized so
    that every column (time step) sums to one. The inputs are not modified.
    """
    joint = alphas * betas
    column_totals = joint.sum(axis=0)
    # Normalize in place; `joint` is a fresh array, so the inputs stay intact.
    np.divide(joint, column_totals, out=joint)
    # Sanity check: each time step must now be a probability distribution.
    assert np.isclose(joint.sum(axis=0), 1).all()
    return joint

def compute_xis(alphas, betas, A_matrix, B_matrix, O):
    """Compute the pairwise state posteriors (xis) for consecutive time steps.

    Parameters
    ----------
    alphas, betas : ndarray, shape (N, T)
        Forward and backward variables.
    A_matrix : ndarray, shape (N, N)
        Transition matrix.
    B_matrix : ndarray, shape (N, K)
        Emission matrix.
    O : ndarray of int, shape (T,)
        Observation indices into the columns of ``B_matrix``.

    Returns
    -------
    ndarray, shape (N, N, T - 1)
        ``xis[i, j, t]`` is proportional to
        ``alphas[i, t] * A_matrix[i, j] * B_matrix[j, O[t + 1]] * betas[j, t + 1]``
        and each time slice ``xis[:, :, t]`` is normalized to sum to one.
    """
    # Factor that depends only on the destination state j and time t + 1.
    dest_term = B_matrix[:, O[1:]] * betas[:, 1:]
    # Broadcast source state on axis 0, destination on axis 1, time on axis 2.
    xis = alphas[:, None, :-1] * A_matrix[:, :, None] * dest_term[None, :, :]
    # Normalize every time slice over both state axes.
    xis /= xis.sum(axis=(0, 1))
    return xis


def update_params(gammas, xis, A_matrix, B_matrix, O, pi):
    """Re-estimate the HMM parameters from the state posteriors.

    Parameters
    ----------
    gammas : ndarray, shape (N, T)
        Per-time-step state posteriors.
    xis : ndarray, shape (N, N, T - 1)
        Pairwise state posteriors for consecutive time steps.
    A_matrix : ndarray, shape (N, N)
        Current transition matrix; only its shape and dtype are used.
    B_matrix : ndarray, shape (N, K)
        Current emission matrix; only its shape and dtype are used.
    O : ndarray of int, shape (T,)
        Observation indices.
    pi : ndarray
        Current initial distribution; unused, kept for interface
        compatibility.

    Returns
    -------
    tuple of (pi_new, A_matrix_new, B_matrix_new)
        ``pi_new`` is an independent copy of the first gamma column,
        ``A_matrix_new[i, j]`` is the summed ``xis[i, j, :]`` divided by the
        summed ``gammas[i, :-1]``, and ``B_matrix_new[i, k]`` is the gamma
        mass of state ``i`` at the times where ``O == k`` divided by its
        total gamma mass.
    """
    # Copy so the result does not alias (and keep alive) the whole gammas array.
    pi_new = gammas[:, 0].copy()

    # Expected number of transitions out of each state; computed once.
    transitions_from = gammas[:, :-1].sum(axis=1)
    A_matrix_new = np.zeros_like(A_matrix)
    A_matrix_new[...] = xis.sum(axis=2) / transitions_from[:, None]

    # one_hot[t, k] is 1 where O[t] == k, so gammas @ one_hot sums the gamma
    # mass per observation symbol for all symbols at once.
    n_symbols = B_matrix.shape[1]
    one_hot = (O[:, None] == np.arange(n_symbols)).astype(gammas.dtype)
    state_mass = gammas.sum(axis=1)
    B_matrix_new = np.zeros_like(B_matrix)
    B_matrix_new[...] = (gammas @ one_hot) / state_mass[:, None]

    return pi_new, A_matrix_new, B_matrix_new


def run_one_iter(A_matrix, B_matrix, pi, O):
    """Run one forward-backward pass and re-estimate the parameters.

    Returns the updated ``(pi, A, B)`` followed by the sum of the last
    forward column, i.e. the likelihood of ``O`` under the parameters that
    were passed IN (not the updated ones).
    """
    fwd = forward_algo(A_matrix, B_matrix, pi, O)
    bwd = backward_algo(A_matrix, B_matrix, pi, O)

    state_post = compute_gammas(fwd, bwd)
    pair_post = compute_xis(fwd, bwd, A_matrix, B_matrix, O)
    # Sanity check: summing the pairwise posteriors over the destination
    # state must reproduce the single-state posteriors.
    assert np.allclose(pair_post.sum(axis=1), state_post[:, :-1])

    new_pi, new_A, new_B = update_params(
        state_post, pair_post, A_matrix, B_matrix, O, pi
    )
    sequence_likelihood = fwd[:, -1].sum()
    return new_pi, new_A, new_B, sequence_likelihood


def run_hmm(A_init, B_init, pi_init, O, tol, max_iter):
    """Iterate ``run_one_iter`` until the likelihood gain drops below ``tol``.

    Starting from the initial parameters, each pass replaces ``(pi, A, B)``
    with the re-estimated values. As soon as the increase of the value
    reported by ``run_one_iter`` is smaller than ``tol``, the latest
    parameters and that value are returned. If this does not happen within
    ``max_iter`` passes, a message is printed and four ``None`` values are
    returned.
    """
    pi_cur, A_cur, B_cur = pi_init, A_init, B_init
    previous = -np.inf  # guarantees the first pass never counts as converged
    remaining = max_iter
    while remaining > 0:
        pi_cur, A_cur, B_cur, current = run_one_iter(A_cur, B_cur, pi_cur, O)
        # NOTE(review): `current` is a raw (not log) likelihood; for long
        # sequences its magnitude may be far below `tol`, which would make
        # this test succeed almost immediately -- confirm this is intended.
        gain = current - previous
        if gain < tol:
            return pi_cur, A_cur, B_cur, current
        previous = current
        remaining -= 1
    print("failed to converge!")
    return None, None, None, None

In [None]:
# Initial state distribution: P(day 1 is sunny, cloudy, rainy).
pi_init = np.array([0.1, 0.1, 0.8])

# Transition matrix, one row per current weather state:
# P([sunny -> sunny, sunny -> cloudy, sunny -> rainy]), then cloudy, then rainy.
A_init = np.array([
    [0.5, 0.4, 0.1],
    [0.3, 0.4, 0.3],
    [0.1, 0.45, 0.45],
])

# Emission matrix, one row per weather state:
# P([game, clean, walk, pickleball, amusement park] | sunny), then cloudy, then rainy.
B_init = np.array([
    [0.2, 0.1, 0.3, 0.25, 0.15],
    [0.4, 0.15, 0.3, 0.1, 0.05],
    [0.6, 0.2, 0.198, 0.001, 0.001],
])

# Seeded generator so the benchmark data is reproducible:
# 1000 sequences of 250 activity indices each.
rng = np.random.default_rng(1)
obs_array = rng.choice([0, 1, 2, 3, 4], size=(1000, 250), replace=True)
tol = 1e-8
max_iter = 100

# Fit one HMM per observation sequence; only the last fit's results are kept.
for i in tqdm(np.arange(len(obs_array))):
    pi_final, A_final, B_final, ll_final = run_hmm(
        A_init, B_init, pi_init, obs_array[i], tol, max_iter
    )

In [None]:
# Final test: the same benchmark on 50000 sequences. Don't run it at first;
# it will be pretty slow. Reuses `rng` and the initial parameters defined in
# an earlier cell.
obs_array = rng.choice([0, 1, 2, 3, 4], size=(50000, 250), replace=True)
tol = 1e-8
max_iter = 100

# Fit one HMM per observation sequence; only the last fit's results are kept.
for i in tqdm(np.arange(len(obs_array))):
    pi_final, A_final, B_final, ll_final = run_hmm(
        A_init, B_init, pi_init, obs_array[i], tol, max_iter
    )