# Bayesian HMM Model

This notebook illustrates how to build and train a Bayesian Hidden Markov Model with the [beer framework](https://github.com/beer-asr/beer).

In [1]:
# Add "beer" to the PYTHONPATH
import sys
sys.path.insert(0, '../')

import copy

import beer
import matplotlib.pyplot as plt
import numpy as np
import torch

# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d
output_notebook()

# Convenience functions for plotting.
import plotting

%load_ext autoreload
%autoreload 2

In [2]:
# Synthetic data: a random walk through the alignment graph of the unit
# sequence A-B-A, where every unit owns `num_unit_states` emitting states.

# Seed NumPy so the sampled state path and observations are identical on
# every "Restart & Run All".
np.random.seed(0)

seqs = ['A', 'B', 'A']
nsamples = 30
ndim = 2

units = ['A', 'B']
len_seqs = len(seqs)
num_unit_states = 3
tot_states = len(seqs) * num_unit_states

# Transition matrix over the `tot_states` states of the alignment graph.
trans_mat = beer.HMM.create_ali_trans_mat(tot_states)

# Ground-truth emission densities: indices 0-2 belong to unit A, 3-5 to unit B.
# All states of a unit share the same covariance and differ only in their mean.
cov_a = np.array([[.75, -.5], [-.5, 2.]])
cov_b = np.array([[2, 1], [1, .75]])
means = [np.array([-1.5, 3]), np.array([-1.5, 4]), np.array([-1.5, 5]),
         np.array([1, -3]), np.array([1, -2]), np.array([1, -1])]
covs = ([cov_a.copy() for _ in range(num_unit_states)]
        + [cov_b.copy() for _ in range(num_unit_states)])
normal_sets = list(zip(means, covs))

# Emission-density indices owned by each unit.
unit_state_ids = {'A': [0, 1, 2], 'B': [3, 4, 5]}

# seqs_id[k] is the emission-density index used by state k of the alignment
# graph; dict_seq_state is the same mapping stored as a dict.
seqs_id = [emission
           for unit in seqs
           for emission in unit_state_ids[unit][:num_unit_states]]
dict_seq_state = dict(enumerate(seqs_id))

# Sample the state path (indices into the alignment graph) and one observation
# per frame. The walk starts in the first state of the alignment graph and the
# first observation goes through the same state -> emission mapping as all the
# others, so the cell stays correct if `seqs` does not start with 'A'.
states = np.zeros(nsamples, dtype=np.int16)
data = np.zeros((nsamples, ndim))
for n in range(nsamples):
    if n > 0:
        states[n] = np.random.choice(np.arange(tot_states),
                                     p=trans_mat[int(states[n - 1])].numpy())
    emission = dict_seq_state[int(states[n])]
    data[n] = np.random.multivariate_normal(means[emission], covs[emission])

# Emission-density index visited at each frame.
states_id = [dict_seq_state[int(state)] for state in states]

# Left panel: the samples, coloured by unit and joined in time order.
sample_colors = ['blue', 'blue', 'blue', 'red', 'red', 'red']
fig1 = figure(title='Samples', width=400, height=400)
fig1.circle(data[:, 0], data[:, 1],
            color=[sample_colors[emission] for emission in states_id],
            line_width=1)
fig1.line(data[:, 0], data[:, 1], color='black', line_width=.5, alpha=.5)

# Right panel: the ground-truth emission densities, one shade per state.
fig2 = figure(title='Emissions',  width=400, height=400)
colors = ['darkblue', 'blue', 'skyblue', 'darkred','red', 'pink']
for (mu, sigma), shade in zip(normal_sets, colors):
    plotting.plot_normal(fig2, mu, sigma, alpha=.3, color=shade)

grid = gridplot([[fig1, fig2]])
show(grid)
print(states_id)

[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 3, 3, 4, 4, 5, 5, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2]


## Model Creation

We create several types of HMMs, each of them has the same transition matrix and initial / final state probability, and a specific type of emission density: 
  * one Normal density per state with full covariance matrix
  * one Normal density per state with diagonal covariance matrix
  * one Normal density per state with full covariance matrix shared across states
  * one Normal density per state with diagonal covariance matrix shared across states.

In [19]:
# Build the two HMMs compared in this notebook. Both use one diagonal-covariance
# Normal density per emission state, created from the global mean / variance of
# the data.

# Seed torch so the Dirichlet sample below (and, presumably, the `noise_std`
# perturbation applied by beer -- TODO confirm) is reproducible across runs.
torch.manual_seed(0)

num_units = len(units)
len_seqs = len(seqs)

# Global statistics of the data; only the diagonal of the covariance is used.
p_mean = torch.from_numpy(data.mean(axis=0)).float()
p_cov = torch.from_numpy(np.cov(data.T)).float()
p_var = torch.diag(p_cov)

# Phone-loop model: every state is listed as both an initial and a final state.
nstates = num_units * num_unit_states
init_states = torch.from_numpy(np.arange(nstates))
final_states = torch.from_numpy(np.arange(nstates))

# Alignment model: one state per entry of `seqs_id`; the path has to start in
# the first state and end in the last one.
nstates_ali = len(seqs_id)
init_states_ali = torch.tensor([0])
final_states_ali = torch.tensor([nstates_ali - 1])

# HMM1 (diag cov), phone loop
unit_priors = torch.distributions.Dirichlet(torch.ones(num_units)).sample()
gamma = .5
trans_mat_loop = beer.HMM.create_trans_mat(unit_priors, num_unit_states, gamma)
normalset = beer.NormalDiagonalCovarianceSet.create(p_mean, p_var, nstates,
                                                    noise_std=0.5)
hmm_diag_loop = beer.HMM.create(init_states, final_states, trans_mat_loop,
                                normalset)

# HMM2 (diag cov), ali model
# The transition matrix is sized from `nstates_ali` so that it always agrees
# with the init / final state indices defined above.
trans_mat_ali = beer.HMM.create_ali_trans_mat(nstates_ali)
normalset_ali = beer.NormalDiagonalCovarianceSet.create(p_mean, p_var, nstates,
                                                        noise_std=0.5)
# Expand the `nstates` densities to one entry per alignment state via `seqs_id`.
ali_sets = beer.AlignModelSet(normalset_ali, seqs_id)
hmm_diag_align = beer.HMM.create(init_states_ali, final_states_ali,
                                 trans_mat_ali, ali_sets)

models = [
    hmm_diag_loop,
    hmm_diag_align
]

In [29]:
# Emission-density index of every state along the A-B-A alignment path.
seqs_id

[0, 1, 2, 3, 4, 5, 0, 1, 2]

In [28]:
# Shape check for the alignment model: expected natural parameters of its
# model set versus the sufficient statistics of the data.
# The features are built here rather than taken from `X`, which is only defined
# in the training cell further down; this keeps the cell runnable in notebook
# order on a fresh kernel.
feats = torch.from_numpy(data).float()
print(hmm_diag_align.modelset.expected_natural_params_as_matrix().t().shape)
hmm_diag_align.sufficient_statistics(feats)[1].shape

torch.Size([8, 9])


torch.Size([30, 8])

## Variational Bayes Training 

In [4]:
# Training hyper-parameters.
epochs = 100
lrate = 1.

# Observations as a float tensor. `labels` / `Z` hold the sampled state path
# but are not consumed by the training loop below.
X = torch.from_numpy(data).float()
labels = states
Z = [None, torch.from_numpy(labels).long()]

elbo_fn = beer.EvidenceLowerBound(len(X))

# A single optimizer updates the parameters of every model.
params = [param for hmm in models for param in hmm.parameters]
optimizer = beer.BayesianModelOptimizer(params, lrate)

# elbos[k] collects the per-frame ELBO of models[k], one value per epoch.
elbos = [[] for _ in models]
for epoch in range(epochs):
    optimizer.zero_grad()
    for history, model in zip(elbos, models):
        elbo = elbo_fn(model, X)
        elbo.natural_backward()
        history.append(float(elbo) / len(X))
    optimizer.step()

# Plot the ELBO.
fig = figure(title='ELBO', width=400, height=400, x_axis_label='step',
             y_axis_label='ln p(X)')
steps = range(epochs)
curves = zip(elbos,
             ('HMM-loop (diag)', 'HMM-align (diag)'),
             ('blue', 'red'))
for history, name, shade in curves:
    fig.line(steps, history, legend=name, color=shade)

fig.legend.location = 'bottom_right'

show(fig)

In [12]:
# Square plotting window shared by both panels: two (largest) standard
# deviations on each side of the data mean.
mean = data.mean(axis=0)
var = data.var(axis=0)
std_dev = np.sqrt(var.max())
half_width = 2 * std_dev
x_range, y_range = [(center - half_width, center + half_width)
                    for center in mean[:2]]
lows, highs = zip(x_range, y_range)
global_range = (min(lows), max(highs))


def _hmm_panel(title, hmm, color):
    """Return a figure with the data points and the given HMM drawn on top."""
    panel = figure(title=title, x_range=global_range, y_range=global_range,
                   width=400, height=400)
    panel.circle(data[:, 0], data[:, 1], alpha=.5, color=color)
    plotting.plot_hmm(panel, hmm, alpha=.1, color=color)
    return panel


fig1 = _hmm_panel('HMM (diag) loop', hmm_diag_loop, 'blue')
fig2 = _hmm_panel('HMM (diag) align', hmm_diag_align, 'red')
grid = gridplot([[fig1, fig2]])
show(grid)

### Plotting

In [5]:
# Show the `_resps` matrix of each trained HMM (transposed) as an image.
# NOTE(review): the last saved run of this cell ended with
# "ValueError: too many values to unpack (expected 2)"; the cause cannot be
# determined from this notebook alone -- re-run and confirm.
feats = torch.from_numpy(data).float()

# Each model is called on its own sufficient statistics; the call is expected
# to leave the responsibilities in the `_resps` attribute read right after.
stats1 = hmm_diag_loop.sufficient_statistics(feats)
hmm_diag_loop(stats1)
lhs1 = hmm_diag_loop._resps.numpy().T

# Use the alignment model's own statistics here (previously `stats1` was
# passed, which left `stats2` unused).
stats2 = hmm_diag_align.sufficient_statistics(feats)
hmm_diag_align(stats2)
lhs2 = hmm_diag_align._resps.numpy().T

# A dedicated name avoids rebinding `fig1`, which holds a bokeh figure above.
resp_fig, axarr = plt.subplots(2, 1)
panels = ((lhs1, 'HMM loop (diag) lhs'), (lhs2, 'HMM align (diag) lhs'))
for ax, (resps, title) in zip(axarr, panels):
    ax.imshow(resps, origin='lower')
    ax.set_title(title)
plt.tight_layout()
plt.show()

ValueError: too many values to unpack (expected 2)