# Bayesian Nested Mixture Model

This notebook illustrates how to build and train a Bayesian Nested Mixture Model with the [beer framework](https://github.com/beer-asr/beer).

In [17]:
# Add "beer" to the PYTHONPATH
import sys
sys.path.insert(0, '../')

import copy

import beer
import numpy as np
import torch

# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d
output_notebook()

# Convenience functions for plotting.
import plotting

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Data

As an illustration, we generate a synthetic data set composed of two normally distributed clusters, each with its own dense (full) covariance matrix.

In [18]:
# Fix the seed so that "Restart & Run All" regenerates exactly the same
# data set (and therefore the same figures and training curves).
SEED = 0
np.random.seed(SEED)

# First cluster.
mean = np.array([-5, 5])
cov = .5 * np.array([[.75, .5], [.5, 2.]])
data1 = np.random.multivariate_normal(mean, cov, size=200)

# Second cluster.
mean = np.array([5, 5])
cov = 2 * np.array([[2, -.5], [-.5, .75]])
data2 = np.random.multivariate_normal(mean, cov, size=200)

# Merge everything to get the final data set.
data = np.vstack([data1, data2])
np.random.shuffle(data)

# We use the global mean and per-dimension variance of the data to
# initialize the mixture.
data_mean = torch.from_numpy(data.mean(axis=0)).float()
data_var = torch.from_numpy(np.var(data, axis=0)).float()

In [19]:
# Per-dimension statistics of the data; they determine the axis limits.
mean = data.mean(axis=0)
var = data.var(axis=0)

# Every axis spans two standard deviations (of the widest dimension)
# on each side of the data mean.
std_dev = np.sqrt(var.max())
half_width = 2 * std_dev
x_range = (mean[0] - half_width, mean[0] + half_width)
y_range = (mean[1] - half_width, mean[1] + half_width)

# A single range shared by both axes keeps the aspect ratio square.
lower = min(x_range[0], y_range[0])
upper = max(x_range[1], y_range[1])
global_range = (lower, upper)

fig = figure(
    title='Data',
    width=400,
    height=400,
    x_range=global_range,
    y_range=global_range,
)
fig.circle(data[:, 0], data[:, 1])

show(fig)

## Model Creation

We create three types of mixture model whose (Normal) components have, respectively, an isotropic, a diagonal and a full covariance matrix.

In [20]:
nmixtures = 4
ncomp_per_mixture = 3
total_components = nmixtures * ncomp_per_mixture


def create_nested_gmm(cov_type, mean, var):
    """Create one nested mixture model for the given covariance type.

    A ``beer.NormalSet`` of ``total_components`` elements is created from
    ``mean`` and ``var``, wrapped into a ``beer.MixtureSet`` of
    ``nmixtures`` elements, which is in turn wrapped into a
    ``beer.Mixture``.

    Args:
        cov_type: Covariance type forwarded to ``beer.NormalSet.create``
            ('isotropic', 'diagonal' or 'full').
        mean: Tensor passed as the first argument of
            ``beer.NormalSet.create``.
        var: Tensor passed as the second argument of
            ``beer.NormalSet.create``.

    Returns:
        The object returned by ``beer.Mixture.create``.
    """
    modelset = beer.NormalSet.create(
        mean, var,
        size=total_components,
        prior_strength=1.,
        noise_std=1.,
        cov_type=cov_type
    )
    mixtureset = beer.MixtureSet.create(nmixtures, modelset)
    return beer.Mixture.create(mixtureset)


# `data_mean` / `data_var` (global mean and variance of the data) are
# computed in the data generation cell and used to initialize the models.
# The models are created in the same order as before: isotropic,
# diagonal, then full covariance.
m_gmm_iso = create_nested_gmm('isotropic', data_mean, data_var)
m_gmm_diag = create_nested_gmm('diagonal', data_mean, data_var)
m_gmm_full = create_nested_gmm('full', data_mean, data_var)

models = {
    'm_gmm_iso': m_gmm_iso,
    'm_gmm_diag': m_gmm_diag,
    'm_gmm_full': m_gmm_full
}

In [21]:
# Print the structure (sub-models and their Bayesian parameters) of the
# isotropic-covariance model.
print(m_gmm_iso)

Mixture(
  (modelset): MixtureSet(
    (categoricalset): CategoricalSet(
      (weights): ConjugateBayesianParameter(prior=Dirichlet, posterior=Dirichlet)
    )
    (modelset): NormalSet(
      (means_precisions): ConjugateBayesianParameter(prior=IsotropicNormalGamma, posterior=IsotropicNormalGamma)
    )
  )
  (categorical): Categorical(
    (weights): ConjugateBayesianParameter(prior=Dirichlet, posterior=Dirichlet)
  )
)


## Variational Bayes Training 

In [22]:
epochs = 200
lrate = 1.
X = torch.from_numpy(data).float()

# One VB optimizer and one (initially empty) ELBO history per model.
optims = {}
elbos = {}
for model_name, model in models.items():
    optims[model_name] = beer.VBConjugateOptimizer(
        model.mean_field_factorization(),
        lrate
    )
    elbos[model_name] = []

npoints = len(X)
for epoch in range(epochs):
    # All the models are updated once per epoch, on the full data set.
    for name, optim in optims.items():
        model = models[name]
        optim.init_step()
        elbo = beer.evidence_lower_bound(model, X, datasize=npoints)
        elbo.backward()
        # Store the ELBO normalized by the number of data points.
        elbos[name].append(float(elbo) / npoints)
        optim.step()

In [23]:
# Line color associated with each model name.
colors = {
    'm_gmm_iso': 'green',
    'm_gmm_diag': 'blue',
    'm_gmm_full': 'red',
    'm_gmm_iso_shared': 'grey',
    'm_gmm_diag_shared': 'brown',
    'm_gmm_full_shared': 'black'
}

# One ELBO curve per trained model, all on the same figure.
fig = figure(
    title='ELBO',
    width=400,
    height=400,
    x_axis_label='step',
    y_axis_label='ln p(X)',
)
for model_name in elbos:
    history = elbos[model_name]
    steps = range(len(history))
    fig.line(steps, history, legend=model_name, color=colors[model_name])
fig.legend.location = 'bottom_right'

show(fig)

In [24]:
# Number of figures per row of the grid.
ncols = 3

# One figure per model, showing the data and every inner mixture.
flat_figs = []
for model_name, model in models.items():
    fig = figure(title=model_name, x_range=global_range,
                 y_range=global_range, width=250, height=250)
    weights = model.categorical.mean
    for j, gmm in enumerate(model.modelset):
        # NOTE(review): the data points are redrawn for every inner
        # mixture, so their opacity accumulates -- confirm this is intended.
        fig.circle(data[:, 0], data[:, 1], alpha=.1)
        # The opacity of each mixture reflects its weight.
        plotting.plot_gmm(fig, gmm, alpha=weights[j].numpy())
    flat_figs.append(fig)

# Arrange the figures in rows of `ncols`.
figs = [flat_figs[k:k + ncols] for k in range(0, len(flat_figs), ncols)]
grid = gridplot(figs)
show(grid)

## Hierarchical Dirichlet Process Mixture Model



In [25]:
# NOTE(review): this import belongs with the other imports in the first cell.
import pickle 
# NOTE(review): absolute, user-specific path -- this cell cannot run on
# another machine. `pickle.load` can execute arbitrary code, so only load
# model files coming from a trusted source.
with open('/home/lucas/Desktop/test_reorder.mdl', 'rb') as f:
    ploop = pickle.load(f)
    
# Create a `beer.SBCategoricalSet` with one element per entry of
# `ploop.start_pdf`, from the categorical of the loaded model.
# NOTE(review): the output recorded for this cell is
# "AttributeError: 'SBCategorical' object has no attribute
# '_update_ordering'" -- the pickled model and the installed beer version
# are presumably out of sync; confirm.
sb_categoricalset = beer.SBCategoricalSet.create(len(ploop.start_pdf), ploop.categorical, prior_strength=1)
sb_categoricalset.mean_field_factorization()
# Last expression of the cell: displayed as its output.
sb_categoricalset.stickbreaking

AttributeError: 'SBCategorical' object has no attribute '_update_ordering'

In [None]:
# Display the `ordering` attribute of the loaded model's categorical.
ploop.categorical.ordering

In [None]:
# Expected log-likelihood, under `ploop.categorical`, of the rows of an
# identity matrix with one row per entry of `ploop.start_pdf`.
# A dedicated name is used instead of `data` so that the synthetic data
# set of the first part of the notebook is not overwritten.
one_hot = torch.eye(len(ploop.start_pdf))
stats = ploop.categorical.sufficient_statistics(one_hot)
log_weights = ploop.categorical.expected_log_likelihood(stats)
log_weights

In [None]:
# Expected log-likelihood, under `sb_categoricalset`, of the rows of an
# identity matrix with one row per entry of `ploop.start_pdf`.
# A dedicated name is used instead of `data` so that the synthetic data
# set of the first part of the notebook is not overwritten.
one_hot = torch.eye(len(ploop.start_pdf))
stats = sb_categoricalset.sufficient_statistics(one_hot)
log_weights = sb_categoricalset.expected_log_likelihood(stats)
log_weights

In [None]:
from scipy.special import betaln, gamma, xlog1py, xlogy

def beta(x, a, b):
    """Probability density of the Beta(a, b) distribution at ``x``.

    The density ``x**(a-1) * (1-x)**(b-1) / B(a, b)`` is evaluated in the
    log domain: the Gamma function overflows to ``inf`` for arguments
    above ~171, which would turn the normalization constant into NaN for
    the large concentrations used in this notebook.

    Args:
        x: Scalar or array of evaluation points in [0, 1].
        a: First shape parameter (> 0), scalar or array.
        b: Second shape parameter (> 0), scalar or array.

    Returns:
        The density values, with the broadcast shape of the inputs.
    """
    # xlogy / xlog1py return 0 when the exponent is 0, which matches the
    # convention 0**0 == 1 of the direct formula at x = 0 or x = 1.
    log_pdf = xlogy(a - 1, x) + xlog1py(b - 1, -x) - betaln(a, b)
    return np.exp(log_pdf)

x = np.linspace(1e-3, 0.999, 1000)
mean = ploop.categorical.mean.numpy()
cmean = mean.cumsum()
concentration = 1000

fig = figure()

# Range of component indices whose Beta density is drawn.
first, last = 90, 100
for i in range(first, last):
    # Beta density with parameters
    # (concentration * mean[i], concentration * (1 - cmean[i])).
    p_x = beta(x, concentration * mean[i], concentration * (1 - cmean[i]))
    # Highlight the first curve. The previous test (`i == 0`) could never
    # be true for this range, so no curve was ever drawn in red.
    if i == first:
        fig.line(x, p_x, color='red')
    else:
        fig.line(x, p_x)
show(fig)


In [None]:
def hdp_sb(mean, concentration):
    """Sample a weight vector with a stick-breaking construction.

    Each stick fraction is drawn as
    ``v[i] ~ Beta(concentration * mean[i],
                  concentration * (1 - cumsum(mean)[i]))``
    and the returned weights are ``pi[i] = v[i] * prod_{j<i} (1 - v[j])``.

    Args:
        mean: 1-D array of base weights; one weight is sampled per entry
            (the length is no longer assumed to be 101).
        concentration: Positive scalar multiplying both Beta parameters.

    Returns:
        A new 1-D float array with the same length as ``mean``.
    """
    mean = np.asarray(mean, dtype=float)
    cmean = np.cumsum(mean)
    a = concentration * mean
    # When the cumulative mass reaches 1 (last stick) the second parameter
    # is 0, or slightly negative because of rounding, which
    # `np.random.beta` rejects. Clamping it to a tiny positive value makes
    # that stick take (almost surely) all the remaining mass.
    b = np.maximum(concentration * (1 - cmean), np.finfo(float).eps)
    v = np.random.beta(a, b)
    residual = np.cumprod(1 - v)
    # Work on a copy so that `v` is not modified through an alias.
    pi = v.copy()
    pi[1:] *= residual[:-1]
    return pi

# Draw 50 weight vectors around `mean`, one per row of `samples`.
samples = np.c_[[hdp_sb(mean, concentration=100) for _ in range(50)]]

fig = figure()

# Individual draws (faded), the base mean (red) and the empirical
# average of the draws (green).
xs = range(101)
for draw in samples:
    fig.line(xs, draw, alpha=.3)
fig.line(xs, mean, color='red')
fig.line(xs, samples.mean(axis=0), color='green')

show(fig)

In [None]:
sb_categoricalset = beer.SBCategoricalSet.create(
    len(ploop.start_pdf),
    ploop.categorical,
    prior_strength=50
)
vbinit = sb_categoricalset.mean.numpy()

# Compare the base mean (red), the average of the sampled weights (green)
# and the average of the mean of the newly created set (blue).
fig = figure()
curves = [
    (mean, 'red'),
    (samples.mean(axis=0), 'green'),
    (vbinit.mean(axis=0), 'blue'),
]
for values, color in curves:
    fig.line(range(101), values, color=color)

show(fig)

In [None]:
# NOTE(review): this import belongs with the other imports in the first cell.
import pickle 
# NOTE(review): absolute, user-specific paths (model and features) -- this
# cell cannot run on another machine. `pickle.load` can execute arbitrary
# code, so only load model files coming from a trusted source.
with open('/home/lucas/Desktop/test_reorder.mdl', 'rb') as f:
    ploop = pickle.load(f)
# Build a bigram phone-loop from the parts of the loaded model and a newly
# created `SBCategoricalSet`.
sb_categoricalset = beer.SBCategoricalSet.create(len(ploop.start_pdf), ploop.categorical, prior_strength=1)
bploop = beer.BigramPhoneLoop.create(ploop.graph, ploop.start_pdf, ploop.end_pdf,
                                     ploop.modelset, sb_categoricalset).double()

# NOTE(review): `X`, `epochs` and `elbos` replace the variables of the same
# name defined by the mixture-model training cell earlier in the notebook.
X = torch.from_numpy(np.load('/home/lucas/Desktop/mzmb0_sx176.npy'))
model = bploop
epochs = 10
optim =  beer.VBConjugateOptimizer(model.mean_field_factorization(), lrate=1)
elbos = []
    
# Full-batch VB training; the ELBO divided by the number of rows of `X`
# is stored and printed after every update.
for epoch in range(epochs):
    optim.init_step()
    elbo = beer.evidence_lower_bound(model, X)
    #elbo += beer.evidence_lower_bound(model, X)
    elbo.backward()
    elbos.append(float(elbo) / len(X))
    optim.step()
    print(elbos[-1])

In [None]:
# Mean of the categorical set of the bigram model, printed as a total and
# rendered as an image.
# NOTE(review): `mean` replaces the variable of the same name used by the
# earlier cells.
mean = bploop.categoricalset.mean.numpy()
print(mean.sum())

# NOTE(review): the extent 101 is hard-coded; presumably `mean` is a
# 101 x 101 matrix -- confirm.
fig = figure(x_range=(0, 101), y_range=(0, 101))
fig.image(image=[mean], x=0, y=0, dh=101, dw=101)
show(fig)

In [None]:
# NOTE(review): absolute, user-specific path; `pickle.load` must only be
# used on trusted files. `pickle` is imported in an earlier cell.
with open('/home/lucas/Desktop/test_bigram_reorder.mdl', 'rb') as f:
    bploop = pickle.load(f)
    
# Mean of the categorical set, with its columns indexed by the `ordering`
# attribute of the set.
mean = bploop.categoricalset.mean[:, bploop.categoricalset.ordering].numpy()
print(mean.sum())

# Row 9 of the matrix as a curve.
fig = figure()
fig.line(range(101), mean[9])
show(fig)

# The whole matrix as an image (the extent 101 is hard-coded).
fig = figure(x_range=(0, 101), y_range=(0, 101))
fig.image(image=[mean], x=0, y=0, dh=101, dw=101)
show(fig)

In [None]:
# NOTE(review): absolute, user-specific path; `pickle.load` must only be
# used on trusted files. `pickle` is imported in an earlier cell.
with open('/home/lucas/Desktop/test_reorder.mdl', 'rb') as f:
    ploop = pickle.load(f)
    
# Mean of the categorical, indexed by its `ordering` attribute.
print(ploop.categorical.ordering)
mean = ploop.categorical.mean[ploop.categorical.ordering].numpy()

fig = figure()
fig.line(range(101), mean, color='green')

# NOTE(review): this second block loads exactly the same file as the first
# one, so the two curves are identical -- presumably a different model
# file was meant to be compared here; confirm.
with open('/home/lucas/Desktop/test_reorder.mdl', 'rb') as f:
    ploop = pickle.load(f)
    
print(ploop.categorical.ordering)
mean = ploop.categorical.mean[ploop.categorical.ordering].numpy()
fig.line(range(101), mean)

show(fig)

# Tile the reordered mean 101 times along a new first dimension and show
# the result as an image (the extent 101 is hard-coded).
mean = ploop.categorical.mean[ploop.categorical.ordering].repeat(101, 1).numpy()
print(mean.sum())

fig = figure(x_range=(0, 101), y_range=(0, 101))
fig.image(image=[mean], x=0, y=0, dh=101, dw=101)
show(fig)

In [None]:
# Mean of the categorical (without reordering), tiled 101 times along a
# new first dimension.
mean = ploop.categorical.mean.repeat(101, 1).numpy()
print(mean.sum())

# Image of the logarithm of the tiled mean (the extent 101 is hard-coded).
fig = figure(x_range=(0, 101), y_range=(0, 101))
fig.image(image=[np.log(mean)], x=0, y=0, dh=101, dw=101, palette='Viridis256')
show(fig)

In [None]:
# Scratch check of `Tensor.repeat`: tiling a tensor of size 3 with
# `repeat(4, 2)` gives a tensor of size (4, 6).
# NOTE(review): `x` replaces the evaluation grid defined by the Beta
# density cell.
x = torch.tensor([1, 2, 3])
x.repeat(4, 2)