# Bayesian Nested Mixture Model

This notebook illustrates how to build and train a Bayesian Nested Mixture Model with the [beer framework](https://github.com/beer-asr/beer). A Nested Mixture Model is simply a "mixture of mixtures": the rationale behind this model is that each cluster of a mixture model may have an arbitrarily complex distribution, which is itself approximated by yet another mixture model.

In [None]:
# Make the parent directory importable so that `import beer` below resolves.
# NOTE(review): assumes the notebook sits one level below the directory that
# contains the "beer" package - confirm against the repository layout.
import sys
sys.path.append('../')

import copy

import beer
import numpy as np

# Plotting is done with bokeh; `output_notebook()` makes `show(...)` render
# the figures inline in the notebook.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d
output_notebook()

# Convenience functions for plotting.
# NOTE(review): `plotting` is presumably a helper module stored next to this
# notebook rather than an installed package - confirm.
import plotting

# Reload imported modules automatically before each cell is executed, so that
# edits to their source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Data

We generate a synthetic two-dimensional data set made of four Gaussian clusters:


In [None]:
# Draw every sample (and the final shuffle) from a locally seeded generator so
# that "Restart Kernel -> Run All" always regenerates exactly the same data
# set. A local `RandomState` is used instead of `np.random.seed` so the global
# NumPy random state is left untouched.
SEED = 0
rng = np.random.RandomState(SEED)

# Number of points sampled from each cluster.
N_PER_CLUSTER = 100

# First cluster.
mean = np.array([-1.5, 4])
cov = np.array([[.75, 0], [0, 2.]])
data1 = rng.multivariate_normal(mean, cov, size=N_PER_CLUSTER)

# Second cluster.
mean = np.array([5, 5])
cov = np.array([[2, 1], [1, .75]])
data2 = rng.multivariate_normal(mean, cov, size=N_PER_CLUSTER)

# Third cluster.
mean = np.array([-1.5, -4])
cov = np.array([[.75, 0], [0, 2.]])
data3 = rng.multivariate_normal(mean, cov, size=N_PER_CLUSTER)

# Fourth cluster.
mean = np.array([5, -5])
cov = np.array([[2, 1], [1, .75]])
data4 = rng.multivariate_normal(mean, cov, size=N_PER_CLUSTER)

# Merge everything to get the final data set. The rows are shuffled in place
# so that the points of the four clusters are interleaved.
data = np.vstack([data1, data2, data4, data3])
rng.shuffle(data)

In [None]:
# Per-dimension mean and variance of the data, used to scale the figure.
mean = data.mean(axis=0)
var = data.var(axis=0)

# Both axes get the same extent: two standard deviations of the most
# spread-out dimension on each side of that axis' mean.
std_dev = np.sqrt(var.max())
half_width = 2 * std_dev
x_range, y_range = (
    (mean[axis] - half_width, mean[axis] + half_width) for axis in (0, 1)
)

# Scatter plot of the first data column against the second one.
fig = figure(title='Data', x_range=x_range, y_range=y_range,
             width=400, height=400)
xs, ys = data[:, 0], data[:, 1]
fig.circle(xs, ys)

show(fig)

## Model Creation

We create two types of mixture models: one whose (Normal) components have full covariance matrices, and another whose (Normal) components have diagonal covariance matrices.

In [None]:
# The global mean / covariance matrix of the data are used to initialize the
# components of the mixtures.
p_mean = np.mean(data, axis=0)
p_cov = np.cov(data, rowvar=False)

# Keyword arguments forwarded to the component factory; the same dictionary
# is shared by both models.
args = dict(dim=2, mean=p_mean, cov=p_cov, prior_count=1, random_init=True)

# Build the two nested mixtures, diagonal-covariance components first and
# full-covariance components second. They differ only in the component factory.
# NOTE(review): the positional arguments 2 and 5 are presumably the number of
# top-level clusters and of components per cluster - confirm with the beer API.
gmm_diag, gmm_full = (
    beer.NestedMixture.create(2, 5, component_factory, args, prior_count=1e-3)
    for component_factory in (beer.NormalDiagonalCovariance.create,
                              beer.NormalFullCovariance.create)
)

## Variational Bayes Training 

In [None]:
# Histories of the training progress. They are module-level lists that are
# filled by `callback` and re-created before each training run.
elbos, llhs, klds = [], [], []


def callback(elbo, llh, kld):
    """Append the reported values to the matching global history list.

    The lists are looked up by name at call time, so re-binding `elbos`,
    `llhs` and `klds` to fresh lists starts a new recording.
    """
    for history, value in ((elbos, elbo), (llhs, llh), (klds, kld)):
        history.append(value)


# Train the GMM with diagonal cov. matrix components, then keep an
# independent copy of each history without its first entry.
beer.training.train_conj_exp(gmm_diag, data, max_epochs=200, callback=callback)
elbos_diag, llhs_diag, klds_diag = (
    copy.deepcopy(history[1:]) for history in (elbos, llhs, klds)
)

# Start again from empty histories for the second model.
elbos, llhs, klds = [], [], []

# Train the GMM with full cov. matrix components and snapshot its histories
# in the same way.
beer.training.train_conj_exp(gmm_full, data, max_epochs=200, callback=callback)
elbos_full, llhs_full, klds_full = (
    copy.deepcopy(history[1:]) for history in (elbos, llhs, klds)
)

# Plot the ELBO of both models against the recorded step index.
fig = figure(title='ELBO', x_axis_label='step', y_axis_label='ln p(X)',
             width=400, height=400)
for curve, curve_label, curve_color in ((elbos_diag, 'GMM (diag)', 'blue'),
                                        (elbos_full, 'GMM (full)', 'red')):
    fig.line(np.arange(len(curve)), curve, legend=curve_label, color=curve_color)
fig.legend.location = 'bottom_right'

show(fig)