# Bayesian Mixture Model

This notebook illustrates how to build and train a Bayesian Mixture Model with the [beer framework](https://github.com/beer-asr/beer).

In [None]:
# Add "beer" to the PYTHONPATH
import sys
sys.path.insert(0, '../')

import copy

import beer
import numpy as np
import torch

# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d
output_notebook()

# Convenience functions for plotting.
import plotting

%load_ext autoreload
%autoreload 2

## Data

As an illustration, we generate a synthetic data set composed of two normally distributed clusters. Both clusters have a dense (full) covariance matrix: the two dimensions are positively correlated in the first cluster and negatively correlated in the second one.

In [None]:
# Dedicated, seeded random generator so that "Restart & Run All" always
# produces the same data set (and therefore comparable ELBO curves and plots).
seed = 0
rng = np.random.RandomState(seed)

# First cluster: centered at (-5, 5), positively correlated dimensions.
mean = np.array([-5, 5])
cov = .5 * np.array([[.75, .5], [.5, 2.]])
data1 = rng.multivariate_normal(mean, cov, size=200)

# Second cluster: centered at (5, 5), negatively correlated dimensions.
mean = np.array([5, 5])
cov = 2 * np.array([[2, -.5], [-.5, .75]])
data2 = rng.multivariate_normal(mean, cov, size=200)

# Merge everything to get the final data set, then shuffle the rows so the
# samples of the two clusters are interleaved.
data = np.vstack([data1, data2])
rng.shuffle(data)

In [None]:
# Per-dimension statistics of the data, used to choose the plotting window.
mean = np.mean(data, axis=0)
var = np.var(data, axis=0)

# The largest standard deviation sets the window size, so that both axes
# share the same scale.
std_dev = np.sqrt(var.max())
half_width = 2 * std_dev


def _window(center):
    """Return the (low, high) interval of +/- two std. dev. around `center`."""
    return (center - half_width, center + half_width)


x_range = _window(mean[0])
y_range = _window(mean[1])

# Single square window covering both the x and the y intervals.
lower_bound = min(x_range[0], y_range[0])
upper_bound = max(x_range[1], y_range[1])
global_range = (lower_bound, upper_bound)

# Scatter plot of the raw data.
fig = figure(title='Data', x_range=global_range, y_range=global_range,
             width=400, height=400)
fig.circle(data[:, 0], data[:, 1])

show(fig)

## Model Creation

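We create six mixture models whose (Normal) components have either an isotropic, a diagonal or a full covariance matrix. For each covariance type, we build one model where every component has its own covariance matrix and one model where the covariance matrix is shared across the components.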

In [None]:
# Configuration of every mixture model, keyed by model name.
#
# All the models are 2-component mixtures of Normal densities and only differ
# by the structure of the covariance matrix (isotropic / diagonal / full) and
# by whether this matrix is shared across the components. The models with
# component-specific covariance matrices come first, followed by their
# "_shared" counterparts.
confs = {
    'gmm_' + short_name + ('_shared' if shared else ''): {
        'type': 'Mixture',
        'prior_strength': 1.,
        'components': {
            'type': 'NormalSet',
            'size': 2,
            'covariance': covariance,
            'shared_covariance': shared,
            'prior_strength': 1.,
            'noise_std': 1.
        }
    }
    for shared in (False, True)
    for short_name, covariance in (
        ('iso', 'isotropic'),
        ('diag', 'diagonal'),
        ('full', 'full'),
    )
}

In [None]:
# Global statistics of the data (mean and per-dimension variance), passed to
# the model factory to initialize every mixture.
data_mean = torch.from_numpy(np.mean(data, axis=0)).float()
data_var = torch.from_numpy(data.var(axis=0)).float()

# One model per configuration, keyed by the configuration name.
models = {
    model_name: beer.create_model(conf, data_mean, data_var)
    for model_name, conf in confs.items()
}

## Variational Bayes Training 

In [None]:
# Training hyper-parameters.
epochs = 100
lrate = 1.

# Training data as a torch tensor.
X = torch.from_numpy(data).float()
n_frames = len(X)

# A single optimizer updates the parameters of all the models at once.
params = [param for model in models.values() for param in model.parameters]
optimizer = beer.BayesianModelOptimizer(params, lrate)

# ELBO of each model divided by the number of data points, one value per epoch.
elbos = {model_name: [] for model_name in models}
for epoch in range(epochs):
    optimizer.zero_grad()
    # Accumulate the (natural) gradients of every model before the update.
    for model_name, model in models.items():
        elbo = beer.evidence_lower_bound(model, X, datasize=n_frames)
        elbo.natural_backward()
        elbos[model_name].append(float(elbo) / n_frames)
    optimizer.step()

In [None]:
# Color used to draw each model in the figures.
colors = dict(
    gmm_iso='green',
    gmm_diag='blue',
    gmm_full='red',
    gmm_iso_shared='grey',
    gmm_diag_shared='brown',
    gmm_full_shared='black',
)

# One ELBO curve per model, as a function of the training step.
fig = figure(title='ELBO', x_axis_label='step', y_axis_label='ln p(X)',
             width=400, height=400)
for model_name in elbos:
    curve = elbos[model_name]
    steps = range(len(curve))
    fig.line(steps, curve, legend=model_name, color=colors[model_name])
fig.legend.location = 'bottom_right'

show(fig)

In [None]:
# Display the `cov` attribute of the first element of the model set of the
# isotropic, shared-covariance mixture (presumably the covariance matrix of
# its first component after training -- TODO confirm against the beer API).
models['gmm_iso_shared'].modelset[0].cov

In [None]:
# One figure per model: the data in the background with the mixture drawn on
# top of it.
model_plots = []
for model_name, model in models.items():
    fig = figure(title=model_name, width=300, height=300,
                 x_range=global_range, y_range=global_range)
    fig.circle(data[:, 0], data[:, 1], alpha=.1)
    plotting.plot_gmm(fig, model, alpha=.5, color=colors[model_name])
    model_plots.append(fig)

# Arrange the figures on a grid of three columns, one row per list.
n_columns = 3
figs = [model_plots[start:start + n_columns]
        for start in range(0, len(model_plots), n_columns)]
grid = gridplot(figs)
show(grid)