# Bayesian Mixture Model

This notebook illustrates how to build and train a Bayesian Mixture Model with the [beer framework](https://github.com/beer-asr/beer).

In [12]:
# Add "beer" to the PYTHONPATH
import sys
sys.path.insert(0, '../')

import copy

import beer
import numpy as np
import torch

# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d
output_notebook()

# Convenience functions for plotting.
import plotting

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Data

As an illustration, we generate a synthetic data set composed of two normally distributed clusters. One has a diagonal covariance matrix whereas the other has a dense covariance matrix.

In [2]:
N = 100     # number of points drawn from each cluster
SEED = 0    # fixed seed so that "Restart & Run All" regenerates the same data

# Seed NumPy's global RNG: `multivariate_normal` and `shuffle` below both
# draw from it, so without a seed every run produces a different data set.
np.random.seed(SEED)

# First cluster: diagonal covariance matrix.
mean = np.array([-5, 5])
cov = .5 * np.array([[.75, 0.], [0, 5.]])
data1 = np.random.multivariate_normal(mean, cov, size=N)

# Second cluster: dense (full) covariance matrix.
mean = np.array([5, 5])
cov = 2 * np.array([[2, -.5], [-.5, .75]])
data2 = np.random.multivariate_normal(mean, cov, size=N)

# Merge everything to get the final data set. The rows are shuffled in place
# so that the two clusters are interleaved.
data = np.vstack([data1, data2])
np.random.shuffle(data)

# Prepare the data for pytorch (double precision tensor).
X = torch.from_numpy(data).double()

# Scatter plot of the raw data.
fig = figure(title='Synthetic data set', width=400, height=400,
             x_range=(-10, 10), y_range=(-5, 15))
fig.circle(data[:, 0], data[:, 1])

show(fig)

## Model Creation

In [8]:
# Seed torch's global RNG so that re-running this cell rebuilds the same model.
# NOTE(review): this presumes beer draws the `noise_std` initialization noise
# from torch's global RNG -- confirm against beer.NormalSet.create.
torch.manual_seed(0)

# Per-dimension mean and variance of the data set, passed to the prior below.
data_mean = torch.from_numpy(data.mean(axis=0)).double()
data_var = torch.from_numpy(np.var(data, axis=0)).double()

modelset = beer.NormalSet.create(
    data_mean, data_var,      # used to set the mean/variance of the prior
    size=20,                  # total number of components in the mixture
    prior_strength=1.,        # how much the prior affects the training ("pseudo-counts")
    noise_std=1,              # standard deviation of the noise used to initialize the mean of the posterior
    cov_type='full',          # type of the covariance matrix ('full', 'diagonal' or 'isotropic')
    shared_cov=False          # if True, all the components share the same covariance matrix
)
model = beer.Mixture.create(
    modelset,
    prior_strength=1          # how much the prior over the weights will affect the training ("pseudo-counts")
)

model = model.double()        # set all the parameters in double precision
#model = model.cuda()          # move the model to a GPU. If you do so, you'll have
                               # to move the data as well.

print(model)

Mixture(
  (modelset): NormalSetFullCovariance(
    (means_precisions): BayesianParameterSet(
      (0): <ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)>
      (1): <ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)>
      (2): <ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)>
      (3): <ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)>
      (4): <ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)>
      (5): <ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)>
      (6): <ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)>
      (7): <ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)>
      (8): <ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)>
      (9): <ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)>
      (10): <ConjugateBayesianParameter(prior=

In [9]:
# Expected value of the mixture weights; it is zipped with the components of
# `model.modelset` below, one weight per component.
# `.detach().cpu()` keeps the NumPy conversion working when the model has been
# moved to a GPU or its tensors track gradients: a bare `.numpy()` raises in
# both of those cases.
weights = model.weights.expected_value().detach().cpu().numpy()

fig = figure(title='Mixture at initialization', width=400, height=400,
             x_range=(-10, 10), y_range=(-5, 15))
fig.circle(data[:, 0], data[:, 1], alpha=.5)
for weight, normal in zip(weights, model.modelset):
    mean = normal.expected_mean.detach().cpu().numpy()
    cov = normal.expected_cov.detach().cpu().numpy()
    # The opacity of each component is scaled by its weight.
    plotting.plot_normal(fig, mean, cov, alpha=.7 * weight, color='green')
show(fig)

## Variational Bayes Training 

In [10]:
epochs = 100    # number of passes over the whole data set
lrate = 1.      # learning rate handed to the optimizer

optim = beer.VariationalBayesOptimizer(model.mean_field_factorization(), lrate)

# Each epoch evaluates the ELBO on the full data set `X`, back-propagates it
# and lets the optimizer take one step.
elbos = []
for epoch in range(epochs):
    optim.init_step()
    elbo = beer.evidence_lower_bound(model, X)
    elbo.backward()
    optim.step()
    # Keep the ELBO divided by the number of data points.
    elbos.append(float(elbo) / len(X))

# Plot the evolution of the ELBO.
# The value of the first epoch (index 0) is left out of the curve -- presumably
# because it dwarfs the later values -- so the x axis has to start at 1 for
# each point to line up with the epoch that produced it.
fig = figure(width=400, height=400, x_axis_label='epoch',
             y_axis_label='ELBO')
fig.line(range(1, len(elbos)), elbos[1:])
show(fig)

  log_weights = self.weights.expected_natural_parameters().view(1, -1)


In [11]:
# Expected value of the mixture weights; it is zipped with the components of
# `model.modelset` below, one weight per component.
# `.detach().cpu()` keeps the NumPy conversion working when the model has been
# moved to a GPU or its tensors track gradients: a bare `.numpy()` raises in
# both of those cases.
weights = model.weights.expected_value().detach().cpu().numpy()

fig = figure(title='Mixture after Variational Bayes training',
             width=400, height=400,
             x_range=(-10, 10), y_range=(-5, 15))
fig.circle(data[:, 0], data[:, 1], alpha=.5)
for weight, normal in zip(weights, model.modelset):
    mean = normal.expected_mean.detach().cpu().numpy()
    cov = normal.expected_cov.detach().cpu().numpy()
    # The opacity of each component is scaled by its weight.
    plotting.plot_normal(fig, mean, cov, alpha=.5 * weight, color='green')
show(fig)