# Bayesian Model

This notebook illustrate how to use a Bayesian Normal density model with the [beer framework](https://github.com/beer-asr/beer). The Normal distribution is a fairly basic models but it is used extenslively in other model as a basic building block.

In [1]:
# Add "beer" to the PYTHONPATH
import random
from collections import defaultdict
import sys
sys.path.insert(0, '../')


import beer
import numpy as np
import torch

# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
output_notebook()

# Convenience functions for plotting.
import plotting

%load_ext autoreload
%autoreload 2

In [2]:
torch.__version__

'0.4.1'

## Data

Generate some normally distributed data:

In [3]:
mean = np.zeros(2) 
cov = np.array([
    [1, .95],
    [.95, 1]
])
data = np.random.multivariate_normal(mean, cov, size=1)

fig = figure(
    title='Data',
    width=400,
    height=400,
    x_range=(mean[0] - 5, mean[0] + 5),
    y_range=(mean[1] - 5, mean[1] + 5)
)
fig.circle(data[:, 0], data[:, 1])
plotting.plot_normal(fig, mean, cov, line_color='black', fill_alpha=.3)

show(fig)

## Model Creation

We create two types of Normal distribution: one diagonal covariance matrix and another one with full covariance matrix.

In [4]:
data_mean = torch.zeros(2) + torch.FloatTensor([0, 100])
data_var = torch.ones(2) * 50

normal_iso = beer.Normal.create(data_mean, data_var, 1., cov_type='isotropic')
normal_diag = beer.Normal.create(data_mean, data_var, 1., cov_type='diagonal')
normal_full = beer.Normal.create(data_mean, data_var, 1., cov_type='full')

models = {
    'normal_full': normal_full,
    'normal_diag': normal_diag,
    'normal_iso': normal_iso
}

## Variational Bayes Training 

In [5]:
nbatches = 1
X = torch.from_numpy(data).float()
batches = X.view(nbatches, -1, 2)
batches.shape

torch.Size([1, 1, 2])

In [6]:
epochs = 2
lrate = 1


optims = {
    model_name: beer.BayesianModelOptimizer(
        model.mean_field_groups, lrate)
    for model_name, model in models.items()
}

elbos = {
    model_name: [] 
    for model_name in models
}  


for epoch in range(epochs):
    for name, model in models.items():
        batch_ids = list(range(len(batches)))
        random.shuffle(batch_ids)
        for batch_id in batch_ids:
            batch = batches[batch_id]
            
            optim = optims[name]

            optim.init_step()
            elbo = beer.evidence_lower_bound(model, batch, datasize=len(X))
            elbo.backward()
            optim.step()
            
            elbo = beer.evidence_lower_bound(model, X)
            elbos[name].append(float(elbo) / len(X))
        

In [7]:
colors = {
    'normal_iso': 'green',
    'normal_diag': 'blue',
    'normal_full': 'red',
    
}
# Plot the ELBO.
fig = figure(title='ELBO', width=400, height=400, x_axis_label='step',
              y_axis_label='ln p(X)')
for model_name, elbo in elbos.items():
    fig.line(range(len(elbo)), elbo, legend=model_name, color=colors[model_name])
fig.legend.location = 'bottom_right'

show(fig)

In [8]:
models['normal_full'].mean_precision.posterior.log_norm(models['normal_full'].mean_precision.posterior.natural_parameters), \
models['normal_diag'].mean_precision.posterior.log_norm(models['normal_diag'].mean_precision.posterior.natural_parameters), \
models['normal_iso'].mean_precision.posterior.log_norm(models['normal_iso'].mean_precision.posterior.natural_parameters)

(tensor([[-17.8127]]), tensor([[-18.5268]]), tensor([[-16.2973]]))

In [9]:
models['normal_full'].mean_precision.posterior.to_std_parameters(), \
models['normal_diag'].mean_precision.posterior.to_std_parameters(), \
models['normal_iso'].mean_precision.posterior.to_std_parameters()

((tensor([[ 0.8463, 51.0612]]), tensor([[2.]]), tensor([[[0.0100, 0.0002],
           [0.0002, 0.0002]]]), tensor([[3.]])),
 (tensor([[ 0.8463, 51.0612]]),
  tensor([[2.]]),
  tensor([[1.5000]]),
  tensor([[  50.7163, 2445.0056]])),
 (tensor([[ 0.8463, 51.0612]]),
  tensor([[2.]]),
  tensor([[2.]]),
  tensor([[2445.7219]])))

In [10]:
models['normal_full'].cov, models['normal_diag'].cov, models['normal_iso'].cov

(tensor([[  33.8108,  -27.6119],
         [ -27.6119, 1630.0037]]), tensor([[  33.8108,    0.0000],
         [   0.0000, 1630.0037]]), tensor([[1222.8610,    0.0000],
         [   0.0000, 1222.8610]]))

In [11]:
1 / torch.tensor([[2549.3416, 2462.0051]])

tensor([[0.0004, 0.0004]])

In [12]:
models['normal_full'].kl_div_posterior_prior(), \
models['normal_diag'].kl_div_posterior_prior(), \
models['normal_iso'].kl_div_posterior_prior()

(tensor([3.7713]), tensor([3.6299]), tensor([3.5264]))

In [13]:
models['normal_full'].expected_log_likelihood(models['normal_full'].sufficient_statistics(X)), \
models['normal_diag'].expected_log_likelihood(models['normal_diag'].sufficient_statistics(X)), \
models['normal_iso'].expected_log_likelihood(models['normal_iso'].sufficient_statistics(X))

(tensor([-9.2000]), tensor([-8.9107]), tensor([-10.6967]))

In [14]:
print(elbos)

{'normal_full': [-12.971304893493652, -12.971304893493652], 'normal_diag': [-12.540616035461426, -12.540616035461426], 'normal_iso': [-14.22319221496582, -14.22319221496582]}


In [15]:
stats = models['normal_iso'].sufficient_statistics(X) 
stats.sum(dim=0), \
models['normal_iso'].mean_precision.prior.natural_parameters + stats.sum(dim=0),\
models['normal_iso'].mean_precision.posterior.natural_parameters

(tensor([-3.6848,  1.6926,  2.1224, -0.5000,  1.0000]),
 tensor([-5053.6851,     1.6926,   102.1224,    -1.0000,     2.0000]),
 tensor([-5053.6851,     1.6926,   102.1224,    -1.0000,     2.0000]))

In [16]:
fig = figure(
    width=400,
    height=400,
    x_range=(mean[0] - 5, mean[0] + 5),
    y_range=(mean[1] - 5, mean[1] + 5)
)
fig.circle(data[:, 0], data[:, 1])
plotting.plot_normal(fig, normal_iso.mean.numpy(), normal_iso.cov.numpy(), 
                     line_color='black', fill_alpha=.3, color='green')
plotting.plot_normal(fig, normal_diag.mean.numpy(), normal_diag.cov.numpy(), 
                     line_color='black', fill_alpha=.3, color='#98AFC7')
plotting.plot_normal(fig, normal_full.mean.numpy(), normal_full.cov.numpy(), 
                     line_color='black', fill_alpha=.3, color='#C7B097')

show(fig)

In [17]:
normal_iso = beer.Normal.create(data_mean, data_var, 1., cov_type='isotropic')
normal_diag = beer.Normal.create(data_mean, data_var, 1., cov_type='diagonal')
normal_full = beer.Normal.create(data_mean, data_var, 1., cov_type='full')

models = {
    'normal_full': normal_full,
    'normal_diag': normal_diag,
    'normal_iso': normal_iso
}

model = models['normal_iso']

In [18]:
epochs = 2
lrate = 1

optims = {model_name: beer.CVBOptimizer(model.bayesian_parameters())
          for model_name, model in models.items()}
elbos = {model_name: [] for model_name in models}  
batch_stats = {model_name: defaultdict(lambda: defaultdict(lambda: None))
               for model_name in models}

for epoch in range(epochs):
    batch_ids = list(range(len(batches)))
    random.shuffle(batch_ids)
    for name, model in models.items():
        for batch_id in batch_ids:
            optim = optims[name]
            #optim.init_step() 
            optim.init_step(batch_stats[name][batch_id]) 
            elbo = beer.collapsed_evidence_lower_bound(model, batches[batch_id], lrate=lrate)
            batch_stats[name][batch_id] = elbo.backward()
            optim.step()
            elbos[name].append(float(elbo) / len(X))

In [19]:
colors = {
    'normal_iso': 'green',
    'normal_diag': 'blue',
    'normal_full': 'red',
    
}
# Plot the ELBO.
fig = figure(title='ELBO', width=400, height=400, x_axis_label='step',
              y_axis_label='ln p(X)')
for model_name, elbo in elbos.items():
    fig.line(range(len(elbo)), elbo, legend=model_name, color=colors[model_name])
fig.legend.location = 'bottom_right'

show(fig)

In [20]:
models['normal_full'].kl_div_posterior_prior(), \
models['normal_diag'].kl_div_posterior_prior(), \
models['normal_iso'].kl_div_posterior_prior()

(tensor([3.7713]), tensor([3.6299]), tensor([3.5264]))

In [21]:
print(elbos)

{'normal_full': [-13.003498077392578, -13.003498077392578], 'normal_diag': [-12.540616989135742, -12.540616989135742], 'normal_iso': [-14.22319221496582, -14.22319221496582]}


In [22]:
fig = figure(
    width=400,
    height=400,
    x_range=(mean[0] - 5, mean[0] + 5),
    y_range=(mean[1] - 5, mean[1] + 5)
)
fig.circle(data[:, 0], data[:, 1])
plotting.plot_normal(fig, normal_iso.mean.numpy(), normal_iso.cov.numpy(), 
                     line_color='black', fill_alpha=.3, color='green')
plotting.plot_normal(fig, normal_diag.mean.numpy(), normal_diag.cov.numpy(), 
                     line_color='black', fill_alpha=.3, color='#98AFC7')
plotting.plot_normal(fig, normal_full.mean.numpy(), normal_full.cov.numpy(), 
                     line_color='black', fill_alpha=.3, color='#C7B097')

show(fig)

In [23]:
stats = models['normal_iso'].sufficient_statistics(X) 
stats.sum(dim=0), \
models['normal_iso'].mean_precision.prior.natural_parameters + stats.sum(dim=0),\
models['normal_iso'].mean_precision.posterior.natural_parameters

(tensor([-3.6848,  1.6926,  2.1224, -0.5000,  1.0000]),
 tensor([-5053.6851,     1.6926,   102.1224,    -1.0000,     2.0000]),
 tensor([-5053.6851,     1.6926,   102.1224,    -1.0000,     2.0000]))