# VAE - Gaussian Linear Classifier

This notebook illustrates how to combine a Variational AutoEncoder (VAE) and a Gaussian Linear Classifier (GLC) with the [beer framework](https://github.com/beer-asr/beer).

In [3]:
%load_ext autoreload
%autoreload 2

# Add the path of the beer source code to the PYTHONPATH.
import sys
sys.path.insert(0, '../')

import math
import numpy as np
import torch
import torch.optim
from torch import nn
from torch.autograd import Variable


# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d

# Beer framework
import beer

# Convenience functions for plotting.
import plotting

output_notebook(verbose=False)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Data 

As a simple example we consider the following synthetic data: 

In [4]:
# Toy data set: `ntargets` Gaussian classes of `N` points each, whose means
# are placed on the parabola y = (0.1 * x)**2 for x evenly spaced in [0, 20].
ntargets = 5
N = 100
Xs = []
labels = []

x = np.linspace(0, 20, ntargets)
means = np.c_[x, (.1 * x)**2]

# Covariance shared by every class (negatively correlated dimensions).
# It does not depend on the class, so it is built once outside the loop.
cov = np.array([[1, -.75], [-.75, 1]])

for i in range(ntargets):
    mean = means[i]
    X = np.random.multivariate_normal(mean, cov, N)
    # Class index stored as a float array, one entry per sample.
    labels.append(np.ones(len(X)) * i)
    Xs.append(X)

# Shuffle samples and labels with the same random permutation.
idxs = np.arange(0, ntargets * N)
np.random.shuffle(idxs)
data = np.vstack(Xs)[idxs]
labels = np.hstack(labels)[idxs]

# Hold out the last 100 shuffled points as a test set.
test_data = data[-100:]
test_labels = labels[-100:]
data = data[:-100]
labels = labels[:-100]


# Scatter plot of the per-class samples (`Xs`, i.e. before shuffling),
# one colour per class.
fig = figure(title='Synthetic data', width=400, height=400)
colors = ['salmon', 'blue', 'green', 'yellow', 'black', 'red', 'cyan', 'purple', 'brown', 'pink']
for sX, color in zip(Xs, colors):
    fig.circle(sX[:, 0], sX[:, 1], color=color)
show(fig)

In [5]:
# Synthetic data set: `ntargets` curved classes of `N` points each, stacked
# along the second axis. This cell re-assigns `Xs`, `data`, `labels`,
# `test_data` and `test_labels`.
ntargets = 5
N = 100
Xs, labels = [], []

for i in range(ntargets):
    mean = np.array([0, 2. - (i * 1.5)])
    cov = np.array([[.75, 0.], [0., .075]])
    gauss = np.random.multivariate_normal(mean, cov, size=N)

    # Bend the Gaussian cloud: the first coordinate is kept as is and the
    # squared horizontal offset from the class mean is added to the second.
    offset = gauss[:, 0] - mean[0]
    bent = np.zeros_like(gauss)
    bent[:, 0] = gauss[:, 0]
    bent[:, 1] = gauss[:, 1] + offset ** 2

    Xs.append(bent)
    labels.append(np.ones(len(bent)) * i)

# Shuffle samples and labels with one shared random permutation.
order = np.arange(0, ntargets * N)
np.random.shuffle(order)
data = np.vstack(Xs)[order]
labels = np.hstack(labels)[order]

# Standardise each dimension. The statistics are computed on the whole
# set, i.e. before the test points are split off below.
centred = data - data.mean(axis=0)
data = centred / np.sqrt(data.var(axis=0))

# The last 100 shuffled points form the test set, the rest is for training.
test_data, test_labels = data[-100:], labels[-100:]
data, labels = data[:-100], labels[:-100]

# Scatter plot of the classes. Note that `Xs` holds the samples as they
# were generated, so the figure shows the data before standardisation.
colors = ['salmon', 'blue', 'green', 'yellow', 'black', 'red', 'cyan', 'purple', 'brown', 'pink']
fig = figure(title='Synthetic data', width=400, height=400)
for cluster, color in zip(Xs, colors):
    fig.circle(cluster[:, 0], cluster[:, 1], color=color)
show(fig)

In [6]:
# Sanity check of the standardisation: per-dimension mean and variance of the
# training split. They are close to, but not exactly, 0 and 1 because the
# normalisation was applied before the last 100 points were held out.
data.mean(axis=0), data.var(axis=0)

(array([0.04454875, 0.01087412]), array([1.01510389, 1.00751736]))

## Model Creation

We first create the VAE-GLC.

#### NOTE:
To obtain a Gaussian Quadratic Classifier, use a GMM model with an individual (diagonal) covariance matrix per component.

In [7]:
# Per-dimension statistics of the training data; both are passed to the
# model factory at the end of this cell.
data_mean = torch.from_numpy(data.mean(axis=0)).float()
data_var = torch.from_numpy(np.var(data, axis=0)).float()

# Dimension of the observed space.
obs_dim = data.shape[1]

# The latent space has the same dimension as the observed space, which
# keeps the latent representation easy to plot. Per the original author's
# note, this is also exploited for a direct skip-connection from the
# input to the mean.
latent_dim = obs_dim

# Number of units per hidden layer (shared by encoder and decoder).
n_units = 20

# Configuration of the Normal model.
llh_model_conf = {
    'type': 'Normal',
    'covariance': 'isotropic',
    'prior_strength': 1.,
    'noise_std': 0,
}

# Encoder network: two tanh hidden layers, output of size `latent_dim`.
encoder_conf = {
    'type': 'FeedForwardEncoder',
    'dim_out': latent_dim,
    'dim_hlayer': n_units,
    'n_layer': 2,
    'non_linearity': 'tanh',
    'covariance': 'isotropic'
}

# Decoder network: a single tanh hidden layer.
decoder_conf = {
    'type': 'FeedForwardDecoder',
    'dim_hlayer': n_units,
    'n_layer': 1,
    'non_linearity': 'tanh',
}

# Latent model: a mixture with one PLDA component per class.
latent_model_conf = {
    'type': 'Mixture',
    'prior_strength': 1.,
    'components': {
        'type': 'PLDASet',
        'size': ntargets,
        'dim_noise_subspace': 1,
        'dim_class_subspace': 1,
        'prior_strength': 1.,
        'noise_std': 0.1
    }
}

# Assemble the full model from its parts and cast it to double precision.
model = beer.create_model(
    {
        'type': 'NonLinearSubspaceModel',
        'normal_model': llh_model_conf,
        'encoder': encoder_conf,
        'decoder': decoder_conf,
        'latent_model': latent_model_conf,
    },
    data_mean,
    .01 * data_var,
).double()

In [None]:
# Display the encoder's repr to inspect the network that was built.
model.encoder

## Variational Bayes Training

### 1. Pre-training

In [None]:
# Settings of the pre-training stage.
# `npoints` (N * ntargets) is larger than the number of rows left in `data`
# after the hold-out split, so the slices below simply take every row.
npoints = N * ntargets
epochs = 1_000
lrate_bayesmodel = 1.
lrate_encoder = 1e-3

# Model and training tensors, all in double precision (labels as int64).
model = model.double()
X = torch.from_numpy(data[:npoints]).double()
targets = torch.from_numpy(labels[:npoints]).long()

# The encoder/decoder weights are updated by Adam; the optimizer below
# receives the parameter groups of the Normal and latent models and wraps
# the Adam optimizer through `std_optim`.
nnet_parameters = [*model.encoder.parameters(), *model.decoder.parameters()]
std_optimizer = torch.optim.Adam(nnet_parameters, lr=lrate_encoder,
                                 weight_decay=1e-2)
optimizer = beer.BayesianModelCoordinateAscentOptimizer(
    *model.normal.grouped_parameters,
    *model.latent_model.grouped_parameters,
    lrate=lrate_bayesmodel,
    std_optim=std_optimizer)

# Full-batch optimisation of the ELBO with the labels provided.
# NOTE(review): kl_weight=.1 presumably down-weights the KL term during
# pre-training -- confirm against beer.evidence_lower_bound.
elbos = []
for epoch in range(epochs):
    optimizer.zero_grad()
    elbo = beer.evidence_lower_bound(model, X, datasize=len(X),
                                     labels=targets, nsamples=1, kl_weight=.1)
    elbo.backward()
    elbo.natural_backward()
    optimizer.step()

    # The value of the very first step is not recorded.
    if epoch == 0:
        continue
    elbos.append(float(elbo) / len(X))

# Plot the per-sample ELBO against the optimisation step.
fig = figure(title='ELBO', width=400, height=400, x_axis_label='step',
             y_axis_label='ln p(X)')
fig.line(np.arange(len(elbos)), elbos, color='blue')

show(fig)

### 2. Training

In [None]:
# Main training stage. It continues from the pre-trained model and relies on
# `X`, `targets`, `lrate_bayesmodel` and `std_optimizer` defined in the
# pre-training cell, which therefore has to be run first.
epochs = 10_000

# New optimizer over the parameter groups of the Normal and latent models.
# The Adam optimizer created during pre-training is reused as is.
optimizer = beer.BayesianModelCoordinateAscentOptimizer(
    *model.normal.grouped_parameters,
    *model.latent_model.grouped_parameters,
    lrate=lrate_bayesmodel,
    std_optim=std_optimizer)

# Same loop as in pre-training, but with nsamples=5 and kl_weight=1.
elbos = []
for epoch in range(epochs):
    optimizer.zero_grad()
    elbo = beer.evidence_lower_bound(model, X, datasize=len(X),
                                     labels=targets, nsamples=5, kl_weight=1.)
    elbo.backward()
    elbo.natural_backward()
    optimizer.step()

    # The value of the very first step is not recorded.
    if epoch > 0:
        elbos.append(float(elbo) / len(X))

# Plot the per-sample ELBO against the optimisation step.
fig = figure(title='ELBO', width=400, height=400, x_axis_label='step',
              y_axis_label='ln p(X)')
fig.line(np.arange(len(elbos)), elbos, color='blue')

show(fig)

In [None]:
# Colour palette, one entry per class. It is defined first so that both
# figures of this cell use it without depending on a previous cell.
colors = ['salmon', 'blue', 'green', 'yellow', 'black', 'red', 'cyan', 'purple', 'brown', 'pink']

# Encode the first `d` training points.
# NOTE(review): the two encoder outputs are presumably the per-point
# posterior mean and variance -- confirm against the beer encoder.
d = 200
mean, var = model.encoder(X[:d])
mean, var = mean.detach().numpy(), var.detach().numpy()

# Left figure: latent space. Each point is drawn at its encoded mean with an
# axis-aligned ellipse whose axes are 2 * sqrt(var) long.
fig1 = figure(width=400, height=400)
for l, m, v in zip(labels[:d], mean, var):
    color = colors[int(l)]
    fig1.circle(m[0], m[1], color=color)
    fig1.ellipse(x=m[0], y=m[1],
                 width=2 * np.sqrt(v[0]),
                 height=2 * np.sqrt(v[1]),
                 fill_alpha=0, color=color)
    fig1.cross(m[0], m[1], color=color)

# Overlay the components of the latent model, one colour per component.
for color, comp in zip(colors, model.latent_model.modelset):
    plotting.plot_normal(fig1, comp.mean.numpy(), comp.cov.numpy(), alpha=.3,
                         line_color='black', color=color)

# Right figure: observed space with the model's Normal distribution on top.
# NOTE(review): `Xs` holds the samples as generated, whereas the model was
# trained on the standardised `data` -- check that both live on the same
# scale before reading too much into this overlay.
fig2 = figure(title='Data', width=400, height=400)
for sX, color in zip(Xs, colors):
    fig2.circle(sX[:, 0], sX[:, 1], color=color)

plotting.plot_normal(fig2, model.normal.mean.numpy(), model.normal.cov.numpy(), alpha=.5)
grid = gridplot([[fig1, fig2]])
show(grid)