# VAE - Gaussian Linear Classifier

This notebook illustrates how to combine a Variational AutoEncoder (VAE) and a Gaussian Linear Classifier (GLC) with the [beer framework](https://github.com/beer-asr/beer).

In [None]:
%load_ext autoreload
%autoreload 2

# Add the path of the beer source code to the PYTHONPATH.
import sys
sys.path.insert(0, '../')

import numpy as np
import torch
import torch.optim
from torch import nn
from torch.autograd import Variable


# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d

# Beer framework
import beer

# Convenience functions for plotting.
import plotting

output_notebook(verbose=False)

## Data 

As a simple example we consider the following synthetic data: 

In [None]:
# Number of classes and number of points sampled for each class.
ntargets = 5
N = 100

# Number of (shuffled) points held out as the test set.
ntest = 100

# Seed NumPy's global generator so that the sampled points, the shuffling
# and therefore the train/test split are identical on every run.
np.random.seed(0)

Xs = []
class_labels = []
for i in range(ntargets):
    # Sample a Gaussian cloud whose centre moves down by 1.5 per class.
    mean = np.array([0, 2. - (i * 1.5)])
    cov = np.array([[.75, 0.], [0., .075]])
    Z1 = np.random.multivariate_normal(mean, cov, size=N)

    # Bend the cloud: the second coordinate is shifted by the squared
    # (centred) first coordinate.
    X1 = np.zeros_like(Z1)
    X1[:, 0] = Z1[:, 0]
    X1[:, 1] = Z1[:, 1] + (Z1[:, 0] - mean[0]) ** 2
    class_labels.append(np.ones(len(X1)) * i)
    Xs.append(X1)

# Shuffle points and labels with the same permutation.
idxs = np.arange(0, ntargets * N)
np.random.shuffle(idxs)
data = np.vstack(Xs)[idxs]
labels = np.hstack(class_labels)[idxs]

# The last `ntest` shuffled points form the test set; the rest is used
# for training.
test_data = data[-ntest:]
test_labels = labels[-ntest:]
data = data[:-ntest]
labels = labels[:-ntest]


# Plot all the generated points, one colour per class.
fig = figure(title='Synthetic data', width=400, height=400)
colors = ['salmon', 'blue', 'green', 'yellow', 'black', 'red', 'cyan', 'purple', 'brown', 'pink']
for sX, color in zip(Xs, colors):
    fig.circle(sX[:, 0], sX[:, 1], color=color)
show(fig)

## Model Creation

We first create the VAE-GLC.

#### NOTE:
To obtain a Gaussian Quadratic Classifier, use a GMM with an individual (diagonal) covariance matrix for each component.

In [None]:
# Width of every hidden layer of the encoder/decoder networks.
n_units = 20

# Size of the observed space, i.e. the number of columns of the data.
obs_dim = data.shape[1]

# Size of the latent space. It is deliberately set to the size of the
# observed space: this keeps the latent space easy to visualise and
# allows a direct skip-connection from the network input to the
# predicted mean.
latent_dim = obs_dim

class GaussianMLP(nn.Module):
    """Neural network predicting a diagonal-covariance Normal distribution.

    The hidden representation computed by ``structure`` is mapped by two
    linear layers to a mean and a log-variance. The input is added to the
    predicted mean (skip-connection).
    """

    def __init__(self, structure, space_dim, hidden_dim=None):
        """Build the network.

        Args:
            structure: Module (or callable) computing the hidden
                representation from the input.
            space_dim (int): Dimension of the predicted mean/variance.
            hidden_dim (int, optional): Size of the output of
                ``structure``. When omitted, the notebook-level
                ``n_units`` setting is used, as before.
        """
        super().__init__()
        if hidden_dim is None:
            # Backward-compatible default: rely on the global setting.
            hidden_dim = n_units
        self.nn = structure
        self.h2mean = nn.Linear(hidden_dim, space_dim)
        self.h2logvar = nn.Linear(hidden_dim, space_dim)

        self.h2logvar.bias.data += -1.0 # init with small (log)variance

    def forward(self, X):
        """Return the distribution predicted for the input ``X``.

        Args:
            X: Input of the network. It is added to the predicted mean,
                so it has to be broadcast-compatible with a mean of
                ``space_dim`` dimensions.

        Returns:
            ``beer.NormalDiagonalCovarianceMLP`` built from the mean
            (plus the input) and the exponentiated log-variance.
        """
        h = self.nn(X)
        mean = self.h2mean(h)
        logvar = self.h2logvar(h)
        return beer.NormalDiagonalCovarianceMLP(mean + X, logvar.exp())

# Hidden layers of the encoder: they read points of the observed space.
enc_struct = nn.Sequential(
    nn.Linear(obs_dim, n_units),
    nn.Tanh(),
    nn.Linear(n_units, n_units),
    nn.Tanh(),
)

# Hidden layers of the decoder: they read points of the latent space,
# hence the input size is `latent_dim` (equal to `obs_dim` in this
# notebook).
dec_struct = nn.Sequential(
    nn.Linear(latent_dim, n_units),
    nn.Tanh(),
    nn.Dropout(p=0.01),
    nn.Linear(n_units, n_units),
    nn.Tanh(),
    nn.Dropout(p=0.01)
)

# Model of the latent space.
# We use a mixture of Normal distributions with a shared diagonal
# covariance matrix (one component per class). It can be replaced by
# another model.
# ----------------------------------------------------------------------

weights = torch.ones(ntargets) / ntargets
normalset = beer.NormalSetSharedDiagonalCovariance.create(
    torch.zeros(latent_dim), torch.ones(latent_dim), ntargets, noise_std=0.1
)

latent_model = beer.Mixture.create(weights, normalset)

# ----------------------------------------------------------------------

# Putting everything together to build the VAE: the encoder predicts a
# distribution over the latent space, the decoder a distribution over
# the observed space.
model = beer.VAE(
    GaussianMLP(enc_struct, latent_dim),
    GaussianMLP(dec_struct, obs_dim),
    latent_model,
    nsamples=15,
)

# Values of the (per-point) ELBO collected during training.
elbos = []

## Variational Bayes Training

In [None]:
# Training configuration.
epochs = 5_000
npoints = 500
lrate_encoder = 1e-3
lrate_bayesmodel = 1

# Training tensors: at most `npoints` points (the slice keeps every
# point when fewer are available) and their class labels.
X = torch.from_numpy(data[:npoints]).float()
targets = torch.from_numpy(labels[:npoints]).long()
elbo_fn = beer.EvidenceLowerBound(len(X))

# Adam handles the weights of the encoder/decoder networks. It is
# wrapped (`std_optim`) by the optimizer that is given the parameters of
# the latent model.
nnet_parameters = [*model.encoder.parameters(), *model.decoder.parameters()]
std_optimizer = torch.optim.Adam(
    nnet_parameters, lr=lrate_encoder, weight_decay=1e-2
)
params = model.latent_model.parameters
optimizer = beer.BayesianModelOptimizer(
    params, lrate_bayesmodel, std_optim=std_optimizer
)

for epoch in range(epochs):
    optimizer.zero_grad()
    elbo = elbo_fn(model, Variable(X), Variable(targets))
    elbo.backward()
    elbo.natural_backward()
    optimizer.step()

    # The value of the very first epoch is left out of the curve.
    if epoch == 0:
        continue
    elbos.append(float(elbo) / len(X))

# Plot the ELBO.
fig = figure(
    title='ELBO', width=400, height=400,
    x_axis_label='step', y_axis_label='ln p(X)',
)
fig.line(np.arange(len(elbos)), elbos, color='blue')

show(fig)

In [None]:
# Encode the first `d` training points and fetch the mean and variance
# predicted by the encoder as NumPy arrays.
d = 100
enc_state = model.encoder(X[:d])
mean = enc_state.mean.data.numpy()
var = enc_state.var.data.numpy()

# Left figure: the encoded points in the latent space, each drawn with
# the colour of its class and an ellipse whose axes are two standard
# deviations long.
fig1 = figure(width=400, height=400)
for label, mu, sigma2 in zip(labels[:d], mean, var):
    point_color = colors[int(label)]
    fig1.circle(mu[0], mu[1], color=point_color)
    fig1.ellipse(
        x=mu[0], y=mu[1],
        width=2 * np.sqrt(sigma2[0]),
        height=2 * np.sqrt(sigma2[1]),
        fill_alpha=0, color=point_color,
    )
    fig1.cross(mu[0], mu[1], color=point_color)

# Overlay the components of the latent model.
for component_color, component in zip(colors, model.latent_model.modelset):
    plotting.plot_normal(
        fig1, component.mean.numpy(), component.cov.numpy(),
        alpha=.3, line_color='black', color=component_color,
    )

# Right figure: the generated data in the observed space.
fig2 = figure(title='Data', width=400, height=400)
colors = ['salmon', 'blue', 'green', 'yellow', 'black', 'red', 'cyan', 'purple', 'brown', 'pink']
for class_points, class_color in zip(Xs, colors):
    fig2.circle(class_points[:, 0], class_points[:, 1], color=class_color)

grid = gridplot([[fig1, fig2]])
show(grid)