# VAE - Gaussian Linear Classifier

This notebook illustrates how to combine a Variational AutoEncoder (VAE) and a Gaussian Linear Classifier (GLC) with the [beer framework](https://github.com/beer-asr/beer).

In [1]:
%load_ext autoreload
%autoreload 2

# Add the path of the beer source code to the PYTHONPATH.
import sys
sys.path.insert(0, '../')

import numpy as np
import torch
import torch.optim
from torch import nn
from torch.autograd import Variable


# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d

# Beer framework
import beer

# Convenience functions for plotting.
import plotting

output_notebook(verbose=False)

## Data 

As a simple example we consider the following synthetic data: 

In [2]:
# Synthetic data set: `ntargets` curved ("banana"-shaped) classes with
# `N` points each.
ntargets = 5
N = 100

# Number of points held out for testing (taken from the end of the
# shuffled data set).
ntest = 100

# Fix the NumPy seed so that "Restart & Run All" regenerates the same
# samples, the same shuffling and therefore the same train/test split.
np.random.seed(0)

Xs = []
labels = []
for i in range(ntargets):
    # Gaussian cloud whose mean moves down the y-axis with the class index.
    mean = np.array([0, 2. - (i * 1.5)])
    cov = np.array([[.75, 0.], [0., .075]])
    Z1 = np.random.multivariate_normal(mean, cov, size=N)

    # Bend the cloud by adding the squared, centered x-coordinate to y.
    X1 = np.zeros_like(Z1)
    X1[:, 0] = Z1[:, 0]
    X1[:, 1] = Z1[:, 1] + (Z1[:, 0] - mean[0]) ** 2
    labels.append(np.ones(len(X1)) * i)
    Xs.append(X1)

# Shuffle the points and their labels with the same permutation.
idxs = np.arange(0, ntargets * N)
np.random.shuffle(idxs)
data = np.vstack(Xs)[idxs]
labels = np.hstack(labels)[idxs]

# Hold out the last `ntest` shuffled points; the rest is the training set.
test_data = data[-ntest:]
test_labels = labels[-ntest:]
data = data[:-ntest]
labels = labels[:-ntest]


# Scatter plot of all generated points (train and test), one color per class.
fig = figure(title='Synthetic data', width=400, height=400)
colors = ['salmon', 'blue', 'green', 'yellow', 'black', 'red', 'cyan', 'purple', 'brown', 'pink']
for sX, color in zip(Xs, colors):
    fig.circle(sX[:, 0], sX[:, 1], color=color)
show(fig)

## Model Creation

We first create the VAE-GLC.

#### NOTE:
To obtain a Gaussian Quadratic Classifier, use a GMM in which each component has its own (diagonal) covariance matrix.

In [3]:
# Dimension of the observed space.
obs_dim = data.shape[1]

# Dimension of the latent space. It can be bigger or smaller
# than the dimension of the observed space.
latent_dim = 2

# Number of units per hidden-layer.
n_units = 20

# Neural network structure of the encoder/decoder of the model.
# The encoder reads observations (obs_dim inputs) ...
enc_struct = nn.Sequential(
    nn.Linear(obs_dim, n_units),
    nn.Tanh(),
    nn.Linear(n_units, n_units),
    nn.Tanh(),
)
encoder = beer.MLPNormalDiag(enc_struct, latent_dim)

# ... whereas the decoder reads latent samples, so its first layer must
# take `latent_dim` inputs. Using `obs_dim` here only works by accident
# when both dimensions are equal.
dec_struct = nn.Sequential(
    nn.Linear(latent_dim, n_units),
    nn.Tanh(),
    nn.Dropout(p=0.01),
    nn.Linear(n_units, n_units),
    nn.Tanh(),
    nn.Dropout(p=0.01)
)
decoder = beer.MLPNormalDiag(dec_struct, obs_dim)

# Model of the latent space.
# We use Mixture of normal with diagonal cov. It can be changed
# to other model.
# ----------------------------------------------------------------------

# One mixture component per class.
ncomps = ntargets

# Prior statistics: every component starts from the mean of the training
# data and the inverse of its per-dimension variance.
# NOTE(review): these statistics are computed in the observed space, so
# they have `obs_dim` entries. They presumably need `latent_dim` entries
# to be valid for the latent model -- confirm before using
# latent_dim != obs_dim.
p_mean = torch.from_numpy(data.mean(axis=0)).float()
p_means = torch.cat([p_mean[None]]*ncomps, dim=0)
p_prec = 1 / torch.from_numpy(np.diag(np.cov(data.T))).float()

# Alternative latent model, kept for reference: GMM (diag cov).
#prior_weights = beer.DirichletPrior(torch.ones(ntargets))
#posterior_weights = beer.DirichletPrior(torch.ones(ntargets))
#prior = beer.NormalGammaPrior(torch.zeros(latent_dim), torch.ones(latent_dim), 1.)
#posts = [beer.NormalGammaPrior(torch.zeros(latent_dim), torch.ones(latent_dim), 1.)
#         for _ in range(ntargets)]
#normalset = beer.NormalDiagonalCovarianceSet(prior, posts)
#latent_model = beer.Mixture(prior_weights, posterior_weights, normalset)

# Latent model used here: mixture of normals sharing one diagonal covariance.
prior_weights = beer.DirichletPrior(torch.ones(ncomps) * 1)
posterior_weights = beer.DirichletPrior(torch.ones(ncomps))
prior = beer.JointNormalGammaPrior(p_means, p_prec, 1)
posterior = beer.JointNormalGammaPrior(p_means,
                                       p_prec, 1.)
normalset = beer.NormalSetSharedDiagonalCovariance(prior, posterior, ncomps)
latent_model = beer.Mixture(prior_weights, posterior_weights, normalset)

# ----------------------------------------------------------------------

# Putting everything together to build the SVAE.
model = beer.VAE(encoder, decoder, latent_model, nsamples=15)

## Variational Bayes Training

In [4]:
# Training configuration.
npoints = 500
epochs = 10_000
lrate_bayesmodel = 1
lrate_encoder = 1e-3

# Training inputs and class targets as torch tensors. The slice keeps at
# most `npoints` points.
X = torch.from_numpy(data[:npoints]).float()
targets = torch.from_numpy(labels[:npoints]).long()
loss_fn = beer.StochasticVariationalBayesLoss(len(X))

# Adam updates the encoder/decoder weights; it is handed to the beer
# optimizer, which is also given the latent model parameters.
nnet_parameters = [*model.encoder.parameters(), *model.decoder.parameters()]
std_optimizer = torch.optim.Adam(
    nnet_parameters,
    lr=lrate_encoder,
    weight_decay=1e-2,
)
params = model.latent_model.parameters
optimizer = beer.BayesianModelOptimizer(
    params,
    lrate_bayesmodel,
    std_optim=std_optimizer,
)

# Full-batch training; the per-point loss is recorded after each step.
elbos = []
for epoch in range(epochs):
    optimizer.zero_grad()
    loss = loss_fn(model, Variable(X), Variable(targets))
    loss.backward()
    optimizer.step()

    # The value of the very first step is not recorded.
    if epoch == 0:
        continue
    elbos.append(float(loss) / len(X))

# Plot the ELBO.
fig = figure(
    title='ELBO',
    width=400,
    height=400,
    x_axis_label='step',
    y_axis_label='ln p(X)',
)
fig.line(np.arange(len(elbos)), elbos, color='blue')

show(fig)

In [5]:
# Number of training points to display in the latent space.
d = 100

# Encode the first `d` training points and pull the mean/variance of the
# encoder output out as NumPy arrays.
enc_state = model.encoder(X[:d])
mean, var = enc_state.mean.data.numpy(), enc_state.var.data.numpy()

# Left panel: one marker per encoded point plus an ellipse spanning one
# standard deviation on each side of the mean, colored by class label.
# `colors` is the palette defined in the data cell; it is reused as-is
# rather than redefined here.
fig1 = figure(title='Latent space', width=400, height=400)
for l, m, v in zip(labels[:d], mean, var):
    point_color = colors[int(l)]
    fig1.circle(m[0], m[1], color=point_color)
    fig1.ellipse(x=m[0], y=m[1],
                 width=2 * np.sqrt(v[0]),
                 height=2 * np.sqrt(v[1]),
                 fill_alpha=0, color=point_color)
    fig1.cross(m[0], m[1], color=point_color)

# Overlay the components of the latent model, one color per component.
for color, comp in zip(colors, model.latent_model.components):
    plotting.plot_latent_model(fig1, comp, alpha=.5, color=color)

# Right panel: the generated data in the observed space, for comparison.
fig2 = figure(title='Data', width=400, height=400)
for sX, color in zip(Xs, colors):
    fig2.circle(sX[:, 0], sX[:, 1], color=color)

grid = gridplot([[fig1, fig2]])
show(grid)