# Variational AutoEncoder

This notebook illustrates how to build and train a Variational AutoEncoder with the [beer framework](https://github.com/beer-asr/beer).

In [1]:
# Add "beer" to the PYTHONPATH
import sys
sys.path.insert(0, '../')

import copy

import beer
import numpy as np
import torch

import torchvision
import torchvision.transforms as transforms

# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d
output_notebook()

# Convenience functions for plotting.
import plotting

%load_ext autoreload
%autoreload 2

## Data

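As an illustration, we generate a synthetic data set composed of two normally distributed clusters.
One has a diagonal covariance matrix whereas the other has a dense covariance matrix.
Those two clusters overlap, so it is reasonable to map all the data to a single Gaussian in the latent space.

**Note:** the code cell below actually loads the MNIST handwritten-digit data set through `torchvision` rather than generating this synthetic data.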

In [2]:
# --- Configuration -----------------------------------------------------------
# Directory that holds (or will receive) the MNIST files.
root = './data'
# Switch to True when constructing `train_set` below complains that the
# data set is missing.
download = False
# Number of images per mini-batch, shared by both loaders.
batch_size = 16

# --- Preprocessing -----------------------------------------------------------
# p=1.0 makes the "random" vertical flip unconditional, so every image is
# flipped before being converted to a tensor.
# (A Normalize((0.5,), (1.0,)) step used to be listed here but is disabled.)
trans = transforms.Compose([
    transforms.RandomVerticalFlip(p=1.0),
    transforms.ToTensor(),
])

# --- Data sets and loaders ---------------------------------------------------
# Only the training split honours the `download` switch.
train_set = torchvision.datasets.MNIST(
    root=root,
    train=True,
    transform=trans,
    download=download,
)
test_set = torchvision.datasets.MNIST(
    root=root,
    train=False,
    transform=trans,
)

# Training batches are shuffled; test batches keep the data set order.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)

print('==>>> total trainning batch number: {}'.format(len(train_loader)))
print('==>>> total testing batch number: {}'.format(len(test_loader)))

==>>> total trainning batch number: 3750
==>>> total testing batch number: 625


In [3]:
# Draw a single mini-batch from the training loader: X holds the images and
# t the matching labels. Because the loader shuffles, the batch differs
# between runs.
X, t = next(iter(train_loader))

In [4]:
# Tile the first sqrt_nb x sqrt_nb images of the batch into one figure.
sqrt_nb = 3
grid_extent = sqrt_nb * 28  # each image is 28 units wide/high
fig = figure(x_range=[0, grid_extent], y_range=[0, grid_extent])
for idx in range(sqrt_nb ** 2):
    row, col = divmod(idx, sqrt_nb)
    fig.image(
        image=[X[idx][0].numpy()],
        x=col * 28,
        # Row 0 receives the largest y offset, later rows move downwards.
        y=(sqrt_nb - row - 1) * 28,
        # Drawn 27 units wide on a 28-unit pitch, leaving a 1-unit gap
        # between neighbouring tiles.
        dw=27,
        dh=27,
    )
show(fig)

# Labels of the plotted images, arranged in the same grid layout.
print(t[:sqrt_nb**2].view(sqrt_nb, sqrt_nb))


tensor([[ 0,  1,  0],
        [ 0,  9,  6],
        [ 8,  1,  1]])


## Model

We build a VAE with a Gaussian distribution in the latent space and a Bernoulli distribution on each individual pixel in the observed space.

In [5]:
# Size of the observed space: one value per pixel of a 28x28 image.
observed_dim = 28 ** 2
# Size of the latent space.
latent_dim = 2

In [14]:
# Location of the pre-trained VAE checkpoint.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of the model before running the notebook elsewhere.
VAE_PATH = '/mnt/matylda5/ibenes/projects/beer/vae.mdl'

# SECURITY: torch.load unpickles the file, which can execute arbitrary code.
# Only load checkpoints that come from a trusted source.
#
# map_location='cpu' forces every stored tensor onto the CPU. The cells below
# feed CPU tensors to the model and call .numpy() on its outputs, so a
# checkpoint that was saved from a GPU would otherwise fail there (or fail to
# load at all on a machine without CUDA).
vae = torch.load(VAE_PATH, map_location='cpu')

In [18]:
# Number of latent samples along each axis of the grid.
sqrt_nb = 20

# Half-width of the latent region to cover: a value of 2 spans -2 .. +2.
# The output of the next cell helps to pick a sensible value.
latent_range = 3

# Dividing by (sqrt_nb - 1) puts the first and last samples exactly on the
# two ends of the range.
latent_step = 2*latent_range / (sqrt_nb-1)
latent_positions = [-latent_range + i*latent_step for i in range(sqrt_nb)]

# Each tile is centred on its latent position, so the axes extend half a step
# beyond the outermost samples.
half_step = latent_step / 2
complete_range = [-latent_range - half_step, latent_range + half_step]
fig = figure(x_range=complete_range, y_range=complete_range)

for ly in latent_positions:
    for lx in latent_positions:
        # Decode one latent point and show the `mu` of the decoder output
        # as a 28x28 image.
        latent_point = torch.autograd.Variable(torch.Tensor([lx, ly]))
        decoded = vae.decoder(latent_point).mu
        pixels = decoded.view(28, 28).data.numpy()
        fig.image(
            image=[pixels],
            x=lx - half_step,
            y=ly - half_step,
            dw=latent_step,
            dh=latent_step,
        )
show(fig)


In [19]:
# Encode the whole training set and keep the latent means with their labels.
#
# The forward passes run under torch.no_grad(): gradients are not needed here,
# and without it every stored batch would keep its autograd graph alive for
# the full pass over the data set.
#
# The loop uses its own variable names so that the notebook-level `X` and `t`
# (the preview batch) are not overwritten; the preview cell therefore still
# works when re-run after this one.
latent_batches = []
label_batches = []
with torch.no_grad():
    for batch_X, batch_t in train_loader:
        flat_X = batch_X.view(-1, 28**2)  # flatten each image to a vector
        latent_batches.append(vae.encoder(flat_X).mean.detach())
        label_batches.append(batch_t)

# One row per training sample, in the (shuffled) order the loader produced;
# rows of `latent_images` and entries of `ts` stay aligned.
latent_images = torch.cat(latent_batches)
ts = torch.cat(label_batches)
print(latent_images.mean(dim=0))


tensor([-0.0657, -0.5781])


In [20]:
# One colour per digit class; the list index is the digit.
#           0      1       2          3        4       5        6         7        8         9
colors = ['red', 'blue', 'green', 'purple', 'black', 'cyan', 'yellow', 'brown', 'violet', 'olive']
fig = figure(
    title='p(X)', width=400, height=400,
    x_range=[-latent_range, latent_range], y_range=[-latent_range, latent_range]
)

# Number of encoded samples to plot; a negative value means "plot all".
# Using -1 directly as the slice end would silently drop the last sample,
# so a negative size is mapped to an open-ended slice instead.
SUBSET_SIZE = -1
subset_end = None if SUBSET_SIZE < 0 else SUBSET_SIZE
li_subset = latent_images[:subset_end]
ts_subset = ts[:subset_end]

for digit, color in enumerate(colors):  # plot each digit separately
    # Row indices of the samples labelled with this digit.
    mask = (ts_subset == digit).nonzero().view(-1)
    selection = li_subset[mask]
    fig.circle(selection[:, 0].numpy(), selection[:, 1].numpy(), color=color, size=0.2)
show(fig)

In [None]:
# Display the bias of the first element of the encoder's output layer.
# NOTE(review): presumably a leftover inspection cell (it has no execution
# count) — confirm whether it is still needed.
vae.encoder.output_layer[0].bias