# Discriminative Variational Model

This notebook illustrate how to build and train a Discriminative Variational Model (SVAE) with the [beer framework](https://github.com/beer-asr/beer).

In [1]:
%load_ext autoreload
%autoreload 2

# Add the path of the beer source code ot the PYTHONPATH.
import sys
sys.path.insert(0, '../')

import numpy as np
import torch
from torch import nn

# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d

# Beer framework
import beer

# Convenience functions for plotting.
import plotting

output_notebook(verbose=False)

## Data 

As a simple example we consider the following synthetic data: 

$$ 
\begin{split}
    z &\sim \mathcal{N}(m, \Sigma) \\
    x &= 
        \begin{pmatrix}
        z_1 \\
        z_2 + (z_1 - m_1)^2
        \end{pmatrix} 
\end{split}
$$

In [2]:
ntargets = 5
N = 100
Xs = []
labels = []
for i in range(ntargets):
    mean = np.array([3., 2. - (i * 1.5)])
    cov = np.array([[.75, 0.], [0., .075]])
    Z1 = np.random.multivariate_normal(mean, cov, size=N)
    X1 = np.zeros_like(Z1)
    X1[:, 0] = Z1[:, 0]
    X1[:, 1] = Z1[:, 1] + (Z1[:, 0]-mean[0])** 2
    labels.append(np.ones(len(X1)) * i)
    Xs.append(X1)

idxs = np.arange(0, ntargets * N)
np.random.shuffle(idxs)
X = np.vstack(Xs)[idxs]
labels = np.hstack(labels)[idxs]

X -= X.mean(axis=0)
X /= np.sqrt(X.var(axis=0))

fig = figure(title='Synthetic data', width=400, height=400)
colors = ['salmon', 'blue', 'green', 'yellow', 'black', 'red', 'cyan', 'purple', 'brown', 'pink']
for sX, color in zip(Xs, colors):
    fig.circle(sX[:, 0], sX[:, 1], color=color)
show(fig)

In [3]:
labels.shape, X.shape

((500,), (500, 2))

## Model Creation

We first create the DVM.

In [4]:
# Dimension of the observed space.
obs_dim = X.shape[1]

# Dimension of the latent space. It can be bigger or smaller
# than the dimension of the observed space.
latent_dim = 2

# Number of units per hidden-layer.
n_units = 10

# Neural network structure of the encoder of the model.
enc_struct = nn.Sequential(
    nn.Linear(obs_dim, n_units),
    nn.Tanh()
)
encoder = beer.MLPNormalDiag(enc_struct, latent_dim)

# Model of the latent space (uncomment the one you want to try).
# It can be changed at any-time. 
# ----------------------------------------------------------------------

args = {
    'prior_mean': torch.zeros(latent_dim), 
    'prior_cov': torch.eye(latent_dim), 
    'prior_count': 1, 'random_init': False
}
#latent_model = beer.Mixture.create(torch.ones(ntargets), beer.NormalDiagonalCovariance.create, args)
latent_model = beer.Mixture.create(torch.ones(ntargets), beer.NormalFullCovariance.create, args)

# ----------------------------------------------------------------------

# Putting everything together to build the SVAE.
dvm = beer.DiscriminativeVariationalModel(encoder, latent_model)

## Variational Bayes Training

In [5]:
# Callback to monitor the training progress.
elbos, llhs, klds = [], [], []
def callback(elbo, llh, kld):
    elbos.append(elbo)
    llhs.append(llh)
    klds.append(kld)

# This is the training.
beer.train_dvm(
    dvm, 
    torch.from_numpy(X).float(), 
    torch.from_numpy(labels).long(),
    max_epochs=15000, 
    lrate=1e-3, 
    latent_model_lrate=1, 
    kl_weight=1,
    callback=callback
)

# Plot the ELBO.
fig1 = figure(title='ELBO', width=400, height=400, x_axis_label='step',
              y_axis_label='ln p(X)')
fig1.line(np.arange(len(elbos)), elbos)

# Plot the LLH and the KLD separately.
fig2 = figure(title='LLH + KLD', width=400, height=400,
              y_range=(min(llhs) - 1, max(llhs) + 1),
              x_axis_label='step', y_axis_label='ln p(x|...)')
fig2.line(np.arange(len(llhs)), llhs)
fig2.extra_y_ranges['KLD'] = Range1d(0, max(klds) + 1)
fig2.add_layout(LinearAxis(y_range_name="KLD", axis_label='KLD'), 'right')
fig2.line(np.arange(len(klds)), klds, y_range_name='KLD', color='green')

show(gridplot([[fig1, fig2]]))

Let's see what the VAE has learnt.

In [6]:
d = 200
elbo, llh, kld, mean, var = dvm.evaluate(torch.from_numpy(X[:d]).float(), torch.from_numpy(labels[:d]).long())
elbo, llh, kld, mean, var = elbo.data.numpy(), llh.data.numpy(), \
    kld.data.numpy(), mean.data.numpy(), var.data.numpy()
    
fig1 = figure(width=400, height=400)    
#fig1.cross(mean[:, 0], mean[:, 1], color='black')
for l, m, v in zip(labels[:d], mean, var):
    fig1.ellipse(x=m[0], y=m[1], 
                 width=2 * np.sqrt(v[0]), 
                height=2 * np.sqrt(v[1]), 
                fill_alpha=0, color=colors[int(l)]) 

for color, comp in zip(colors, dvm.latent_model.components):
    plotting.plot_latent_model(fig1, comp, alpha=.5, color=color)

show(fig1)  

  elif np.issubdtype(type(obj), np.float):


In [7]:
from torch.autograd import Variable
import torch.utils.data
import torch.optim

def create_simple_dvm(indim, latent_dim, nunits, ntargets):
    enc_struct = nn.Sequential(
        nn.Linear(indim, latent_dim),
        nn.Tanh()
    )
    encoder = beer.MLPNormalDiag(enc_struct, latent_dim)

    args = {
        'prior_mean': torch.zeros(latent_dim), 
        'prior_cov': torch.eye(latent_dim), 
        'prior_count': 1, 'random_init': True
    }
    latent_model = beer.Mixture.create(torch.ones(ntargets), beer.NormalDiagonalCovariance.create, args)
    #latent_model = beer.Mixture.create(torch.ones(ntargets), beer.NormalFullCovariance.create, args)
    
    return beer.DiscriminativeVariationalModel(encoder, latent_model)

def create_simple_mlp(indim, latent_dim, nunits, ntargets):
    return nn.Sequential(
        nn.Linear(indim, latent_dim),
        nn.Tanh(),
        nn.Linear(latent_dim, ntargets)
    )

def mlp_error_rate(model, features, labels):
    nsamples = dvm.nsamples
    loss_fn = nn.CrossEntropyLoss()
    outputs = model(features)
    _, predicted = torch.max(outputs, dim=1)
    hits = (labels == predicted).float().sum()
    return (1 - hits / labels.size(0)).data

def dvm_error_rate(model, features, labels):
    loss_fn = nn.CrossEntropyLoss()
    outputs = model.predictions(features)
    _, predicted = torch.max(outputs, dim=1)
    hits = (labels == predicted).float().sum()
    return (1 - hits / labels.size(0)).data

def train_mlp(mlp, X, Z, epochs=1, lrate=1e-3, callback=None):
    dataset = torch.utils.data.TensorDataset(X, Z)
    trainloader = torch.utils.data.DataLoader(dataset, batch_size=len(X), shuffle=True)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr=lrate)
    for epoch in range(epochs):
        for i, data in enumerate(trainloader):
            inputs, labels = Variable(data[0]), Variable(data[1])
            optimizer.zero_grad()
            outputs = mlp(inputs)
            loss = loss_fn(outputs, labels)

            loss.backward()
            optimizer.step()
            if callback is not None:
                callback(float(loss / len(outputs)))

In [8]:
# Callback to monitor the training progress.
xents = []
def callback(xent):
    xents.append(xent)

mlp = create_simple_mlp(2, 2, 10, ntargets)
train_mlp(
    mlp, torch.from_numpy(X).float(), torch.from_numpy(labels).long(),
    epochs=15000, callback=callback)


# Plot the ELBO.
fig1 = figure(title='ELBO', width=400, height=400, x_axis_label='step',
              y_axis_label='ln p(X)')
fig1.line(np.arange(len(xents)), xents)

show(fig1)

In [9]:
d = 200
outputs = mlp[:-1](torch.from_numpy(X[:d]).float()).data.numpy()
print(outputs.shape)
fig1 = figure(width=400, height=400)    
for l, o in zip(labels, outputs):
    fig1.circle(o[0], o[1], color=colors[int(l)])

show(fig1)

(200, 2)


  elif np.issubdtype(type(obj), np.float):


In [10]:
mlp_error_rate(mlp, torch.from_numpy(X).float(), torch.from_numpy(labels).long()).data.numpy(), \
    dvm_error_rate(dvm, torch.from_numpy(X).float(), torch.from_numpy(labels).long()).data.numpy()

(array(0.24199998, dtype=float32), array(0., dtype=float32))

In [11]:
mlp_ers = []
dvm_ers = []
ndatapoints = [1, 2, 5, 10]

for i in ndatapoints:
    print(i)
    sX, sZ = torch.from_numpy(X[:i]).float(), \
    torch.from_numpy(labels[:i]).long()
    test_sX, test_sZ = torch.from_numpy(test_X).float(), \
    torch.from_numpy(test_labels).long()
    
    print(sZ)
    
    mlp = create_simple_mlp(2, 2, 10, 10)
    train_mlp(mlp, sX, sZ, epochs=10000)

    dvm = create_simple_dvm(2, 2, 10, 10)
    beer.train_dvm(dvm, sX, sZ, max_epochs=10000, lrate=1e-3, latent_model_lrate=1e-1, kl_weight=1)
    
    mlp_ers.append(mlp_error_rate(mlp, test_sX, test_sZ).data.numpy())
    dvm_ers.append(dvm_error_rate(dvm, test_sX, test_sZ).data.numpy())

1


NameError: name 'test_X' is not defined

In [None]:
fig = figure(width=400, height=400)
fig.line(ndatapoints, mlp_ers, color='blue', legend='MLP')
fig.line(ndatapoints, dvm_ers, color='green', legend='DVM')
fig.legend.location = 'top_right'
show(fig)

In [None]:
dvm.latent_model.components[1].count

In [None]:
sZ