# Probabilistic Linear Discriminant Analysis (PLDA)

This notebook illustrates how to use a PLDA model with the [beer framework](https://github.com/beer-asr/beer).

In [1]:
%load_ext autoreload
%autoreload 2

# Add the path of the beer source code to the module search path (sys.path).
import sys
sys.path.insert(0, '../')


import numpy as np
import torch
import torch.optim

from torch import nn
from torch.autograd import Variable


# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d

# Beer framework
import beer

# Convenience functions for plotting.
import plotting

output_notebook(verbose=False)

## Data 

In [2]:
# Synthetic data set: `ntargets` 2-D Gaussian classes that share a single
# covariance matrix and whose means lie around the point (shift, shift).
ntargets = 3
N = 100            # Number of samples drawn per class.
ntest = 100        # Number of shuffled samples held out as test set.
data_seed = 0      # Seed of the data generator (reproducible "Run All").
cov = np.array([
    [1, -.5],
    [-.5, 3],
])
shift = 100

# Local generator: the draws are reproducible without modifying the global
# NumPy random state.
rng = np.random.RandomState(data_seed)

# Offset of each class mean relative to `shift` (one entry per class).
class_offsets = [
    5 * np.array([1, .25]),
    3 * np.array([0, 0]),
    5 * np.array([-1, -.25]),
]
assert len(class_offsets) == ntargets, 'one mean offset is needed per class'

# Draw N points per class; the label of a point is the index of its class
# (stored as float, it is converted to integer right before training).
Xs = []
class_labels = []
for target, offset in enumerate(class_offsets):
    mean = offset + shift
    X = rng.multivariate_normal(mean, cov, size=N)
    class_labels.append(np.full(len(X), float(target)))
    Xs.append(X)

# Shuffle points and labels with the same permutation so they stay aligned.
idxs = np.arange(0, ntargets * N)
rng.shuffle(idxs)
all_data = np.vstack(Xs)[idxs]
all_labels = np.hstack(class_labels)[idxs]

# The last `ntest` shuffled samples form the test set, the rest is used for
# training. Splitting on an explicit index also behaves for ntest == 0.
ntrain = len(all_data) - ntest
data, test_data = all_data[:ntrain], all_data[ntrain:]
labels, test_labels = all_labels[:ntrain], all_labels[ntrain:]


# Square viewing window of half-width 10 centred on (shift, shift); both
# ranges are kept as notebook-level names so later figures can reuse them.
x_range = y_range = (shift - 10, shift + 10)

# Colour palette: one entry is consumed per class, in order.
colors = ['salmon', 'blue', 'green', 'yellow', 'black', 'red', 'cyan', 'purple', 'brown', 'pink']

fig = figure(
    title='Synthetic data',
    width=400,
    height=400,
    x_range=x_range,
    y_range=y_range,
)

# Scatter every class with its own colour (zip stops at the last class).
for class_points, class_color in zip(Xs, colors):
    fig.circle(class_points[:, 0], class_points[:, 1], color=class_color)

show(fig)

## Model

In [11]:
# Seed the torch generator so that the random subspaces drawn below (and
# therefore the whole training run) are identical after every restart.
init_seed = 0
torch.manual_seed(init_seed)

# Number of components in the mixture: one per class.
ncomps = ntargets

# Dimension of the observed space.
obs_dim = data.shape[1]

# Dimension of the noise subspace.
noise_s_dim = obs_dim - 1

# Dimension of the class subspace.
class_s_dim = 10

# Initial values handed to the model. The global mean is the empirical mean
# of the training data; both subspaces are drawn at random and the class
# means start at zero.
# NOTE(review): `prec` and `pseudo_counts` are presumably the precision of
# the prior over the mean and the strength of the priors; confirm against
# the documentation of `beer.PLDASet.create`.
mean = torch.from_numpy(data.mean(axis=0)).double()
prec = 1.
noise_s = torch.randn(noise_s_dim, obs_dim).double()
class_s = torch.randn(class_s_dim, obs_dim).double()
means = torch.zeros(ncomps, class_s_dim).double()

# Uniform mixture weights. Normalising by `ncomps` (the length of the vector)
# guarantees that they sum to one.
weights = torch.ones(ncomps).double() / ncomps
pseudo_counts = 1.

pldaset = beer.PLDASet.create(mean, prec, noise_s, class_s, means,
                              pseudo_counts)
plda = beer.Mixture.create(weights, pldaset)

## Variational Bayes Training

In [12]:
# Training configuration.
epochs = 100
lrate = 1.

# Train in single precision: cast the model and the data to float32 and the
# class labels to integer indices.
plda = plda.float()
X = torch.from_numpy(data).float()
targets = torch.from_numpy(labels).long()

# ELBO objective built from the number of training points, and the
# optimizer that updates the model's grouped parameters.
elbo_fn = beer.EvidenceLowerBound(len(X))
optimizer = beer.BayesianModelCoordinateAscentOptimizer(
    *plda.grouped_parameters,
    lrate=lrate
)

steps = []
elbos = []
for epoch in range(epochs):
    optimizer.zero_grad()
    elbo = elbo_fn(plda, X, targets)
    elbo.natural_backward()
    optimizer.step()

    # Skip the very first step: its ELBO is evaluated before any update,
    # i.e. on the random initialisation, and would typically dominate the
    # scale of the plot. The per-sample ELBO is stored for all other steps.
    if epoch > 0:
        steps.append(epoch)
        elbos.append(float(elbo) / len(X))

# Plot the ELBO against the index of the training step.
fig = figure(title='ELBO', width=400, height=400, x_axis_label='step',
              y_axis_label='ln p(X)')
fig.line(steps, elbos, color='blue')

show(fig)

In [5]:
# Scratch check: build a 2-element zero tensor with the same dtype as `mean`.
torch.zeros(2, dtype=mean.dtype)

tensor([ 0.,  0.], dtype=torch.float64)

## Plotting

In [6]:
# Draw the trained model on top of the (faded) training points, inside the
# axis ranges stored in `x_range` / `y_range`.
fig1 = figure(
    title='PLDA',
    x_range=x_range,
    y_range=y_range,
    width=400,
    height=400,
)

# Training points, mostly transparent so the model stays readable.
train_x, train_y = data[:, 0], data[:, 1]
fig1.circle(train_x, train_y, alpha=.1)

# Overlay the mixture using the notebook's plotting helper.
plotting.plot_gmm(fig1, plda, alpha=.5, color='blue')

show(fig1)