# Generalized Subspace Model


In [1]:
     # Add "beer" to the PYTHONPATH
import sys
sys.path.insert(0, '../')

import math
import copy

import beer
import numpy as np
import torch

# For plotting.
from bokeh.io import show, output_notebook
from bokeh.plotting import figure, gridplot
from bokeh.models import LinearAxis, Range1d
from bokeh.palettes import Category10 as palette
output_notebook()

# Convenience functions for plotting.
import plotting

%load_ext autoreload
%autoreload 2

## Synthetic Data

As an illustration, we generate a synthetic data set composed of two normally distributed clusters. One has a diagonal covariance matrix whereas the other has a dense covariance matrix.

In [2]:
def generate_data(global_mean, angle, size, weight):
    """Draw a shuffled 2-D data set made of two Gaussian clusters.

    Both clusters start from the same axis-aligned covariance
    ``diag(0.5**2, 2**2)``. The first cluster is rotated with
    ``rotation`` and the second one with ``rotation.T``; their means
    sit 2 units above and below ``global_mean`` along the second
    coordinate.

    Args:
        global_mean: Length-2 array added to both cluster offsets.
        angle: Rotation angle in radians.
        size: Total number of points to generate.
        weight: Fraction of the points assigned to the first cluster.

    Returns:
        A tuple ``(data, (mean1, mean2), (cov1, cov2), (weight, 1 - weight))``
        where ``data`` is a ``(size, 2)`` array with rows in random order.
    """
    cos_a, sin_a = math.cos(angle), math.sin(angle)
    rotation = np.array([
        [cos_a, -sin_a],
        [sin_a, cos_a]
    ])

    scale = np.array([.5, 2.])
    init_cov = np.diag(scale**2)
    # Samples are row vectors: x @ R has covariance R.T @ C @ R, and
    # x @ R.T has covariance R @ C @ R.T.
    cov1 = rotation.T @ init_cov @ rotation
    cov2 = rotation @ init_cov @ rotation.T
    mean1 = global_mean + np.array([0, 2])
    mean2 = global_mean - np.array([0, 2])

    # Truncating both cluster sizes independently can lose a point
    # (e.g. 18 + 81 = 99 for size=100); the second cluster takes the
    # remainder so that exactly `size` points are produced.
    nsamples1 = int(size * weight)
    nsamples2 = int(size) - nsamples1

    data1 = (scale * np.random.randn(nsamples1, 2)) @ rotation + mean1
    data2 = (scale * np.random.randn(nsamples2, 2)) @ rotation.T + mean2
    data = np.vstack([data1, data2])
    np.random.shuffle(data)
    return data, (mean1, mean2), (cov1, cov2), (weight, 1-weight)

# Per-dataset samples and the ground-truth parameters they were drawn from.
datasets = []
means = []
covs = []
weights = []
start_angle = -.5 * math.pi
boundary = 50
nmodels = 10
# Sweep `h` from -boundary to +boundary: each step moves the data set along
# the first axis, rotates the clusters and changes the mixing weight.
for h in np.linspace(-boundary, boundary, nmodels):
    mean = np.array([1, 0]) * h
    # The angle goes linearly from -pi/2 (h = -boundary) to +pi/2 (h = boundary).
    angle = start_angle + (h + boundary) * (math.pi) / (2 * boundary) 
    # The weight of the first cluster goes linearly from 0.1 to 0.9.
    ratio = (h + boundary) / (2 * boundary)
    ratio = .1 + .8 * ratio
    data, m_means, m_covs, m_weights = generate_data(mean, angle, size=100, weight=ratio)
    datasets.append(data)
    means.append(m_means)
    covs.append(m_covs)
    weights.append(m_weights)
# All the points of all the data sets stacked in a single array.
data = np.vstack(datasets)

# Convert the data to pytorch tensors to work with beer.
datasets = [torch.from_numpy(data) for data in datasets]

# Colors: the 10-color entry of the Category10 palette, one per data set.
colors = palette[10]

# Plot each data set with the two Normal densities it was sampled from.
fig = figure(width=400, height=400)
for color, dataset, m_means, m_covs, m_weights in zip(colors, datasets, means, covs, weights):
    dataset = dataset.numpy()
    # The opacity of each density is scaled by its mixing weight.
    plotting.plot_normal(fig, m_means[0], m_covs[0], alpha=.7 * m_weights[0], color=color)
    plotting.plot_normal(fig, m_means[1], m_covs[1], alpha=.7 * m_weights[1], color=color)
    fig.circle(dataset[:, 0], dataset[:, 1], color=color)
show(fig)

## Pre-training

First, we train a GMM for each dataset. These GMMs will serve as the starting point for building the GSM in the next steps.

In [3]:
def create_gmm(dataset, size, cov_type):
    """Create a double-precision mixture initialized from `dataset`.

    Args:
        dataset: Tensor of data points, one point per row.
        size: Number of components of the mixture.
        cov_type: Covariance type forwarded to `beer.NormalSet.create`.

    Returns:
        The `beer.Mixture` built on top of the set of Normal components,
        converted with `.double()`.
    """
    center = dataset.mean(dim=0)
    # The components start from a tenth of the per-dimension data variance.
    shrunk_var = dataset.var(dim=0) / 10
    # NOTE(review): `noise_std=1` presumably perturbs the initial component
    # means so they differ from each other -- confirm against beer.
    components = beer.NormalSet.create(
        center, shrunk_var, size=size, noise_std=1, cov_type=cov_type
    )
    mixture = beer.Mixture.create(components)
    return mixture.double()

# One two-component GMM with full covariance matrices per data set.
gmms = [create_gmm(dataset, size=2, cov_type='full')
        for dataset in datasets]

# Show the structure of the first model.
print('Standard GMM:')
print('=============')
print(gmms[0])

Standard GMM:
Mixture(
  (modelset): NormalSet(
    (means_precisions): ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)
  )
  (weights): ConjugateBayesianParameter(prior=Dirichlet, posterior=Dirichlet)
)


In [4]:
def fit_gmm(gmm, dataset, epochs=100):
    """Train `gmm` on `dataset` in place.

    Runs `epochs` full-batch steps of the variational Bayes optimizer
    (learning rate 1) over the mean-field factorization of the model.

    Args:
        gmm: Model to train.
        dataset: Data passed to `beer.evidence_lower_bound`.
        epochs: Number of optimization steps.
    """
    optimizer = beer.VariationalBayesOptimizer(
        gmm.mean_field_factorization(), lrate=1.
    )
    for _ in range(epochs):
        optimizer.init_step()
        beer.evidence_lower_bound(gmm, dataset).backward()
        optimizer.step()

# Train every GMM on its own data set.
for gmm, dataset in zip(gmms, datasets):
    fit_gmm(gmm, dataset)

In [5]:
# Plot the trained GMMs on top of (faded) data points.
fig = figure(width=400, height=400)
for gmm, dataset, color in zip(gmms, datasets, colors):
    dataset = dataset.numpy()
    plotting.plot_gmm(fig, gmm, alpha=.5, color=color)
    fig.circle(dataset[:, 0], dataset[:, 1], color=color, alpha=.1)
    
show(fig)

## Generalized Subspace Model

In [6]:
# Prior over the latent space.
latent_dim = 3
latent_prior = beer.Normal.create(
    torch.zeros(latent_dim), 
    torch.ones(latent_dim)
).double()
    
# Create a new set of GMMs (initialized from the original GMMs)
# whose parameters are modeled by the subspace.
# The original `gmms` are deep-copied so they stay untouched.
subspace_gmms = copy.deepcopy(gmms)
for gmm in subspace_gmms:
    # Map every Bayesian parameter to its subspace counterpart.
    newparams = {
        param: beer.SubspaceBayesianParameter.from_parameter(param, latent_prior)
        for param in gmm.bayesian_parameters()
    }
    gmm.replace_parameters(newparams)

print('Subspace GMM')
print('============')
print(subspace_gmms[0])    
print()

# We keep a GMM which will serve as a "template model"
# for the GSM.
template_gmm = copy.deepcopy(subspace_gmms[0])

# Create the final Generalized Subspace Model
gsm = beer.GSM.create(template_gmm, latent_dim, latent_prior, 
                      latent_nsamples=10, params_nsamples=10).double()
print('Generalized Subspace Model')
print('==========================')
print(gsm)    
print()

# One latent posterior per GMM.
latent_posts = gsm.new_latent_posteriors(len(gmms))
print('Latent Posteriors')
print('=================')
print(latent_posts)    

Subspace GMM
Mixture(
  (modelset): NormalSet(
    (means_precisions): SubspaceBayesianParameter(prior=Normal, posterior=<unspecified>)
  )
  (weights): SubspaceBayesianParameter(prior=Normal, posterior=<unspecified>)
)

Generalized Subspace Model
GSM(
  (model): Mixture(
    (modelset): NormalSet(
      (means_precisions): SubspaceBayesianParameter(prior=Normal, posterior=<unspecified>)
    )
    (weights): SubspaceBayesianParameter(prior=Normal, posterior=<unspecified>)
  )
  (affine_transform): AffineTransform(
    (weights): ConjugateBayesianParameter(prior=NormalDiagonalCovariance, posterior=NormalDiagonalCovariance)
    (bias): ConjugateBayesianParameter(prior=NormalDiagonalCovariance, posterior=NormalDiagonalCovariance)
  )
  (latent_prior): Normal(
    (mean_precision): ConjugateBayesianParameter(prior=NormalWishart, posterior=NormalWishart)
  )
)

Latent Posteriors
NormalDiagonalCovariance(
  (params): _MeanLogDiagCov(mean=Parameter containing:
  tensor([[0., 0., 0.],
        

In [7]:
# Update the subspace GMMs from the (still untrained) GSM and latent
# posteriors, then plot them.
# NOTE(review): the *_nsamples arguments suggest the update is sampling
# based -- confirm against beer.
gsm.update_models(subspace_gmms, latent_posts, latent_nsamples=10, params_nsamples=10)

fig = figure(width=400, height=400)
for gmm, dataset, color in zip(subspace_gmms, datasets, colors):
    dataset = dataset.numpy()
    plotting.plot_gmm(fig, gmm, alpha=.5, color=color)
    fig.circle(dataset[:, 0], dataset[:, 1], color=color, alpha=.1)
    
show(fig)

In [8]:
def accumulate_stats(models, datasets):
    """Recompute the statistics of each model on its paired data set.

    For every (model, data) pair, the statistics of all the Bayesian
    parameters of the model are zeroed, then the ELBO of the model on the
    data is evaluated and back-propagated with `std_params=False`.

    Args:
        models: Iterable of models exposing `bayesian_parameters()`.
        datasets: Iterable of data sets, aligned with `models`.
    """
    for model, data in zip(models, datasets):
        for parameter in model.bayesian_parameters():
            parameter.zero_stats()
        # NOTE(review): `std_params=False` presumably skips the gradients of
        # the non-conjugate parameters -- confirm against beer.
        beer.evidence_lower_bound(model, data).backward(std_params=False)

In [9]:
# First training run of the GSM and of the latent posteriors.
epochs = 100
lrate_cjg = 1e-1
lrate_std = 1e-1
# Adam handles the parameters of the latent posteriors and of the GSM.
params = list(latent_posts.parameters()) + list(gsm.parameters())
std_optim = torch.optim.Adam(params, lr=lrate_std)
# NOTE(review): the optimizer receives a single empty parameter group, so
# presumably only the Adam-managed parameters are updated here -- confirm.
optim = beer.VariationalBayesOptimizer([[]], lrate=lrate_cjg, std_optim=std_optim)


# ELBO value at each epoch.
elbos = []

for epoch in range(1, epochs + 1): 
    # Statistics accumulation is disabled in this run.
    #accumulate_stats(models, datasets)
    optim.init_step()
    elbo = beer.evidence_lower_bound(gsm, subspace_gmms, latent_posts=latent_posts, 
                                     latent_nsamples=5, params_nsamples=5)
    elbo.backward()
    optim.step()
    elbos.append(float(elbo))

In [10]:
# ELBO as a function of the epoch index.
fig = figure()
fig.line(range(len(elbos)), elbos)
show(fig)

In [None]:
# Refresh the subspace GMMs from the GSM before plotting them.
gsm.update_models(subspace_gmms, latent_posts, latent_nsamples=10, params_nsamples=10)

# Left panel: the GMMs trained independently on each data set.
fig1 = figure(title='Standard GMM')
for gmm, dataset, color in zip(gmms, datasets, colors):
    dataset = dataset.numpy()
    plotting.plot_gmm(fig1, gmm, alpha=.5, color=color)
    fig1.circle(dataset[:, 0], dataset[:, 1], color=color, alpha=.1)
    
# Right panel: the subspace GMMs, sharing the axis ranges of the left panel.
fig2 = figure(title='Subspace GMM', x_range=fig1.x_range, y_range=fig1.y_range)
for gmm, dataset, color in zip(subspace_gmms, datasets, colors):
    dataset = dataset.numpy()
    plotting.plot_gmm(fig2, gmm, alpha=.5, color=color)
    fig2.circle(dataset[:, 0], dataset[:, 1], color=color, alpha=.1)
    
show(gridplot([[fig1, fig2]]))

In [13]:
%%prun -s cumtime

# Second (profiled) training run: the optimizer now receives the mean-field
# factorization of the GSM, and the statistics of the subspace GMMs are
# re-accumulated at every epoch.
epochs = 1_000
lrate_cjg = 1e-1
lrate_std = 1e-1
# Adam handles the parameters of the latent posteriors and of the GSM.
params = list(latent_posts.parameters()) + list(gsm.parameters())
std_optim = torch.optim.Adam(params, lr=lrate_std)
optim = beer.VariationalBayesOptimizer(gsm.mean_field_factorization(), lrate=lrate_cjg, std_optim=std_optim)


# ELBO value at each epoch (overwrites the values of the previous run).
elbos = []
for epoch in range(1, epochs + 1): 
    # Refresh the subspace GMMs, then recompute their statistics on the data.
    gsm.update_models(subspace_gmms, latent_posts, latent_nsamples=10, params_nsamples=10)
    accumulate_stats(subspace_gmms, datasets)
    optim.init_step()
    elbo = beer.evidence_lower_bound(gsm, subspace_gmms, latent_posts=latent_posts, 
                                     latent_nsamples=5, params_nsamples=5)
    elbo.backward()
    optim.step()
    elbos.append(float(elbo))

 

         7635534 function calls (7512416 primitive calls) in 40.662 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   40.662   40.662 {built-in method builtins.exec}
        1    0.252    0.252   40.662   40.662 <string>:3(<module>)
    11000    0.348    0.000   14.883    0.001 objectives.py:112(evidence_lower_bound)
    11000    0.349    0.000   14.585    0.001 objectives.py:91(backward)
     1000    0.007    0.000   14.054    0.014 tensor.py:74(backward)
     1000    0.006    0.000   14.047    0.014 __init__.py:38(backward)
     1000   14.028    0.014   14.028    0.014 {method 'run_backward' of 'torch._C._EngineBase' objects}
    20000    0.790    0.000   12.779    0.001 gsm.py:206(_pdfvecs_from_rvectors)
     1000    0.109    0.000    9.641    0.010 gsm.py:276(update_models)
     1000    0.039    0.000    8.315    0.008 gsm.py:372(expected_log_likelihood)
    20000    2.871    0.000    7.646

In [18]:
# ELBO as a function of the epoch index for the latest training run.
fig = figure()
fig.line(range(len(elbos)), elbos)
show(fig)

In [70]:
# Refresh the subspace GMMs from the GSM before plotting them.
gsm.update_models(subspace_gmms, latent_posts, latent_nsamples=10, params_nsamples=10)

# Panel 1: the GMMs trained independently on each data set.
fig1 = figure(title='Standard GMM')
for gmm, dataset, color in zip(gmms, datasets, colors):
    dataset = dataset.numpy()
    plotting.plot_gmm(fig1, gmm, alpha=.8, color=color)
    fig1.circle(dataset[:, 0], dataset[:, 1], color=color, alpha=.5)
    
# Panel 2: the subspace GMMs, sharing the axis ranges of panel 1.
fig2 = figure(title='Subspace GMM', x_range=fig1.x_range, y_range=fig1.y_range)
for gmm, dataset, color in zip(subspace_gmms, datasets, colors):
    dataset = dataset.numpy()
    plotting.plot_gmm(fig2, gmm, alpha=.8, color=color)
    fig2.circle(dataset[:, 0], dataset[:, 1], color=color, alpha=.5)
    
# Disabled panel: latent prior and latent posteriors.
#fig3 = figure(title='Latent space')
#fig3.y_range = fig3.x_range
#mean, cov = gsm.latent_prior.mean.numpy(), gsm.latent_prior.cov.numpy()
#plotting.plot_normal(fig3, mean, cov, alpha=.5, color='pink')
#for mean, diag_cov, color in zip(latent_posts.params.mean, latent_posts.params.diag_cov, colors):
#    mean = mean.detach().numpy()
#    cov = diag_cov.detach().diag().numpy()
#    plotting.plot_normal(fig3, mean, cov, alpha=.5, color=color)
    
    
# Panel 3: the Normal densities the data was actually sampled from.
fig4 = figure(title='True model')
for color, dataset, m_means, m_covs, m_weights in zip(colors, datasets, means, covs, weights):
    dataset = dataset.numpy()
    plotting.plot_normal(fig4, m_means[0], m_covs[0], alpha=.7 * m_weights[0], color=color)
    plotting.plot_normal(fig4, m_means[1], m_covs[1], alpha=.7 * m_weights[1], color=color)
    fig4.circle(dataset[:, 0], dataset[:, 1], color=color)
    
#show(gridplot([[fig1, fig2], [None, fig3]]))
show(gridplot([[fig1, fig2], [fig4]]))

In [19]:
from functools import partial

def _pdfvec(param, rvecs, idx=0):
    """Map the columns of `rvecs` owned by `param` to pdf vectors.

    Args:
        param: Parameter exposing `likelihood_fn` and `len()` (its number
            of pdfs).
        rvecs: 2-D tensor of real-valued vectors, one vector per row.
        idx: Index of the first column of `rvecs` that belongs to `param`.

    Returns:
        A 2-D tensor where the pdf vectors of all the pdfs of `param` are
        concatenated along the last dimension.
    """
    lhf = param.likelihood_fn
    npdfs = len(param)
    dim = lhf.sufficient_statistics_dim(zero_stats=False)
    totdim = dim * npdfs
    # One row per pdf so the likelihood function converts them all at once.
    rvec = rvecs[:, idx: idx + totdim].reshape(-1, dim)
    pdfvec = lhf.pdfvectors_from_rvectors(rvec)
    # The width of the pdf vectors is read from the result rather than
    # assumed to be `dim`.
    totdim_with_zerostats = npdfs * pdfvec.shape[-1]
    return pdfvec.reshape(-1, totdim_with_zerostats)
    
def _pdfvecs_from_rvectors(parameters, rvecs):
    """Convert real-valued vectors into concatenated pdf vectors.

    The columns of `rvecs` are consumed from left to right, each parameter
    taking `len(param)` times the dimension reported by its likelihood
    function. The shape of every per-parameter result is printed.

    Args:
        parameters: Iterable of parameters exposing `likelihood_fn` and
            `len()`.
        rvecs: 2-D tensor of real-valued vectors, one vector per row.

    Returns:
        The per-parameter pdf vectors concatenated along the last dimension.
    """
    chunks = []
    start = 0
    for param in parameters:
        likelihood = param.likelihood_fn
        n_pdfs = len(param)
        stats_dim = likelihood.sufficient_statistics_dim(zero_stats=False)
        stop = start + stats_dim * n_pdfs
        # One row per pdf so they are all converted in a single call.
        flat_rvecs = rvecs[:, start:stop].reshape(-1, stats_dim)
        vectors = likelihood.pdfvectors_from_rvectors(flat_rvecs)
        vectors = vectors.reshape(-1, n_pdfs * vectors.shape[-1])
        print('pdfvec', vectors.shape)
        # An earlier variant averaged the pdf vectors over the samples and
        # reshaped the result to `param.stats.shape`.
        chunks.append(vectors)
        start = stop
    return torch.cat(chunks, dim=-1)

# Try the conversion on samples drawn from the latent posteriors.
samples = latent_posts.sample(5)
rvecs = gsm._rvecs_from_samples(samples, 6)
# Remember the original shape, then flatten everything but the last dimension.
shape = rvecs.shape
rvecs = rvecs.reshape(-1, rvecs.shape[-1])
pdfvecs = _pdfvecs_from_rvectors(gsm.model.bayesian_parameters(), rvecs)
pdfvecs.shape

torch.Size([10, 30, 11])
pdfvec torch.Size([300, 16])
pdfvec torch.Size([300, 2])


torch.Size([300, 18])

In [28]:
def _pdfvecs(params, rvecs):
    """Yield, for each parameter, the pdf vectors built from `rvecs`.

    The columns of `rvecs` are consumed from left to right, each parameter
    taking `len(param)` times the dimension reported by its likelihood
    function.

    Args:
        params: Iterable of parameters exposing `likelihood_fn` and `len()`.
        rvecs: 2-D tensor of real-valued vectors, one vector per row.

    Yields:
        One 2-D tensor per parameter, where the pdf vectors of all the pdfs
        of the parameter are concatenated along the last dimension.
    """
    idx = 0
    for param in params:
        lhf = param.likelihood_fn
        npdfs = len(param)
        dim = lhf.sufficient_statistics_dim(zero_stats=False)
        totdim = dim * npdfs
        # One row per pdf so they are all converted in a single call.
        param_rvecs = rvecs[:, idx: idx + totdim].reshape(-1, dim)
        pdfvec = lhf.pdfvectors_from_rvectors(param_rvecs)
        # The width of the pdf vectors is read from the result rather than
        # assumed to be `dim`.
        totdim_with_zerostats = npdfs * pdfvec.shape[-1]
        idx += totdim
        yield pdfvec.reshape(-1, totdim_with_zerostats)

        
# Draw samples from the latent posteriors and map them to real-valued vectors.
samples = latent_posts.sample(3)
rvecs = gsm._rvecs_from_samples(samples, 3)
# Remember the original shape, then flatten everything but the last dimension.
shape = rvecs.shape
rvecs = rvecs.reshape(-1, shape[-1])
# Concatenate the per-parameter pdf vectors along the last dimension ...
pdfvecs = torch.cat(list(_pdfvecs(gsm.model.bayesian_parameters(), rvecs)), dim=-1)
# ... and restore the leading dimension of the original real-valued vectors.
pdfvecs = pdfvecs.reshape(shape[0], -1, pdfvecs.shape[-1])
rvecs.shape, pdfvecs.shape

(torch.Size([90, 11]), torch.Size([10, 9, 18]))

In [29]:
# Flatten the statistics of every parameter of each standard GMM into one
# row per model, then stack the rows into a single tensor.
per_model_stats = []
for gmm in gmms:
    flattened = [param.stats.view(1, -1) for param in gmm.bayesian_parameters()]
    per_model_stats.append(torch.cat(flattened, dim=-1))
stats = torch.cat(per_model_stats, dim=0)

In [32]:
# Average the pdf vectors over the second dimension and compare the shape of
# the result with the shape of the stacked GMM statistics.
avg_pdfvecs = pdfvecs.mean(dim=1)
avg_pdfvecs.shape, stats.shape

(torch.Size([10, 18]), torch.Size([10, 18]))