In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn

from torch.autograd import Variable
from random import shuffle
from scipy.stats import beta
import scipy.special

import sys
sys.path.append('../')
sys.path.append('../../')
from utils import *
from mdns import Trainer, MultivariateMogMDN, PytorchMultivariateMoG

#from sklearn.preprocessing import normalize

%matplotlib inline

import matplotlib as mpl
# Notebook-wide matplotlib defaults: font sizes for titles, labels, ticks
# and legends, plus a wide default figure.
mpl.rcParams.update({
    'axes.titlesize': 20,
    'axes.labelsize': 15,
    'ytick.labelsize': 12,
    'xtick.labelsize': 12,
    'legend.fontsize': 12,
    'figure.figsize': (15, 5),
})

## Define an MDN for approximating models with multiple parameters

It takes as input the data $x$ **and** the model index $m$. 

In [None]:
# Dimensionality of the parameter vector; 2D keeps the posteriors easy to visualize.
n_params = 2

# The network input has n_params + 1 features: the data dimensions plus the model index.
model = MultivariateMogMDN(
    ndim_input=n_params + 1,
    ndim_output=2,
    n_hidden_units=20,
    n_components=1,
)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
trainer = Trainer(model, optimizer, verbose=True)

## Generate data from different models: Gaussian models

In [None]:
# Simulation sizes passed to the data generator.
n_samples = 10000
sample_size = 1000

# Both models share one fixed data covariance ...
data_cov = 0.5 * np.eye(n_params)

# ... and differ only in the prior placed on the mean.
prior1 = scipy.stats.multivariate_normal(mean=[0.5, 0.5], cov=np.eye(n_params))
prior2 = scipy.stats.multivariate_normal(mean=[-0.5, -0.5], cov=np.eye(n_params))

# One (data, parameters) pair per model, generated in the order model 1, model 2.
(X1, theta1), (X2, theta2) = [
    generate_nd_gaussian_dataset(n_samples, sample_size, prior, data_cov=data_cov)
    for prior in (prior1, prior2)
]

## Put the data into a single data set and add the model index as a feature

In [None]:
# Parameters of both models, stacked in the same order as the data below.
theta = np.vstack((theta1, theta2))

# Append the model index as the last feature column: -1 for model 1, +1 for model 2.
X = np.vstack((
    np.column_stack((X1, np.full(n_samples, -1.0))),
    np.column_stack((X2, np.full(n_samples, 1.0))),
))

# Normalize the features and keep the normalization for reuse on test data.
X, training_norm = normalize(X)

In [None]:
# train the MDN on the pooled data set for 100 epochs and keep the returned loss trace
loss_trace = trainer.train(X, theta, n_epochs=100)

In [None]:
# plot the loss trace returned by training; the trailing ';' suppresses the cell's text output
plt.plot(loss_trace);

## Now approximate the posterior under the different models using generated test data

In [None]:
# Observed data: `sample_size` draws around a known mean with the shared data covariance.
true_mu = [0, 0]
xo = scipy.stats.multivariate_normal.rvs(mean=true_mu, cov=data_cov, size=sample_size)
xo = xo.reshape(sample_size, n_params)

# Summary statistics: per-dimension sum, laid out as a single row.
so = xo.sum(axis=0).reshape(1, 2)

# Append the model index (-1 or +1) and apply the training normalization.
so1, norm = normalize(np.column_stack((so, [-1])), training_norm)
so2, norm = normalize(np.column_stack((so, [1])), training_norm)

# Wrap both inputs as pytorch Variables for the network.
so1, so2 = (Variable(torch.Tensor(stats.tolist())) for stats in (so1, so2))

In [None]:
# Build one mixture-of-Gaussians posterior per model index from the network output.
post1, post2 = (
    PytorchMultivariateMoG(*model(model_input))
    for model_input in (so1, so2)
)

In [None]:
# Analytical posteriors for the observed data, one per prior.
postana1, postana2 = (
    scipy.stats.multivariate_normal(
        *calculate_multivariate_normal_mu_posterior(xo, data_cov, sample_size, prior.mean, prior.cov)
    )
    for prior in (prior1, prior2)
)

In [None]:
# Square evaluation grid on [-r, r) in both dimensions with a step of 0.01.
r = 1.
grid_axis = slice(-r, r, .01)
x, y = np.mgrid[grid_axis, grid_axis]

# Stack into an array whose last axis holds the (x, y) coordinate pair.
pos = np.dstack((x, y))

In [None]:
# plot the posteriors
def _draw_posterior_panel(ax, density, title, prior_mean=None):
    """Draw one filled-contour posterior panel.

    ax: matplotlib Axes to draw on.
    density: posterior density evaluated at the grid points stored in `pos`.
    title: panel title.
    prior_mean: optional 2D prior mean, marked with a black dot when given.
    """
    # Read the grid from `pos` rather than the globals `x` / `y`, so the cell
    # still works if those short names are rebound elsewhere in the notebook.
    ax.contourf(pos[..., 0], pos[..., 1], density)
    ax.plot(true_mu[0], true_mu[1], 'ro', label='true mean')
    if prior_mean is not None:
        ax.plot(prior_mean[0], prior_mean[1], 'ko', label='prior mean')
    ax.set_title(title)
    ax.grid()
    # Every panel carries labelled markers, so every panel gets its legend.
    ax.legend()


fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# left column: MDN posteriors under each model
_draw_posterior_panel(axes[0, 0], post1.eval_numpy(pos),
                      'model 1, prior mean {}'.format(prior1.mean), prior_mean=prior1.mean)
_draw_posterior_panel(axes[1, 0], post2.eval_numpy(pos),
                      'model 2, prior mean {}'.format(prior2.mean), prior_mean=prior2.mean)

# right column: the true (analytical) posteriors under each model
_draw_posterior_panel(axes[0, 1], postana1.pdf(pos), 'Analytical posterior model 1')
_draw_posterior_panel(axes[1, 1], postana2.pdf(pos), 'Analytical posterior model 2')


## Credible Region check 
Sample many $\theta$s from the prior, generate the corresponding data $x$, predict the posterior and compute the analytical posterior. Then check, for many different credible regions (CRs), whether $\theta$ lies within the region.

In [None]:
# Number of test parameters for the credible-region check.
ntest = 10000

# Draw the test parameters from the Gaussian prior of model 1.
thetas = prior1.rvs(size=ntest)

In [None]:
# generate corresponding data: one data set of `sample_size` points per sampled theta
# NOTE(review): `cov` is left at its default (identity) here, while the training
# sets were generated with data_cov = 0.5 * I -- confirm which is intended. It is
# kept as is because the analytical posterior in the credible-region loop also
# assumes an identity data covariance (sigma = np.eye(2)).
x_test = np.array([
    scipy.stats.multivariate_normal.rvs(mean=th, size=sample_size)
    for th in thetas
])

# calculate summary stats: per-data-set sums plus the model index as last feature.
# The thetas were drawn from prior1, i.e. model 1, which is encoded as -1 in the
# training set (model 2 is +1), so the index column must be -1 here.
model_index = np.full((ntest, 1), -1.0)
sx_test = np.hstack((np.sum(x_test, axis=1), model_index))

# normalize with the statistics of the training set
sx_test_zt, _ = normalize(sx_test, training_norm)

In [None]:
# display the mean of the model-1 prior
prior1.mean

In [None]:
def check_credible_regions(theta_o, cdf_fun, credible_regions):
    """Flag which credible regions are larger than the one reaching ``theta_o``.

    theta_o: value passed to ``cdf_fun``.
    cdf_fun: callable returning the cumulative probability q at ``theta_o``.
    credible_regions: numpy array of credible-region masses to test.

    The mass of the central region that just reaches ``theta_o`` is one minus
    twice the tail mass beyond it. Returns an array with the dtype of
    ``credible_regions`` holding 1 where the tested mass is strictly larger
    than that value and 0 elsewhere.
    """
    quantile = cdf_fun(theta_o)

    # tail beyond theta_o: the upper tail above the median, the lower tail otherwise
    tail_mass = 1 - quantile if quantile > 0.5 else quantile
    cr_mass = 1 - 2 * tail_mass

    contained = credible_regions > cr_mass
    return contained.astype(credible_regions.dtype)

In [None]:
# for each theta, x, sx, predict posterior and get true posterior
cr = np.arange(0.05, 1., 0.05)
# rows: MDN x1 marginal, MDN x2 marginal, MDN joint
cr_counts = np.zeros((3, cr.size))
# counts for the analytical posterior
cr_counts2 = np.zeros_like(cr)

# The analytical posterior covariance depends only on the prior, the data
# covariance and the sample size, so it is computed once outside the loop.
sigma_0 = prior1.cov
mu_0 = prior1.mean
# NOTE(review): identity data covariance matches how x_test is generated
# (default cov of multivariate_normal.rvs), not data_cov used for training -- confirm.
sigma = np.eye(2)
prec_0 = np.linalg.inv(sigma_0)
prec_data = np.linalg.inv(sigma)
prec_N = prec_0 + sample_size * prec_data
sigma_N = np.linalg.inv(prec_N)

# `x_i` rather than `x`, so the plotting grid `x` defined earlier is not overwritten
for th, x_i, sxz in zip(thetas, x_test, sx_test_zt):
    # predict posterior
    phat = model.predict(sxz.reshape(1, -1))
    [m1, m2] = phat.get_marginals()
    cr_counts[0, ] += m1.get_credible_interval_counts(th[0], cr)
    cr_counts[1, ] += m2.get_credible_interval_counts(th[1], cr)
    cr_counts[2, ] += phat.check_credible_regions(th, cr)

    # analytical posterior mean for this data set
    mu_N = sigma_N.dot(sample_size * prec_data.dot(x_i.mean(axis=0)) + prec_0.dot(mu_0))

    # The joint CDF of a multivariate Gaussian is not a quantile that can be fed
    # into the 1D central-interval formula. For a Gaussian posterior the squared
    # Mahalanobis distance of theta is chi-square distributed with dim(theta)
    # degrees of freedom, so its chi-square CDF is the mass of the smallest
    # highest-density region that contains theta.
    delta = th - mu_N
    hpd_mass = scipy.stats.chi2.cdf(delta.dot(prec_N).dot(delta), df=th.size)
    cr_counts2 += cr > hpd_mass

cr_probs = cr_counts / ntest
cr_probs2 = cr_counts2 / ntest

In [None]:
# Empirical coverage of the MDN posterior: one curve per row of cr_probs.
mdn_labels = (
    'mdn posterior, x1 marginal',
    'mdn posterior, x2 marginal',
    'mdn posterior, joint',
)
for probs, label in zip(cr_probs, mdn_labels):
    plt.plot(cr, probs, '-x', label=label)

# Coverage of the analytical posterior, plus the identity line as reference.
plt.plot(cr, cr_probs2, '-x', label='analytical posterior')
plt.plot(cr, cr, '-', lw=3)

plt.legend()
plt.xlabel('Credible region density')
plt.ylabel(r'P($\theta$ $\in$ CR | x)')
plt.title('Credible region (CR) probabilities for 2D Gaussian fit on the mean');