In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn

from torch.autograd import Variable
from random import shuffle
from scipy.stats import beta
import scipy.special
import sys 
sys.path.append('../')
from utils import *

%matplotlib inline

import matplotlib as mpl
# Notebook-wide matplotlib defaults: larger titles/labels and a wide default figure.
mpl.rcParams.update({
    'axes.titlesize': 20,
    'axes.labelsize': 15,
    'ytick.labelsize': 12,
    'xtick.labelsize': 12,
    'legend.fontsize': 12,
    'figure.figsize': (15, 5),
})

## Define an MDN for approximating a multidimensional Gaussian

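It takes as input the summary statistics of the data $x$ and returns the mean $\mu$ and the upper-triangular Cholesky factor $U$ of the precision matrix of a Gaussian over the parameters.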

In [None]:
class MDN(nn.Module):
    """Density network that parameterizes a single multivariate Gaussian.

    Architecture: one tanh hidden layer followed by two linear heads, one for
    the mean and one for the entries of an upper-triangular matrix ``U``.
    ``U`` is meant to be the Cholesky factor of the precision matrix, i.e.
    ``precision = U^T U``; its diagonal is passed through ``exp`` so it is
    strictly positive.

    Parameters
    ----------
    ndim_input : int
        Number of input features.
    ndim_output : int
        Dimension ``D`` of the Gaussian that is parameterized.
    n_hidden : int
        Number of hidden units.
    n_components : int
        Number of mixture components. Only ``1`` is supported by ``forward``;
        the precision head is already sized for ``n_components`` factors, but
        the mean head and the matrix assembly are not.
    """

    def __init__(self, ndim_input=2, ndim_output=2, n_hidden=10, n_components=1):
        super(MDN, self).__init__()

        self.ndims = ndim_output
        self.n_components = n_components
        # an upper-triangular D x D matrix has D * (D + 1) / 2 free entries
        self.utriu_entries = self.ndims * (self.ndims + 1) // 2

        # input layer
        self.fc_in = nn.Linear(ndim_input, n_hidden)
        # activation
        self.tanh = nn.Tanh()

        # output layer for the mean estimates
        self.mu_out = nn.Linear(n_hidden, ndim_output)

        # output layer for the precision factor: one vector of upper-triangular
        # entries per component, stacked into a single long vector
        self.U_out = nn.Linear(n_hidden, self.utriu_entries * n_components)

    def forward(self, x):
        """Map a batch of inputs to Gaussian parameters.

        Parameters
        ----------
        x : torch.Tensor
            Batch of inputs; the first axis is the batch axis and the last
            axis must have ``ndim_input`` entries.

        Returns
        -------
        (out_mu, U_mat) : tuple of torch.Tensor
            ``out_mu`` holds the means, one row per batch element.
            ``U_mat`` has shape ``(batch, D, D)``, is upper triangular and has
            a strictly positive diagonal.

        Raises
        ------
        NotImplementedError
            If the network was built with ``n_components != 1``.
        """
        if self.n_components != 1:
            # Fail with a clear message instead of the shape-mismatch error
            # the triangular assignment below would otherwise produce.
            raise NotImplementedError(
                "MDN.forward only supports n_components=1, got %r" % (self.n_components,))

        batch_size = x.size(0)

        hidden = self.tanh(self.fc_in(x))
        out_mu = self.mu_out(hidden)

        # activations for the upper-triangular entries of U
        U_vec = self.U_out(hidden)

        # Allocate U with the dtype/device of the activations so the model
        # also works after .double() or .to(device).
        U_mat = U_vec.new_zeros(batch_size, self.ndims, self.ndims)

        # scatter the vector into the upper triangle (row-major order)
        rows, cols = torch.triu_indices(self.ndims, self.ndims, device=U_vec.device)
        U_mat[:, rows, cols] = U_vec

        # exponentiate the diagonal to make it strictly positive
        diag = torch.arange(self.ndims, device=U_vec.device)
        U_mat[:, diag, diag] = torch.exp(U_mat[:, diag, diag])

        return (out_mu, U_mat)

In [None]:
def mdn_loss_function(y, mu, U):
    """Negative batch-averaged log-density of ``y`` under the predicted Gaussian.

    ``multivariate_normal_pdf`` comes from ``utils`` (star import); it is
    called with ``log=True`` and presumably returns one log-density per batch
    element -- confirm against ``utils``.

    Parameters
    ----------
    y : target values
    mu : predicted means
    U : predicted upper-triangular factors, as returned by the network

    Returns
    -------
    The negated mean of the values returned by ``multivariate_normal_pdf``.
    """
    log_probs = multivariate_normal_pdf(y, mu, U, log=True)
    # mean over the batch, negated so that minimizing the loss maximizes the likelihood
    return -torch.mean(log_probs)

In [None]:
def generate_2DGaussian_dataset(n_samples, sample_size, prior, data_cov=None):
    """Simulate a training set of (summary statistics, parameter) pairs.

    For each of the ``n_samples`` simulations a mean ``theta`` is drawn from
    ``prior``, ``sample_size`` points are drawn from a 2-D Gaussian with that
    mean and covariance ``data_cov``, and the points are reduced to summary
    statistics with ``stats_ND_Gaussian``.

    Parameters
    ----------
    n_samples : int
        Number of simulated data sets (rows of the returned arrays).
    sample_size : int
        Number of points drawn per simulated data set.
    prior : object with an ``rvs()`` method
        Prior over the 2-D mean, e.g. a frozen ``scipy.stats.multivariate_normal``.
    data_cov : array-like of shape (2, 2), optional
        Covariance of the data distribution. Defaults to ``0.5 * I``.

    Returns
    -------
    X : np.ndarray of shape (n_samples, 2)
        Summary statistics of every simulated data set.
    thetas : np.ndarray of shape (n_samples, 2)
        The means the data sets were generated with.
    """
    if data_cov is None:
        data_cov = .5 * np.eye(2)

    X = []
    thetas = []

    for _ in range(n_samples):
        # sample the mean from the prior
        theta = prior.rvs()

        # generate data with that mean and covariance data_cov
        x = scipy.stats.multivariate_normal.rvs(mean=theta, cov=data_cov, size=sample_size).reshape(sample_size, 2)

        # the network is trained on the summary statistics, not the raw points
        X.append(stats_ND_Gaussian(x))
        thetas.append(theta)

    # Reshape explicitly instead of calling squeeze(): a bare squeeze() would also
    # drop the sample axis when n_samples == 1 and return 1-D arrays.
    return np.array(X).reshape(n_samples, 2), np.array(thetas).reshape(n_samples, 2)

def stats_ND_Gaussian(x):
    """
    Summary statistics of a multivariate Gaussian sample x.

    Sums x over its first axis (the observations) and returns the result as a
    float array with a leading axis of length 1, e.g. shape (1, D) for an
    (N, D) sample.
    """
    column_totals = np.sum(x, axis=0, keepdims=True)
    return column_totals.astype(float)

def batch_generator(dataset, batch_size=5):
    """Yield shuffled minibatches from a list of (x, y) pairs.

    ``dataset`` is shuffled in place. Only full batches are produced; a
    trailing remainder of fewer than ``batch_size`` items is dropped. Each
    yielded item is a pair ``(xs, ys)`` of tuples of length ``batch_size``.
    """
    shuffle(dataset)
    n_batches = len(dataset) // batch_size
    for start in range(0, n_batches * batch_size, batch_size):
        chunk = dataset[start:start + batch_size]
        # transpose the list of pairs into (all xs, all ys)
        xs, ys = zip(*chunk)
        yield xs, ys
        
def train(X, Y, n_epochs=500, n_minibatch=50, net=None, optimizer=None):
    """Fit a network to (X, Y) pairs by minibatch gradient descent on ``mdn_loss_function``.

    Parameters
    ----------
    X, Y : sequences of equal length
        Network inputs and regression targets; they are paired row by row.
    n_epochs : int
        Number of passes over the data.
    n_minibatch : int
        Minibatch size. Only full minibatches are used (see ``batch_generator``);
        if it exceeds the number of rows no update is performed and an empty
        list is returned.
    net : callable, optional
        Network returning ``(mu, U)`` for a batch of inputs. Defaults to the
        notebook-level ``model``.
    optimizer : optional
        Optimizer over the parameters of ``net``. Defaults to the notebook-level
        ``optim``.

    Returns
    -------
    list
        One loss value (0-d numpy array) per minibatch update, in training order.
    """
    # fall back to the objects defined at notebook level
    if net is None:
        net = model
    if optimizer is None:
        optimizer = optim

    dataset_train = list(zip(X, Y))
    dtype = torch.get_default_dtype()

    losses = []

    for epoch in range(n_epochs):
        loss = None

        for x_batch, y_batch in batch_generator(dataset_train, n_minibatch):
            # stack with numpy first: building a tensor directly from a tuple
            # of arrays is slow
            x_var = torch.as_tensor(np.asarray(x_batch), dtype=dtype)
            y_var = torch.as_tensor(np.asarray(y_batch), dtype=dtype)

            (out_mu, out_U) = net(x_var)
            loss = mdn_loss_function(y_var, out_mu, out_U)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            losses.append(loss.detach().cpu().numpy())

        # `loss` is a 0-dim tensor, so it has to be read with .item() (indexing
        # it with [0] raises on current PyTorch); skip the report when the
        # epoch produced no minibatch at all.
        if (epoch + 1) % 10 == 0 and loss is not None:
            print("[epoch %04d] loss: %.4f" % (epoch + 1, loss.item()))

    return losses


In [None]:
# Single-component network: 2 input features, 2-D Gaussian out (MDN defaults).
model = MDN(ndim_input=2, n_components=1)
# Adam over all network parameters; `train` reads `model` and `optim` from the notebook namespace.
optim = torch.optim.Adam(params=model.parameters(), lr=1e-2)

In [None]:
# simulation budget: number of simulated data sets and points per data set
n_samples, sample_size = 10000, 10

# covariance of the data distribution and Gaussian prior on the mean
data_cov = np.eye(2) * 0.5
prior = scipy.stats.multivariate_normal(mean=[0., 0.], cov=np.eye(2) * 2.)

# simulate (summary statistics, mean) pairs for training
X, Y = generate_2DGaussian_dataset(n_samples, sample_size, prior, data_cov=data_cov)
# `normalize` comes from utils; `norm` is reused below for the observed data --
# presumably it holds the normalization constants (confirm against utils)
X, norm = normalize(X)

In [None]:
# Train the network; `train` returns one loss value per minibatch update.
loss = train(X, Y, n_epochs=50, n_minibatch=100)
plt.plot(loss)
plt.xlabel('minibatch update')
plt.ylabel('loss')
plt.title('Training loss');

In [None]:
# observe data 
true_mu = [-.5, .1]
true_cov = data_cov
xo = scipy.stats.multivariate_normal.rvs(mean=true_mu, cov=true_cov, size=sample_size).reshape(sample_size, 2)
# gets stats 
statso = stats_ND_Gaussian(xo)
# normalize 
statso, norm = normalize(statso, norm)
# cast to torch 
statso = Variable(torch.Tensor(statso))

In [None]:
plt.figure(figsize=(15, 10))
# top panel: 2-D histogram of the two columns of the training inputs X
plt.subplot(211)
plt.hist2d(X[:, 0], X[:, 1])
# raw string: '\m' is not a valid Python escape sequence and triggers a
# warning in a normal string literal
plt.title(r'Samples generated by the model: 2D Gaussian with prior on $\mu$');
plt.colorbar();
# bottom panel: 2-D histogram of the observed sample xo
plt.subplot(212)
plt.title('observed data')
plt.hist2d(xo[:, 0], xo[:, 1])
plt.colorbar();

In [None]:
# predict with observed data 
(out_mu, out_U) = model(statso.view(1, 2))
Sin = torch.mm(torch.transpose(out_U.view(2, 2), 0, 1), out_U.view(2, 2))

# convert to numpy 
mean = out_mu.data.numpy().squeeze()
cov = torch.inverse(Sin).data.numpy()

In [None]:
posterior = scipy.stats.multivariate_normal(mean=mean, cov=cov)
# get analystical posterior 
post_mean, post_cov = calculate_multivariate_normal_mu_posterior(xo, data_cov, sample_size, 
                                                                prior.mean, prior.cov)
postana = scipy.stats.multivariate_normal(mean=post_mean, cov=post_cov)

In [None]:
def calculate_multivariate_normal_mu_posterior(X, sigma, N, mu_0, sigma_0): 
    sigma_N = np.linalg.inv(np.linalg.inv(sigma_0) + N * np.linalg.inv(sigma))
    mu_N = sigma_N.dot(N * np.linalg.inv(sigma).dot(X.mean(axis=0)) + np.linalg.inv(sigma_0).dot(mu_0))
    
    return mu_N, sigma_N

In [None]:
# evaluation grid on [-r, r) x [-r, r) with step 0.01
r = 1.
x, y = np.mgrid[-r:r:.01, -r:r:.01]
pos = np.dstack((x, y))

plt.figure(figsize=(15, 12))
# one panel per distribution; the red dot marks the true mean
panels = [('Prior', prior), ('Posterior', posterior), ('Analytical Posterior', postana)]
for panel_idx, (panel_title, panel_dist) in enumerate(panels, start=1):
    plt.subplot(3, 1, panel_idx)
    plt.contourf(x, y, panel_dist.pdf(pos))
    plt.plot(true_mu[0], true_mu[1], 'ro')
    plt.title(panel_title)