# Donut in PyMC3

In [None]:
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import pickle
from timeit import default_timer as timer
import numpy as np
import theano.tensor as tt
from scipy.stats.kde import gaussian_kde
from scipy.integrate import quad, nquad

from utils import generate_datasets, SEED

import pymc3 as pm

In [2]:
pm.__version__

'3.4.1'

## 1. Model

In [3]:
def pymc3_model(x, n_samples, n_dim):
    """
    Builds the PyMC3 model of the donut.

    Random variables declared (names as registered in the model):
      - 'R', 'r': HalfNormal priors, both built with the argument 10
      - 'C': Normal prior with mean 10 and shape n_dim
      - 'v': Normal(0, 1) latent of shape (n_samples, n_dim); each row is
        divided by its L2 norm before being used
      - 'y_obs': Normal likelihood with mean C + v*R and scale r,
        observed on `x`

    Returns the pm.Model instance.
    """
    donut = pm.Model()
    with donut:
        big_radius = pm.HalfNormal('R', 10)
        noise = pm.HalfNormal('r', 10)
        # NOTE(review): only the mean (10) is given here, so the scale is the
        # library default -- confirm this was not meant to be sd=10.
        center = pm.Normal('C', 10, shape=n_dim)
        latent = pm.Normal(
            'v',
            np.zeros(n_dim),
            np.ones(n_dim),
            shape=(n_samples, n_dim),
        )
        # Rescale every row of the latent matrix to unit L2 length.
        row_norms = latent.norm(L=2, axis=1)
        directions = latent / row_norms.reshape((latent.shape[0], 1))
        pm.Normal('y_obs', center + directions * big_radius, noise, observed=x)
    return donut

## 2. Inference

### NUTS

In [12]:
def pymc3_nuts(n_samples, n_dim, seeds=SEED):
    """
    Runs PyMC3's NUTS algorithm for each seed

    One dataset per seed is obtained from `generate_datasets`; a fresh model
    is built and sampled for each of them (4 chains, target_accept=0.99).
    The sampler is seeded with the dataset's seed so that runs are
    reproducible, in line with `pymc3_vi`.

    Parameters:
      n_samples: number of observations per dataset
      n_dim: dimensionality of each observation
      seeds: iterable of integer seeds, iterated in lockstep with the
        datasets returned by `generate_datasets`

    Returns:
      A list with one dict per seed (same order as `seeds`) holding
        'R', 'r', 'C': posterior draws averaged across chains
        'iters': draws + tuning steps per chain
        'warmup': tuning steps per chain
        'time': wall-clock seconds spent in pm.sample
        'divergences': number of divergent transitions in the trace
    """
    draws = 1000
    tune = 1000

    # Only the datasets are needed here; the remaining return values of
    # generate_datasets are not used by this function.
    Y, _, _, _ = generate_datasets(n_samples, n_dim, seeds)

    all_results = []
    for seed, y in zip(seeds, Y):
        model = pymc3_model(y, n_samples, n_dim)
        with model:
            print('...')
            start = timer()
            # int() so that numpy integer seeds are accepted as well.
            trace = pm.sample(draws=draws, tune=tune, chains=4, cores=4,
                              random_seed=int(seed),
                              nuts_kwargs=dict(target_accept=.99))  # max_treedepth=10
            end = timer()
        divergent = trace['diverging'].nonzero()[0].size
        # combine=False yields one array per chain; axis 0 of the stacked
        # array is therefore the chain axis that gets averaged out.
        R_ = np.array(trace.get_values('R', combine=False)).mean(axis=0)[:, None]
        r_ = np.array(trace.get_values('r', combine=False)).mean(axis=0)[:, None]
        C_ = np.array(trace.get_values('C', combine=False)).mean(axis=0)
        all_results.append({
            'R': R_,
            'r': r_,
            'C': C_,
            'iters': draws + tune,
            'warmup': tune,
            'time': end - start,
            'divergences': divergent,
        })

    print('Done')
    return all_results

In [None]:
# Small dataset: NUTS on 1000 observations in 2 dimensions
n_samples, n_dim = 1000, 2
pymc3_nuts(n_samples=n_samples, n_dim=n_dim)

In [None]:
# Big dataset: NUTS on 5000 observations in 5 dimensions
n_samples, n_dim = 5000, 5
pymc3_nuts(n_samples=n_samples, n_dim=n_dim)

### ADVI

In [3]:
def pymc3_vi(n_samples, n_dim, seeds=SEED):
    """
    Runs PyMC3's ADVI algorithm (meanfield approximation) for each seed

    One dataset per seed is obtained from `generate_datasets`. For every
    dataset the model is fitted once per iteration budget (5 evenly spaced
    values from 1000 to 50000), 1000 draws are taken from the fitted
    approximation, and the outcome is appended as a pickle record to
    'results/pymc3/vi_{n_dim}d_{seed}.pkl'. Each file therefore ends up
    holding several consecutive pickled dicts.

    Parameters:
      n_samples: number of observations per dataset
      n_dim: dimensionality of each observation
      seeds: iterable of seeds, iterated in lockstep with the datasets
        returned by `generate_datasets`

    Each record has the keys
      'iters': iteration budget passed to pm.fit (the convergence callback
        is attached, so this is the requested budget)
      'tol': tolerance given to the convergence callback
      'time': wall-clock seconds spent in pm.fit
      'R', 'r', 'C': draws from the fitted approximation
    """
    tol = 0.0001
    # The grid of iteration budgets does not depend on the seed.
    iters = np.linspace(1000, 50000, 5).astype(int)

    # Only the datasets are needed here; the remaining return values of
    # generate_datasets are not used by this function.
    Y, _, _, _ = generate_datasets(n_samples, n_dim, seeds)

    for seed, y in zip(seeds, Y):
        model = pymc3_model(y, n_samples, n_dim)
        out_path = 'results/pymc3/vi_{}d_{}.pkl'.format(n_dim, seed)
        for n in iters:
            with model:
                start = timer()
                advi_fit = pm.fit(
                    n=n,
                    random_seed=seed,
                    callbacks=[pm.callbacks.CheckParametersConvergence(
                        diff='absolute', tolerance=tol)],
                )
                end = timer()
                print('Time: ', end-start)
                trace = advi_fit.sample(draws=1000)
                R_ = np.array(trace.get_values('R'))
                r_ = np.array(trace.get_values('r'))
                C_ = np.array(trace.get_values('C'))

            results = {'iters': n, 'tol': tol, 'time': end-start, 'R': R_, 'r': r_, 'C': C_}
            # Append mode keeps the records of earlier budgets; the context
            # manager guarantees the handle is flushed and closed each time.
            with open(out_path, 'ab') as fh:
                pickle.dump(results, fh)
        print('Done')

In [None]:
# Small dataset: ADVI on 1000 observations in 2 dimensions
n_samples, n_dim = 1000, 2
pymc3_vi(n_samples=n_samples, n_dim=n_dim, seeds=SEED)

In [None]:
# Big dataset: ADVI on 5000 observations in 5 dimensions
n_samples, n_dim = 5000, 5
pymc3_vi(n_samples=n_samples, n_dim=n_dim, seeds=SEED)