In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import scipy.stats
from sampled import sampled
import seaborn as sns
import theano.tensor as tt
import pandas as pd

# Concentration of measure
We locate the typical set in higher dimensions. As the dimension increases, the typical set of a Gaussian becomes concentrated in a "narrower" region.

In [None]:
# For each dimension N, draw 10000 points from a zero-mean Gaussian with
# covariance I/N and histogram their Euclidean norms on shared axes.
for N in (2, 4, 8, 16, 32):
    scaled_identity = np.eye(N) / N
    x = scipy.stats.multivariate_normal(cov=scaled_identity).rvs(size=10000)
    radii = np.linalg.norm(x, axis=1)
    _ = sns.distplot(radii)

# Same axis limits for every histogram so the curves are directly comparable.
plt.xlim(0, 2.5)
plt.ylim(0, 3.5)

In [None]:
def jointplot(ary):
    """Draw a seaborn joint plot from the columns of ``ary`` with fixed styling.

    ``ary.T`` is unpacked into positional arguments of ``sns.jointplot``, so
    each column of ``ary`` becomes one positional argument (x, then y). The
    transparency and the axis limits are hard-coded so that every plot made
    through this helper looks the same. Nothing is returned.
    """
    columns = ary.T
    fixed_style = dict(
        alpha=0.1,
        stat_func=None,
        xlim=(-1.2, 1.2),
        ylim=(-1.2, 1.2),
    )
    sns.jointplot(*columns, **fixed_style)

# Unit Shell Distribution

https://chi-feng.github.io/mcmc-demo/app.html

In [None]:
def tt_unit_shell_pdf(scale):
    """Return a ``logp(x)`` closure for the unit-shell density.

    The closure evaluates ``-((1 - ||x||_2) / scale) ** 2``: it is zero when
    ``x`` has unit 2-norm and decreases quadratically with the distance of the
    norm from 1, measured in units of ``scale``. No normalising constant is
    included.
    """
    def logp(x):
        radial_offset = 1 - x.norm(2)
        return -tt.square(radial_offset / scale)
    return logp

@sampled
def unit_shell(dim=2, scale=0.001, **observed):
    """Declare a ``dim``-dimensional 'unit_shell' ``DensityDist`` variable.

    The log-density comes from ``tt_unit_shell_pdf(scale)``. The test value is
    the all-ones vector rescaled to unit norm, i.e. a point lying on the shell.
    ``observed`` is accepted but not used inside this body.
    """
    ones = np.ones(dim)
    pm.DensityDist(
        'unit_shell',
        logp=tt_unit_shell_pdf(scale),
        shape=dim,
        testval=ones / np.linalg.norm(ones),
    )

In [None]:
# Draw 1000 Metropolis samples from a 3-dimensional unit shell.
shell_model = unit_shell(dim=3, scale=0.01)
with shell_model:
    step = pm.Metropolis()
    metropolis_sample = pm.sample(
        draws=1000,
        step=step,
    )

In [None]:
# One DataFrame column per coordinate of the sampled 'unit_shell' variable.
shell_draws = metropolis_sample.get_values('unit_shell')
df = pd.DataFrame(shell_draws)

# Pairwise scatter/histogram grid, followed by the trace plot of the chain.
sns.pairplot(df)
pm.traceplot(metropolis_sample, ['unit_shell'], figsize=(12,6))

# Multivariate Gaussian

Sampling a multivariate Gaussian without adaptation, starting from a "bad" initial guess

In [None]:
def create_covariance(dim, off_diag=0.5, verbose=False):
    """Build a ``dim x dim`` covariance matrix with a constant off-diagonal.

    The matrix has 1 on the diagonal and ``off_diag`` everywhere else, and the
    whole matrix is then divided by ``dim``. With ``off_diag=0`` this reduces
    to ``np.eye(dim) / dim``, the covariance used by the sampling cells in
    this notebook.

    Parameters
    ----------
    dim : int
        Number of rows and columns.
    off_diag : float, optional
        Value of every off-diagonal entry before the ``1 / dim`` scaling.
    verbose : bool, optional
        If true, print the resulting matrix.

    Returns
    -------
    numpy.ndarray
        The ``(dim, dim)`` float matrix.
    """
    out = np.full((dim, dim), off_diag, dtype=float)
    # fill_diagonal mutates ``out`` in place and returns None, so it must be a
    # statement of its own; dividing its return value raises TypeError.
    np.fill_diagonal(out, 1)
    out /= dim
    if verbose:
        print(out)
    return out

In [None]:
def tt_gaussian_pdf(mean, cov):
    """Return a ``logp(x)`` closure for a multivariate normal log-density.

    The normalising constant is computed from ``numpy.linalg.slogdet`` as
    ``-(dim * log(2*pi) + log det(cov)) / 2``. Forming
    ``(2*pi)**dim * det(cov)`` directly overflows or underflows once ``dim``
    reaches a few hundred, even for a well-conditioned covariance.

    Parameters
    ----------
    mean : array-like
        Mean vector; its first axis length is taken as the dimension.
    cov : array-like
        Square covariance matrix.

    Returns
    -------
    callable
        ``logp(x)`` evaluating
        ``constant - (x - mean)^T cov^{-1} (x - mean) / 2`` with ``tt.dot``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``cov`` cannot be inverted.
    ValueError
        If the determinant of ``cov`` is not strictly positive.
    """
    mean = np.asarray(mean)
    cov = np.asarray(cov)
    dim = mean.shape[0]

    covinv = np.linalg.inv(cov)
    sign, logdet = np.linalg.slogdet(cov)
    if sign <= 0:
        # log(det) is undefined here; the direct formula would yield NaN/inf.
        raise ValueError("cov must have a strictly positive determinant")
    constant = -(dim * np.log(2 * np.pi) + logdet) / 2

    def logp(x):
        centered = x - mean
        return constant - tt.dot(tt.dot(centered.T, covinv), centered) / 2
    return logp

@sampled
def gauss(mean=[0,0], cov=[[1,0],[0,1]], **observed):
    """Declare a 'gauss' ``DensityDist`` variable with the given mean and covariance.

    The dimension is the length of ``mean``; the log-density comes from
    ``tt_gaussian_pdf`` and the test value is the zero vector. ``observed`` is
    accepted but not used inside this body.
    """
    mean_arr = np.asarray(mean)
    cov_arr = np.asarray(cov)
    n_dims = mean_arr.shape[0]
    pm.DensityDist(
        'gauss',
        logp=tt_gaussian_pdf(mean_arr, cov_arr),
        shape=n_dims,
        testval=np.zeros(n_dims),
    )

In [None]:
# Target: zero-mean Gaussian in 5 dimensions with covariance I/dim.
dim = 5
mean = np.zeros(dim)
cov = np.identity(dim) / dim

# Start the chain at (5, 5, 5, 5, 5).
starting_point = np.full(dim, 5.0)

with gauss(mean=mean, cov=cov):
    step = pm.Metropolis()
    # tune=0 with discard_tuned_samples=False: no tuning phase is requested
    # and nothing is dropped from the returned trace.
    metropolis_sample = pm.sample(
        draws=1000,
        step=step,
        start={'gauss': starting_point},
        tune=0,
        discard_tuned_samples=False,
    )

In [None]:
# One DataFrame column per coordinate of the sampled 'gauss' variable.
gauss_draws = metropolis_sample.get_values('gauss')
df = pd.DataFrame(gauss_draws)
sns.pairplot(df)
pm.traceplot(metropolis_sample, ['gauss'], figsize=(12,6))

# Banana

In [None]:
def tt_banana_pdf(mean, cov, warp):
    """Return a ``logp(x)`` closure for a warped ("banana") Gaussian log-density.

    Every coordinate except the first is shifted by ``warp * x[0] ** 2`` before
    the Gaussian quadratic form is evaluated; the first coordinate is left
    unshifted.

    The exemption of the first coordinate is encoded in a constant weight
    vector whose first entry is zero. ``tt.set_subtensor`` returns a new
    tensor rather than modifying its argument, so calling it and discarding
    the result leaves the first coordinate warped as well.

    The normalising constant uses ``numpy.linalg.slogdet`` so that it stays
    finite when ``(2*pi)**dim * det(cov)`` would overflow or underflow.

    Parameters
    ----------
    mean : array-like
        Mean vector; its first axis length is taken as the dimension.
    cov : array-like
        Square covariance matrix.
    warp : float
        Strength of the quadratic distortion.

    Returns
    -------
    callable
        ``logp(x)`` evaluating ``constant - y^T cov^{-1} y / 2`` with
        ``y = x + distortion - mean``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``cov`` cannot be inverted.
    ValueError
        If the determinant of ``cov`` is not strictly positive.
    """
    mean = np.asarray(mean)
    cov = np.asarray(cov)
    dim = mean.shape[0]

    covinv = np.linalg.inv(cov)
    sign, logdet = np.linalg.slogdet(cov)
    if sign <= 0:
        # log(det) is undefined here; the direct formula would yield NaN/inf.
        raise ValueError("cov must have a strictly positive determinant")
    constant = -(dim * np.log(2 * np.pi) + logdet) / 2

    # Per-coordinate warp strength; the first coordinate is not distorted.
    warp_weights = np.full(dim, warp, dtype=float)
    warp_weights[:1] = 0

    def logp(x):
        distortion = (x[0] ** 2) * warp_weights
        shifted = x + distortion - mean
        return constant - tt.dot(tt.dot(shifted.T, covinv), shifted) / 2
    return logp

@sampled
def banana(mean=[0,0], cov=[[1,0],[0,1]], warp=0.9, **observed):
    """Declare a 'banana' ``DensityDist`` variable with the given parameters.

    The dimension is the length of ``mean``; the log-density comes from
    ``tt_banana_pdf`` and the test value is the zero vector. ``observed`` is
    accepted but not used inside this body.
    """
    mean_arr = np.asarray(mean)
    cov_arr = np.asarray(cov)
    n_dims = mean_arr.shape[0]
    pm.DensityDist(
        'banana',
        logp=tt_banana_pdf(mean_arr, cov_arr, warp),
        shape=n_dims,
        testval=np.zeros(n_dims),
    )

In [None]:
# Target: 5-dimensional banana with zero mean, covariance I/dim and warp 1.
dim = 5
mean = np.zeros(dim)
cov = np.identity(dim) / dim
warp = 1

# Start the chain at (10, 10, 10, 10, 10).
starting_point = np.full(dim, 10.0)

with banana(mean=mean, cov=cov, warp=warp):
    step = pm.Metropolis()
    # 500 tuning iterations are requested and dropped from the returned trace.
    metropolis_sample = pm.sample(
        draws=10000,
        step=step,
        start={'banana': starting_point},
        tune=500,
        discard_tuned_samples=True,
    )

In [None]:
# One DataFrame column per coordinate of the sampled 'banana' variable.
banana_draws = metropolis_sample.get_values('banana')
df = pd.DataFrame(banana_draws)
sns.pairplot(df)