In [2]:
import numpy as np
import pandas as pd
import sys
import pickle 
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma, bernoulli
from scipy.special import expit


import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
np.random.seed(121)
nsim_data = 200
J = 6
K = 2
beta = np.array([[1,0], [-2, 2],[-1,-1],[2,1], [3,-1], [1,-1]], dtype=float)
mu = np.array([1,2,.3,-.8, 1, -1.4])
mu = np.zeros(J)
Sigma = np.eye(J)
ee = multivariate_normal.rvs(mean = np.zeros(J), cov = Sigma, size=nsim_data)
zz = multivariate_normal.rvs(mean = np.zeros(K), cov=np.eye(K), size=nsim_data)
yy = mu + zz@beta.T + ee
DD = bernoulli.rvs(p=expit(yy)); DD

# err = np.abs((beta@beta.T + Sigma) - np.cov(yy.T))

array([[0, 1, 0, 0, 0, 1],
       [1, 0, 1, 1, 1, 0],
       [1, 1, 1, 0, 0, 0],
       ...,
       [0, 0, 0, 1, 1, 0],
       [0, 1, 1, 0, 0, 0],
       [0, 0, 1, 0, 1, 0]])

In [20]:
data = dict()
data['N'] = nsim_data
data['K'] = K
data['J'] = J
data['Sigma'] = Sigma
data['y'] = yy
data['D'] = DD
data['beta'] = beta
data['e'] = ee
data['mu'] = mu
data['Omega'] = data['beta']@data['beta'].T + data['Sigma']

In [5]:
stan_data = dict(N = data['N'], K = data['K'], J = data['J'], DD = data['D'])


In [8]:
with open('model_code2.stan', 'r') as file:
    model_code = file.read()


In [9]:
sm = pystan.StanModel(model_code=model_code, verbose=True)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_81c4680d654676d7125426f5e52b9053 NOW.
INFO:pystan:OS: darwin, Python: 3.6.4 | packaged by conda-forge | (default, Dec 23 2017, 16:54:01) 
[GCC 4.2.1 Compatible Apple LLVM 6.1.0 (clang-602.0.53)], Cython 0.29.5


Compiling /var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0000gn/T/tmp6dqbykf9/stanfit4anon_model_81c4680d654676d7125426f5e52b9053_6873875913072797016.pyx because it changed.
[1/1] Cythonizing /var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0000gn/T/tmp6dqbykf9/stanfit4anon_model_81c4680d654676d7125426f5e52b9053_6873875913072797016.pyx
building 'stanfit4anon_model_81c4680d654676d7125426f5e52b9053_6873875913072797016' extension
creating /var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0000gn/T/tmp6dqbykf9/var
creating /var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0000gn/T/tmp6dqbykf9/var/folders
creating /var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0000gn/T/tmp6dqbykf9/var/folders/9j
creating /var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0000gn/T/tmp6dqbykf9/var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0000gn
creating /var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0000gn/T/tmp6dqbykf9/var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0000gn/T
creating /var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0000gn/T/tmp6dqbykf9/var/folders/9j/qb9gcwnj2lnb77886qjkmnfm0

In [10]:
fit = sm.sampling(data=stan_data, iter=2000, chains=4)


To run all diagnostics call pystan.check_hmc_diagnostics(fit)


In [14]:
pystan.stansummary(fit, pars=['beta'])

'Inference for Stan model: anon_model_81c4680d654676d7125426f5e52b9053.\n4 chains, each with iter=2000; warmup=1000; thin=1; \npost-warmup draws per chain=1000, total post-warmup draws=4000.\n\n            mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat\nbeta[1,1]  -0.02    0.03   0.73  -1.23  -0.65  -0.02   0.61   1.23    755   1.01\nbeta[2,1]   0.02    0.06   1.65  -2.75  -1.44   0.06   1.49   2.74    717   1.01\nbeta[3,1]   0.05    0.02   0.73  -1.36  -0.45   0.04   0.53   1.51    878    1.0\nbeta[4,1]  -0.02    0.04   1.16  -2.12  -0.93  -0.08   0.91   2.13    724    1.0\nbeta[5,1]  -0.06    0.08   2.18  -3.51   -2.1  -0.02   1.95   3.51    763    1.0\nbeta[6,1]  -0.02    0.04   1.15  -1.93  -1.04  -0.02   1.01   1.93    718   1.01\nbeta[1,2]  -0.02    0.03   0.72  -1.23  -0.63  -0.03   0.59   1.23    593    1.0\nbeta[2,2]   0.02    0.06   1.63  -2.75  -1.45   0.08   1.46    2.7    736    1.0\nbeta[3,2]   0.04    0.03   0.75  -1.44  -0.46   0.04   0.53   1.55  

In [15]:
post_samples = fit.extract(permuted=True)  # return a dictionary of arrays
pickle.dump(post_samples, open("post_samples2.p", "wb"))

In [16]:
ps = post_samples

In [17]:
sys.path.append('../../src/codebase/')

from plot import *


In [18]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
plots = []
for i in range(data['J']):
    for j in range(data['J']):
            plots.append(plot_trace(ps['Omega'][:,i,j],
                     true_value=data['Omega'][i,j],
                     title = 'Posterior distribution for Omega(%s,%s)'%(i,j)))
layout = hv.Layout(plots)
layout.cols(1)

In [21]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
plots = []
for i in range(data['J']):
            plots.append(plot_trace(ps['mu'][:,i],
                     true_value=data['mu'][i],
                     title = 'Posterior distribution for mu(%s,%s)'%(i,j)))
layout = hv.Layout(plots)
layout.cols(1)

In [22]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
plots = []
for i in range(data['J']):
    for j in range(data['K']):
        if i==0 and j==1:
            pass
        else:
            plots.append(plot_trace(ps['beta'][:,i,j],
                true_value=data['beta'][i,j],
                title = 'Posterior distribution for beta(%s,%s)'%(i,j)))
layout = hv.Layout(plots)
layout.cols(1)