In [1]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma
from statsmodels.tsa.stattools import acf
import datetime
import sys
import os

from codebase.plot import * 
from codebase.file_utils import save_obj, load_obj
from codebase.post_process import * 

%matplotlib inline

%load_ext autoreload
%autoreload 2

  from pandas.core import datetools


In [2]:
# Simulate data from a one-factor model: y = mu + z * beta' + e + u.
np.random.seed(121)

nsim_data = 200  # number of simulated observations
J = 6            # number of observed variables
K = 1            # number of latent factors

# True parameter values used to generate the data.
beta = np.array([.8, .2, .1, .4, .9, .7], dtype=float)
mu = np.array([1, 2, .3, -.8, 1, -1.4])
sigma = np.array([1, 1.2, .9, .8, 1, 1.4])

# Diagonal covariance of the errors `ee` and of the extra noise term `uu`.
Sigma_e = np.diag(np.square(sigma))
Sigma_u = 0.1**2 * np.eye(J)

# Draw order (ee, uu, zz) is kept fixed so the seeded draws stay the same.
zero_J = np.zeros(J)
ee = multivariate_normal.rvs(mean=zero_J, cov=Sigma_e, size=nsim_data)
uu = multivariate_normal.rvs(mean=zero_J, cov=Sigma_u, size=nsim_data)
zz = multivariate_normal.rvs(mean=np.zeros(K), cov=np.eye(K), size=nsim_data)

# Observed data: mean + factor contribution + both noise terms.
factor_part = np.outer(zz, beta)
yy = mu + factor_part + ee + uu

In [3]:
# Bundle the simulated data together with the true parameter values so the
# plots below can compare posterior draws against them.
data = {
    'N': nsim_data,
    'K': K,
    'J': J,
    'Sigma_e': Sigma_e,
    'sigma': sigma,
    'y': yy,
    'beta': beta,
    'e': ee,
    'mu': mu,
    # beta * beta' + Sigma_e.
    # NOTE(review): yy also contains uu (covariance Sigma_u), which is not
    # added here -- confirm this matches the Stan model's definition of Omega.
    'Omega': np.outer(beta, beta) + Sigma_e,
}
data

{'J': 6,
 'K': 1,
 'N': 200,
 'Omega': array([[1.64, 0.16, 0.08, 0.32, 0.72, 0.56],
        [0.16, 1.48, 0.02, 0.08, 0.18, 0.14],
        [0.08, 0.02, 0.82, 0.04, 0.09, 0.07],
        [0.32, 0.08, 0.04, 0.8 , 0.36, 0.28],
        [0.72, 0.18, 0.09, 0.36, 1.81, 0.63],
        [0.56, 0.14, 0.07, 0.28, 0.63, 2.45]]),
 'Sigma_e': array([[1.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 1.44, 0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.81, 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.64, 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 1.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 0.  , 1.96]]),
 'beta': array([0.8, 0.2, 0.1, 0.4, 0.9, 0.7]),
 'e': array([[-0.57389821, -0.341915  , -0.2970995 ,  0.94695566, -0.44031017,
         -0.29684643],
        [-0.01411931,  0.44047496,  1.53867666, -1.06158989,  0.6296418 ,
          2.2615221 ],
        [ 1.40382596, -0.229712  , -0.71137365, -0.58617728, -1.96876855,
          0.56262237],
        ...,
        [-0.88249638, -1.74927686, -0.

In [4]:
# Inputs handed to the Stan model: dimensions plus the observed matrix.
stan_data = {
    'N': data['N'],
    'J': data['J'],
    'yy': data['y'],
}

In [5]:
# Load the Stan program source as a single string.
stan_path = './codebase/stan_code/cont_marg_CFA1.stan'
with open(stan_path) as stan_file:
    model_code = stan_file.read()


In [6]:
# Compile the Stan program (the output below shows the C++ build being run).
sm = pystan.StanModel(
    verbose=False,
    model_code=model_code,
)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_0d1dedeb43585e5f0c4ea526cb891536 NOW.


In [7]:
# Each run is logged to its own folder, named <timestamp>_<task_id>.
task_id = 'CFA1_NN'
nowstr = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_')  # e.g. 20190401_171635_
log_dir = ''.join(["./log/", nowstr, task_id, "/"])

In [8]:
# Create the log directory (and any missing parents). exist_ok=True makes the
# cell safe to re-run and avoids the check-then-create race of testing
# os.path.exists() first.
os.makedirs(log_dir, exist_ok=True)

In [9]:
# MCMC settings: the total iteration count is warmup plus retained draws.
num_chains = 1
num_warmup = 1000
num_samples = 1000
num_iter = num_warmup + num_samples

In [10]:
# Run the sampler.
# - warmup is passed explicitly so num_warmup is actually honoured; before, it
#   only matched PyStan's default (iter // 2) by coincidence.
# - seed fixes Stan's own RNG so the draws are reproducible; np.random.seed
#   above only controls the NumPy/SciPy data simulation.
fit_run = sm.sampling(
    data=stan_data,
    iter=num_iter,
    warmup=num_warmup,
    chains=num_chains,
    seed=121,
)

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


In [11]:
# Persist the compiled model first, then the fit; the pickling warning below
# says the StanModel must be unpickled before the fit object.
for _obj, _label in ((sm, 'sm'), (fit_run, 'fit')):
    save_obj(_obj, _label, log_dir)


The relevant StanModel instance must be pickled along with this fit object.
When unpickling the StanModel must be unpickled first.
  pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)


In [13]:
# Alias the fresh run as `fit`, the name the post-processing cells below use
# (the commented-out reload cell would set `fit` instead).
fit = fit_run

In [6]:
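# Optional: reload a previously saved model and fit instead of re-running the
# sampler. Uncomment and point log_dir at the run to load (model first, then fit).
# log_dir = "./log/20190401_171635_CFA1_NN/"
# sm = load_obj('sm', log_dir)
# fit = load_obj('fit', log_dir)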



In [14]:
# Parameters whose posterior draws are inspected below.
param_names = ['Omega', 'uu', 'beta', 'mu', 'sigma']

# Returns a dictionary of arrays, one entry per requested parameter.
stan_samples = fit.extract(pars=param_names, permuted=False)

# With a single chain, squeeze out the length-1 axes so the arrays can be
# indexed as [draw, ...] by the plotting cells.
# NOTE(review): np.squeeze drops *every* length-1 axis, and with more than one
# chain the arrays are passed through unsqueezed -- confirm the plotting cells'
# indexing still applies in that case.
ps = (
    {name: np.squeeze(stan_samples[name]) for name in param_names}
    if num_chains == 1
    else stan_samples
)

In [15]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
plots = []
for i in range(data['J']):
    for j in range(data['J']):
            plots.append(plot_trace(ps['Omega'][:,i,j],
                     true_value=data['Omega'][i,j],
                     title = 'Posterior distribution for Omega(%s,%s)'%(i,j)))
layout = hv.Layout(plots)
layout.cols(1)

In [16]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
plots = []
for i in range(data['J']):
    for j in range(data['J']):
            plots.append(plot_trace(acf(ps['Omega'][:,i,j]),
                     title = 'Autocorrelation of Omega(%s,%s)'%(i,j)))
layout = hv.Layout(plots)
layout.cols(1)

In [17]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
plots = []
for i in range(data['J']):
            plots.append(plot_trace(ps['mu'][:,i],
                     true_value=data['mu'][i],
                     title = 'Posterior distribution for mu(%s)'%(i)))
layout = hv.Layout(plots)
layout.cols(1)

In [18]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
plots = []
for i in range(data['J']):
            plots.append(plot_trace(acf(ps['mu'][:,i]),
                     title = 'Autocorrelation of mu(%s)'%(i)))
layout = hv.Layout(plots)
layout.cols(1)

In [19]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
plots = []
for i in range(data['J']):
            plots.append(plot_trace(ps['sigma'][:,i],
                     true_value=data['sigma'][i],
                     title = 'Posterior distribution for sigma(%s)'%(i)))
layout = hv.Layout(plots)
layout.cols(1)

# Find mode of $\beta$

In [17]:
# Posterior draws of beta, passed to kde_mode; judging by the output below it
# returns one value per element of beta.
# NOTE(review): kde_mode is a project helper (star-imported) -- presumably the
# mode of a kernel density estimate per column; confirm against its source.
b = ps['beta']
kde_mode(b)

array([ 1.        , -1.22880346, -0.08263687, -0.15410583, -0.13410083,
       -1.36468814])

In [20]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
plots = []
for i in range(data['J']):
            plots.append(plot_trace(ps['beta'][:,i],
                     title = 'Posterior distribution for beta(%s,%s)'%(i,j)))
layout = hv.Layout(plots)
layout.cols(1)

In [21]:
%%opts Curve {+axiswise} [width=600, height=200, tools=['hover']] 
plots = []
for i in range(data['J']):
            plots.append(plot_trace(acf(ps['beta'][:,i]),
                     title = 'Autocorrelation of gamma(%s,%s)'%(i,j)))
layout = hv.Layout(plots)
layout.cols(1)

  acf = avf[:nlags + 1] / avf[0]
  return (np.nanmin(column), np.nanmax(column))
