In [1]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma
from statsmodels.tsa.stattools import acf
import datetime
import sys
import os

from codebase.plot import * 
from codebase.file_utils import save_obj, load_obj

%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:

# Simulate continuous responses y ~ N(alpha, beta beta^T + Sigma_e) and
# discretize them into M ordered categories.
np.random.seed(1)  # rvs() below is called without random_state, so it uses NumPy's global RNG
J = 4  # length of alpha / number of rows of beta
K = 2  # number of columns of beta
M = 3  # number of ordinal categories produced by the cutpoints below
nsim_data = 500

alpha = np.array([1, 2, .3, -.8])
beta = np.array([[1, .4], [.2, -1], [.6, 0], [.8, 1]], dtype=float)
Sigma_e = np.eye(J)

# Guard against the constants drifting away from the hand-written arrays.
assert alpha.shape == (J,)
assert beta.shape == (J, K)

# Covariance of y implied by beta and Sigma_e.
Omega = beta @ beta.T + Sigma_e
yy = multivariate_normal.rvs(mean=alpha, cov=Omega, size=nsim_data)


data = dict()
data['N'] = nsim_data
# Reuse the constants defined above instead of repeating the literals.
data['K'] = K
data['M'] = M
data['J'] = J
data['alpha'] = alpha
data['beta'] = beta
data['Omega'] = Omega
data['Sigma_e'] = Sigma_e
data['y'] = yy
bins = np.array([0, 2, 1000])  # interior cutpoints followed by a sentinel upper edge
cutpoints = bins[:-1]
assert len(cutpoints) == M - 1
# Digitize against the interior cutpoints only: labels are then always in
# 1..M, even for values at or above the sentinel edge.
data['y_ord'] = np.digitize(data['y'], cutpoints) + 1
data['cutpoints'] = cutpoints

In [12]:
# Sanity check: list the distinct ordinal labels present in the discretized data.
np.unique(data['y_ord'])

array([1, 2, 3])

In [17]:
# Show the cutpoints that were used to discretize y (bins without its last edge).
data['cutpoints']

array([0, 2])

In [3]:
# Inputs handed to the Stan model: the dimension constants plus the ordinal
# responses, which are passed under the name `DD`.
stan_data = {
    'N': data['N'],
    'K': data['K'],
    'J': data['J'],
    'M': data['M'],
    'DD': data['y_ord'],
}

In [4]:
# Read the Stan program text from disk, then compile it into a StanModel.
stan_path = './codebase/stan_code/ord/marg.stan'
with open(stan_path, 'r') as stan_file:
    model_code = stan_file.read()
sm = pystan.StanModel(model_code=model_code, verbose=False)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_9b0082688f86c56855d020e13c4c8774 NOW.


In [5]:
# Point estimate from Stan's optimizer. The optimizer starts from random
# initial values, so fix the seed to make the result reproducible across runs.
op = sm.optimizing(data=stan_data, seed=1)


In [6]:
# Show the cutpoints found by the optimizer.
print(op['cutpoints'])

[-0.8485896   1.82221409]


In [7]:
# Sample with a single chain of 1000 iterations. The seed is fixed so that
# Restart & Run All reproduces the same draws.
fit_run = sm.sampling(data=stan_data, iter=1000, chains=1, seed=1)

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


In [11]:
task_handle = "ord1"
print("\n\nCreating directory")
# Compact timestamp prefix, e.g. 20190617_093549_ (sortable, but not strict ISO 8601).
nowstr = datetime.datetime.now().strftime('%Y%m%d_%H%M%S_')
log_dir = "./log/" + nowstr + "%s/" % task_handle

# exist_ok=True avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(log_dir, exist_ok=True)



Creating directory


In [12]:
print("\n\nSaving fitted model in directory %s"%log_dir)
# PyStan warns that a fit can only be unpickled after its StanModel has been
# unpickled, so store the compiled model alongside the fit. When reloading,
# load 'sm' before 'fit'.
save_obj(sm, 'sm', log_dir)
save_obj(fit_run, 'fit', log_dir)



Saving fitted model in directory ./log/20190617_093549_ord1/


The relevant StanModel instance must be pickled along with this fit object.
When unpickling the StanModel must be unpickled first.
  pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)


In [13]:
param_names = ['Omega', 'beta', 'alpha', 'cutpoints', 'yy']

# extract() hands back a dictionary of arrays keyed by parameter name.
stan_samples = fit_run.extract(permuted=False, pars=param_names)

# Drop every length-1 axis from each array of draws (presumably the chain
# axis, since a single chain was run -- TODO confirm the extracted layout).
ps = {name: np.squeeze(stan_samples[name]) for name in param_names}


In [14]:
# Persist the extracted posterior draws under the name 'ps' in the run's log directory.
save_obj(ps, 'ps', log_dir)

In [15]:
%%opts Layout [fig_size=200]
# One trace plot per entry of beta, each annotated with the value used in the simulation.
plots = [
    plot_trace(
        ps['beta'][:, j, k],
        true_value=data['beta'][j, k],
        title='Posterior distribution for beta(%s,%s)' % (j, k),
    ).options(fig_inches=8, aspect=3)
    for j in range(data['J'])
    for k in range(data['K'])
]
layout = hv.Layout(plots).options(
    show_title=True,
    vspace=.3,
    absolute_scaling=False,
    normalize=True,  # use same y-range for all plots?
)

layout.cols(2)


In [17]:
%%opts Layout [fig_size=200]
# One trace plot per cutpoint, each annotated with the value used to discretize y.
# Loop over the number of cutpoints, not data['K'] (the number of columns of
# beta): the two only coincide for the current settings.
n_cutpoints = len(data['cutpoints'])
plots = []
for c in range(n_cutpoints):
    plots.append(plot_trace(ps['cutpoints'][:, c],
         true_value=data['cutpoints'][c],
         title = 'Posterior distribution for cutpoints(%s)'%(c)).\
                 options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=True) # use same y-range for all plots?

layout.cols(2)
