In [1]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma
from statsmodels.tsa.stattools import acf
import datetime
import sys
import os

from codebase.plot import * 
from codebase.file_utils import save_obj, load_obj

%matplotlib inline

%load_ext autoreload
%autoreload 2

In [11]:

np.random.seed(1)
J = 4
K = 2
M = 3
nsim_data = 500

alpha = np.array([1,2,.3,-.8])
beta = np.array([[1,.4], [.2, -1],[.6,0],[.8,1]], dtype=float)
Sigma_e = np.eye(J)

Omega = beta @ beta.T + Sigma_e
yy = multivariate_normal.rvs(mean = alpha, cov=Omega, size=nsim_data)


data = dict()
data['N'] = nsim_data
data['K'] = 2
data['M'] = 3
data['J'] = 4
data['alpha'] = alpha
data['beta'] = beta
data['Omega'] = Omega
data['Sigma_e'] = Sigma_e
data['y'] = yy
bins = np.array([0, 2, 1000])
data['y_ord'] = np.digitize(data['y'], bins)+1
data['cutpoints'] = bins[:-1]

In [12]:
np.unique(data['y_ord'])

array([1, 2, 3])

In [17]:
data['cutpoints']

array([0, 2])

In [13]:
stan_data = dict(N = data['N'], K = data['K'], J = data['J'], M = data['M'], DD = data['y_ord'])

In [14]:
with open('./codebase/stan_code/ord/marg.stan', 'r') as file:
    model_code = file.read()
sm = pystan.StanModel(model_code=model_code, verbose=False)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_9b0082688f86c56855d020e13c4c8774 NOW.


In [15]:
op = sm.optimizing(data=stan_data)


In [16]:
print(op['cutpoints'])

[-0.7834278  1.8864281]


In [18]:
fit_run = sm.sampling(data=stan_data, iter=1000, chains=1)

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


In [19]:
param_names = ['Omega', 'beta', 'alpha', 'cutpoints', 'yy']

stan_samples= fit_run.extract(permuted=False, pars=param_names)  # return a dictionary of arrays

ps = dict()
for name in param_names:
    ps[name] = np.squeeze(stan_samples[name])


In [20]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
    for k in range(data['K']):
        plots.append(plot_trace(ps['beta'][:,j,k],
             true_value=data['beta'][j,k],
             title = 'Posterior distribution for beta(%s,%s)'%(j,k)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [21]:
ps['cutpoints']

array([[ 2.39008224e-01,  4.11104962e+00],
       [ 3.52511749e-02,  4.10174830e+00],
       [ 1.50056415e-01,  4.17377584e+00],
       [ 4.44385183e-02,  4.06220109e+00],
       [-7.72241892e-02,  3.99197002e+00],
       [-3.11931271e-03,  4.04253561e+00],
       [-5.70038581e-03,  4.08446359e+00],
       [-3.86736347e-02,  4.04701167e+00],
       [ 1.70817638e-02,  3.97981392e+00],
       [-1.40809505e-01,  4.04622950e+00],
       [ 6.71828277e-02,  4.00085837e+00],
       [ 1.88845148e-01,  3.98664260e+00],
       [-1.01040827e-01,  4.00595484e+00],
       [ 1.39378163e-01,  3.92171043e+00],
       [ 9.93635440e-02,  4.02691041e+00],
       [ 7.92836215e-02,  4.10287715e+00],
       [ 5.19784840e-02,  3.95105549e+00],
       [-1.90290137e-02,  3.81551784e+00],
       [-3.66911007e-02,  3.84019829e+00],
       [-1.42787569e-01,  3.75541829e+00],
       [-1.15800597e-01,  3.68833071e+00],
       [-2.05650359e-01,  3.63124009e+00],
       [-1.75834822e-02,  3.67269158e+00],
       [-1.

In [22]:
%%opts Layout [fig_size=200]
plots = []
for k in range(data['K']):
    plots.append(plot_trace(ps['cutpoints'][:,k],
         true_value=data['cutpoints'][k],
         title = 'Posterior distribution for cutpoints(%s)'%(k)).\
                 options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)
