In [123]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma, bernoulli
from statsmodels.tsa.stattools import acf
import datetime
import sys
import os

from tqdm.notebook import tqdm
from codebase.plot import * 
from codebase.data import * 
from codebase.file_utils import save_obj, load_obj

from modelresultsbinary import *
%matplotlib inline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [124]:
# Directory holding the saved artefacts of the run being inspected.
log_dir = "./log/20200123_172552_test_sim0_m1//"

# Load the stored data dictionary and the stored sample dictionary, in that order.
data, ps = (load_obj(name, log_dir) for name in ('data', 'ps'))

# Show which quantities are available in `ps`.
ps.keys()


dict_keys(['beta', 'alpha', 'yy'])

In [125]:
# Print the `N` entry of the data and the shape of the stored `alpha` draws,
# one per line.
print(data['N'], ps['alpha'].shape, sep='\n')

1000
(10, 6)


In [126]:
# Display the full contents of the loaded `data` object for inspection.
data

{'random_seed': 0,
 'N': 1000,
 'K': 2,
 'J': 6,
 'alpha': array([0., 0., 0., 0., 0., 0.]),
 'beta': array([[1. , 0. ],
        [0.8, 0. ],
        [0.8, 0. ],
        [0. , 1. ],
        [0. , 0.8],
        [0. , 0.8]]),
 'Theta': array([[1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1.]]),
 'e': None,
 'Phi_cov': array([[1., 0.],
        [0., 1.]]),
 'y': array([[ 1.76405235,  1.41124188,  1.41124188,  0.40015721,  0.32012577,
          0.32012577],
        [ 0.97873798,  0.78299039,  0.78299039,  2.2408932 ,  1.79271456,
          1.79271456],
        [ 1.86755799,  1.49404639,  1.49404639, -0.97727788, -0.7818223 ,
         -0.7818223 ],
        ...,
        [ 0.19782817,  0.15826253,  0.15826253,  0.0977508 ,  0.07820064,
          0.07820064],
        [ 1.40152342,  1.12121873,  1.12121873,  0.15843385,  0.12674708,
          0.12674708],

In [127]:
# Sampler bookkeeping derived from the stored draws.
num_chains = 1
# Both the number of kept samples and the warm-up length are set to the size of
# the first axis of the stored `alpha` draws.
# NOTE(review): warm-up is taken to equal the number of kept draws — confirm
# against the settings the run was launched with.
num_samples = num_warmup = ps['alpha'].shape[0]
num_iter = num_warmup + num_samples

In [130]:
# Align the sign of each draw's loadings, in place: rows 0-2 of column 0 are
# multiplied by the sign of beta[draw, 0, 0], and rows 3+ of column 1 by the
# sign of beta[draw, 3, 1], so both anchor entries end up non-negative.
# The row/column indices are hard-coded for the 6-row, 2-column layout.
beta_draws = ps['beta']
for draw in range(num_samples):
    # Read both anchor signs before any rescaling of this draw.
    sign_col0 = np.sign(beta_draws[draw, 0, 0])
    sign_col1 = np.sign(beta_draws[draw, 3, 1])
    beta_draws[draw, :3, 0] *= sign_col0
    beta_draws[draw, 3:, 1] *= sign_col1

In [131]:
# Print the model definition saved as `model.txt` in the run directory.
# The file is opened in a `with` block so the handle is closed as soon as the
# text has been read, instead of being left open for the kernel's lifetime.
with open(os.path.join(log_dir, 'model.txt')) as model_file:
    print(model_file.read())

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  int<lower=0, upper=1> DD[N, J];
  matrix[N,K] zz;
}

transformed data{
  real<lower=0> c = 0.2;
}

parameters {
  vector[J] alpha;
  matrix[2,K] beta_free; // 2 free eleements per factor
  matrix[J-3,K] beta_zeros; // 3 zero elements per factor
}

transformed parameters{
  matrix[J,K] beta;
  matrix[N,J] yy;

  for(j in 1:J) {
    for (k in 1:K) beta[j,k] = 0;
  }
  // set ones
  for (k in 1:K) beta[1+3*(k-1), k] = 1;
  // set the free elements
  for (k in 1:K) beta[2+3*(k-1) : 3+3*(k-1), k] = beta_free[1:2,k];
  // set the zero elements
  beta[4:J, 1] = beta_zeros[1:(J-3), 1];
  beta[1:(J-3), K] = beta_zeros[1:(J-3), K];

  for (n in 1:N) yy[n,] = to_row_vector(alpha) + zz[n,] * beta';
}

model {
  to_vector(beta_free) ~ normal(0, 1);
  to_vector(beta_zeros) ~ normal(0, 0.1);
  to_vector(alpha) ~ normal(0, 10);
  for (j in 1:J) DD[, j] ~ bernoulli_logit(yy[, j]);
}



In [132]:
# nsim_N = 100
# data['K'] = 2
# PPP_vals, Dy, Dystr = get_PPP(data, ps, nsim_N)


In [133]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
    for k in range(data['K']):
        plots.append(plot_trace(ps['beta'][:,j,k],
             true_value=data['beta'][j,k],
             title = 'Posterior distribution for beta(%s,%s)'%(j,k)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [135]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
        plots.append(plot_trace(ps['alpha'][:,j],
             true_value=data['alpha'][j],
             title = 'Posterior distribution for alpha(%s)'%(j)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [119]:
# %%opts Layout [fig_size=200]
# plots = []
# for j in range(data['J']):
#         plots.append(plot_trace(ps['uu'][:,10,j],
#              true_value=0,
#              title = 'Posterior distribution for alpha(%s)'%(j)).\
#                      options(fig_inches=8, aspect=3))
# layout = hv.Layout(plots).options(show_title = True,
#                                   vspace = .3,
#                                   absolute_scaling=False,
#                                   normalize=False) # use same y-range for all plots?

# layout.cols(2)


In [121]:
# Value passed to get_PPP as its third argument; also the denominator of the
# percentage computed below.
nsim = 100
PPP_vals, Dy, Dystr = get_PPP(data, ps, nsim, L=1)

# Percentage (relative to nsim) of rows of PPP_vals whose column 0 is strictly
# below column 1.
# NOTE(review): presumably one row per replication, so this is the PPP in
# percent — confirm against get_PPP.
below_mask = PPP_vals[:, 0] < PPP_vals[:, 1]
100 * below_mask.sum() / nsim

HBox(children=(IntProgress(value=0), HTML(value='')))




70.0