In [367]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma, bernoulli
from statsmodels.tsa.stattools import acf
import datetime
import sys
import os

from tqdm.notebook import tqdm
from codebase.plot import * 
from codebase.data import * 
from codebase.file_utils import save_obj, load_obj

from modelresultsbinary import *
%matplotlib inline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [368]:
# # logit
# data = gen_data_binary(10000, random_seed=0)
# np.mean(data['D'], 0)
# df = to_str_pattern(data['D'])
# df.value_counts()

In [369]:
# # logit2
# data = gen_data_binary(10000, random_seed=0)
# np.mean(data['D'], 0)
# df = to_str_pattern(data['D'])
# df.value_counts()

In [370]:
# # probit
# data = gen_data_binary(10000, random_seed=0)
# np.mean(data['D'], 0)
# df = to_str_pattern(data['D'])
# df.value_counts()

In [371]:
# # probit2
# data = gen_data_binary(10000, random_seed=0)
# np.mean(data['D'], 0)
# df = to_str_pattern(data['D'])
# df.value_counts()

In [372]:
# Sampler settings: number of chains, and kept / warm-up draws per chain.
num_chains, num_samples, num_warmup = 1, 1000, 1000
# Total iterations per chain = warm-up draws plus kept draws.
num_iter = num_warmup + num_samples

In [373]:
# Directory of the saved run whose results are inspected in this notebook.
log_dir = "./log/bin_sim_2factor_probit_2/sim2/20200109_171341_bin2f_probit_sim2_m2//"
# Load the stored 'data' and 'ps' objects from that directory, in that order.
data, ps = (load_obj(obj_name, log_dir) for obj_name in ('data', 'ps'))
# Show which quantities are available in `ps`.
ps.keys()


dict_keys(['beta', 'alpha', 'zz', 'uu', 'Phi_cov', 'yy'])

In [374]:
# Average ps['beta'] over its first axis and show the result to 2 decimals.
# Presumably the first axis indexes posterior draws, which would make this the
# posterior-mean loading matrix -- TODO confirm.
beta_mean = np.mean(ps['beta'], axis=0)
np.round(beta_mean, decimals=2)

array([[ 1.  ,  0.02],
       [ 0.82, -0.06],
       [ 1.61,  0.04],
       [ 0.02,  1.  ],
       [ 0.07,  1.89],
       [-0.08,  0.98]])

In [375]:
# Display the loaded `data` dict (seed, dimensions N/K/J and the stored arrays)
# so its entries can be compared with the estimates above.
# NOTE(review): this dumps every entry, including large arrays; consider
# showing only the keys and array shapes instead.
data

{'random_seed': 0,
 'N': 1000,
 'K': 2,
 'J': 6,
 'alpha': array([0., 0., 0., 0., 0., 0.]),
 'beta': array([[1. , 0. ],
        [0.8, 0. ],
        [0.8, 0.5],
        [0. , 1. ],
        [0.5, 0.8],
        [0. , 0.8]]),
 'Theta': array([[1., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0.],
        [0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1.]]),
 'e': array([[-1.53292105e+00, -1.71197016e+00,  4.61350590e-02,
         -9.58374480e-01, -8.08116129e-02, -7.03859036e-01],
        [-7.70784301e-01, -4.80845341e-01,  7.03585555e-01,
          9.29145148e-01,  3.71172553e-01, -9.89822550e-01],
        [ 6.43631275e-01,  6.88896667e-01,  2.74647204e-01,
         -6.03620436e-01,  7.08859575e-01,  4.22818575e-01],
        ...,
        [-1.24266628e+00,  2.01986652e-01, -3.05331758e-01,
         -1.19558678e+00, -1.57790317e+00,  8.49912231e-01],
        [ 3.27590142e-01, -1.66958835e-03, -3.55634293e-02,
 

In [376]:
# Print the model definition stored as model.txt in the run directory.
# The context manager closes the file handle deterministically (the original
# bare open() left it to the garbage collector), and os.path.join builds the
# path correctly whether or not log_dir ends with a separator.
with open(os.path.join(log_dir, 'model.txt')) as model_file:
    print(model_file.read())

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  int<lower=0, upper=1> DD[N, J];
}

transformed data{
  vector[K] zeros_K = rep_vector(0, K);
  vector[J] zeros_J = rep_vector(0, J);
  cov_matrix[J] I_J = diag_matrix(rep_vector(1, J));
  cov_matrix[K] I_K = diag_matrix(rep_vector(1, K));
  real<lower=0> c = 0.2;
}

parameters {
  vector[J] alpha;
  matrix[2,K] beta_free; // 2 free eleements per factor
  matrix[J-3,K] beta_zeros; // 3 zero elements per factor
  cov_matrix [K] Phi_cov;
  matrix[N,K] zz;
  matrix[N,J] uu;
}

transformed parameters{
  matrix[J,K] beta;
  matrix[N,J] yy;

  for(j in 1:J) {
    for (k in 1:K) beta[j,k] = 0;
  }
  // set ones
  for (k in 1:K) beta[1+3*(k-1), k] = 1;
  // set the free elements
  for (k in 1:K) beta[2+3*(k-1) : 3+3*(k-1), k] = beta_free[1:2,k];
  // set the zero elements
  beta[4:J, 1] = beta_zeros[1:(J-3), 1];
  beta[1:(J-3), K] = beta_zeros[1:(J-3), K];

  for (n in 1:N) yy[n,] = to_row_vector(alpha) + zz[n,] * beta' + uu[n,];
}


In [377]:
# nsim_N = 100
# data['K'] = 2
# PPP_vals, Dy, Dystr = get_PPP(data, ps, nsim_N)


In [378]:
%%opts Layout [fig_size=200]
# One trace plot per element beta[j, k]; the matching entry of data['beta'] is
# handed to plot_trace as `true_value`.
n_items, n_factors = data['J'], data['K']
plots = []
for j in range(n_items):
    for k in range(n_factors):
        title = 'Posterior distribution for beta(%s,%s)'%(j,k)
        trace = plot_trace(ps['beta'][:,j,k],
                           true_value=data['beta'][j,k],
                           title=title)
        plots.append(trace.options(fig_inches=8, aspect=3))

# Layout-wide display options; `normalize` is left off (open question in the
# original: use the same y-range for all plots?).
layout_opts = dict(show_title=True,
                   vspace=.3,
                   absolute_scaling=False,
                   normalize=False)
layout = hv.Layout(plots).options(**layout_opts)

# Arrange the panels in two columns.
layout.cols(2)


In [379]:
%%opts Layout [fig_size=200]
# One trace plot per element Phi_cov[j, k] for j, k in 0..K-1; the matching
# entry of data['Phi_cov'] is handed to plot_trace as `true_value`.
n_factors = data['K']
plots = []
for j in range(n_factors):
    for k in range(n_factors):
        title = 'Posterior distribution for Phi_cov(%s,%s)'%(j,k)
        trace = plot_trace(ps['Phi_cov'][:,j,k],
                           true_value=data['Phi_cov'][j,k],
                           title=title)
        plots.append(trace.options(fig_inches=8, aspect=3))

# Layout-wide display options; `normalize` is left off (open question in the
# original: use the same y-range for all plots?).
layout_opts = dict(show_title=True,
                   vspace=.3,
                   absolute_scaling=False,
                   normalize=False)
layout = hv.Layout(plots).options(**layout_opts)

# Arrange the panels in two columns.
layout.cols(2)


In [380]:
%%opts Layout [fig_size=200]
# One trace plot per element alpha[j]; the matching entry of data['alpha'] is
# handed to plot_trace as `true_value`.
plots = []
for j in range(data['J']):
    title = 'Posterior distribution for alpha(%s)'%(j)
    trace = plot_trace(ps['alpha'][:,j],
                       true_value=data['alpha'][j],
                       title=title)
    plots.append(trace.options(fig_inches=8, aspect=3))

# Layout-wide display options; `normalize` is left off (open question in the
# original: use the same y-range for all plots?).
layout_opts = dict(show_title=True,
                   vspace=.3,
                   absolute_scaling=False,
                   normalize=False)
layout = hv.Layout(plots).options(**layout_opts)

# Arrange the panels in two columns.
layout.cols(2)


In [381]:
# Number of simulations requested from get_PPP.
nsim_N = 500
PPP_vals, Dy, Dystr = get_PPP(data, ps, nsim_N)
# Count the rows where column 0 of PPP_vals is below column 1, then express the
# count as a share of nsim_N.
# NOTE(review): presumably the two columns are observed vs. replicated
# discrepancies, making this a posterior predictive p-value -- confirm in get_PPP.
n_below = np.sum(PPP_vals[:, 0] < PPP_vals[:, 1])
n_below / nsim_N

HBox(children=(IntProgress(value=0, max=500), HTML(value='')))




0.254