In [1]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma, bernoulli
from statsmodels.tsa.stattools import acf
import datetime
import sys
import os
import altair as alt


from tqdm.notebook import tqdm
from codebase.plot import * 
from codebase.data import * 
from codebase.file_utils import save_obj, load_obj

from modelresultsbinary_4chains import *
%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:
# Folder of the saved run; both pickled artefacts are read from it.
log_dir = "./log/FND/PPP/20200417_003304_FND_s1m4//"

# Load the model input ('data') first, then the posterior draws ('ps').
data, ps = (load_obj(name, log_dir) for name in ('data', 'ps'))

# Show which parameters were stored with the draws.
ps.keys()


odict_keys(['alpha', 'yy', 'beta', 'zz'])

In [3]:
# Display the loaded input dict: the 0/1 response matrix 'D' plus the sizes 'N', 'J' and 'K'.
data

{'D': array([[1, 0, 0, 1, 0, 1],
        [1, 0, 1, 0, 1, 0],
        [1, 0, 0, 1, 1, 1],
        ...,
        [1, 1, 1, 1, 0, 0],
        [1, 0, 0, 1, 0, 1],
        [1, 0, 1, 1, 0, 0]]), 'N': 565, 'J': 6, 'K': 2}

In [4]:
# Print the Stan program that was saved next to the run.
# A context manager closes the file handle instead of leaving it to the garbage collector.
with open(log_dir+'model.txt') as model_file:
    print(model_file.read())

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  int<lower=0, upper=1> DD[N, J];
}

parameters {
  vector[J] alpha;
  vector[J-1] beta_free;
  vector[N] zz;
}

transformed parameters{
  matrix[N,J] yy;
  vector[J] beta;
  // set ones
  beta[1] = 1;
  // set the free elements
  beta[2 : J] = beta_free[1:(J-1)];
  for (n in 1:N) yy[n,] = to_row_vector(alpha) + to_row_vector(zz[n]*beta);
}

model {
  to_vector(beta_free) ~ normal(0, 1);
  to_vector(alpha) ~ normal(0, 10);
  to_vector(zz) ~ normal(0, 1);
  for (j in 1:J) DD[, j] ~ bernoulli_logit(yy[, j]);
}



In [5]:
# Sanity check: average the first draw's 'yy' block over its leading axis and show the resulting shape.
ps['yy'][0].mean(axis=0).shape

(565, 6)

In [6]:
# The draws are indexed as [draw, chain, ...] (the plotting cells slice
# ps[...][:, chain_number, ...]), so both counts are read from the array
# rather than hard-coding the number of chains.
num_samples, num_chains = ps['alpha'].shape[:2]

# NOTE(review): the warm-up length is set equal to the number of kept draws;
# confirm this matches the sampler settings of the run.
num_warmup = num_samples
num_iter = num_samples + num_warmup

In [7]:
# Sign normalisation of the loadings, applied only when 'beta' has a factor axis
# (ndim > 3, i.e. indexed [draw, chain, row, col]).
#
# For every draw and chain:
#   * rows 0..2 of column 0 are multiplied by sign(beta[0, 0]),
#   * rows 3..  of column 1 are multiplied by sign(beta[3, 1]),
# so both reference entries end up non-negative. When 'Phi_cov' was sampled,
# its off-diagonal entry is multiplied by the product of the two signs and
# mirrored into the symmetric position.
#
# The checks on `ndim` and on the presence of 'Phi_cov' do not depend on the
# draw or the chain, so they are evaluated once and the update is done with
# broadcast NumPy operations instead of a Python loop over draws and chains.
if ps['beta'].ndim > 3:
    draws = slice(0, num_samples)
    chains = slice(0, num_chains)

    # np.sign returns new arrays, so the signs are taken before anything is overwritten.
    sign1 = np.sign(ps['beta'][draws, chains, 0, 0])
    sign2 = np.sign(ps['beta'][draws, chains, 3, 1])

    # Trailing axis added so each (draw, chain) sign broadcasts over the rows.
    ps['beta'][draws, chains, :3, 0] = ps['beta'][draws, chains, :3, 0] * sign1[..., np.newaxis]
    ps['beta'][draws, chains, 3:, 1] = ps['beta'][draws, chains, 3:, 1] * sign2[..., np.newaxis]

    if 'Phi_cov' in ps.keys():
        flipped_cov = sign1 * sign2 * ps['Phi_cov'][draws, chains, 0, 1]
        ps['Phi_cov'][draws, chains, 0, 1] = flipped_cov
        ps['Phi_cov'][draws, chains, 1, 0] = flipped_cov


### Altair plot

In [8]:
# cn = 1
# row = 1
# col = 1 
# def return_post_df(ps, param_name, cn, row, col):
#     post_df = pd.DataFrame(data = ps[param_name][:,cn, row, col], columns=['val'])
#     post_df = post_df.reset_index()
#     post_df['param_name'] = param_name
#     post_df['cn'] = cn
#     post_df['row'] = row
#     post_df['col'] = col
#     return post_df             

# def samples_to_df(ps, param_name):
#     post_dfs = []
#     for cn in range(4):
#         for row in range(6):
#             for col in range(2):
#                 post_dfs.append(return_post_df(ps, param_name, cn, row, col))

#     return pd.concat(post_dfs, axis=0)

# samples_to_df(ps, 'beta').head()

# post_df = samples_to_df(ps, 'beta')

# c1 = alt.Chart(post_df.sample(1000)).mark_line().encode(
#     alt.X('index:O'),
#     alt.Y('val'),
#     alt.Color('cn:N'),
#     row = 'row:O',
#     column= 'col:O'
    
# ).properties(width = 200, height=100)           

# c1



In [7]:
%%opts Layout [fig_size=200]

chain_number = 1
plots = []
for j in range(data['J']):
    if ps['beta'].ndim > 3:
        for k in range(data['K']):
            plots.append(plot_trace(ps['beta'][:,chain_number,j,k],
    #              true_value=data['beta'][j,k],
                 title = 'Posterior distribution for beta(%s,%s)'%(j,k)).\
                         options(fig_inches=8, aspect=3))
    else:
        plots.append(plot_trace(ps['beta'][:,chain_number,j],
#              true_value=data['beta'][j,k],
             title = 'Posterior distribution for beta(%s)'%(j)).\
                     options(fig_inches=8, aspect=3))
        
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [8]:
%%opts Layout [fig_size=200]

chain_number = 0

plots = []
for j in range(data['J']):
        plots.append(plot_trace(ps['alpha'][:,chain_number,j],
#              true_value=data['alpha'][j],
             title = 'Posterior distribution for alpha(%s)'%(j)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [9]:
%%opts Layout [fig_size=200]

chain_number = 1

plots = []
for j in range(data['K']):
    for k in range(data['K']):
        plots.append(plot_trace(ps['Phi_cov'][:,chain_number,j,k],
#              true_value=data['Phi_cov'][j,k],
             title = 'Posterior distribution for Phi_cov(%s,%s)'%(j,k)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=True) # use same y-range for all plots?

layout.cols(2)


KeyError: 'Phi_cov'

In [10]:
# Per-chain PPP value in percent, from `nsim` simulations per chain.
# The chain count comes from `num_chains` (defined with the other run sizes)
# instead of a repeated literal 4, so this cell agrees with the sign-flipping cell.
# NOTE(review): get_PPP presumably simulates replicated data; no random seed is
# set in the visible cells, so the values may vary between runs. Confirm.
nsim = 100
ppp_cn = np.empty(num_chains)
for cn in range(num_chains):
    PPP_vals, Dy, Dystr = get_PPP(data, ps, cn, nsim)

    # Percentage (relative to nsim) of rows whose first column is below the second.
    # NOTE(review): presumably column 0 is the observed-data discrepancy and
    # column 1 the replicated-data one; verify against get_PPP.
    ppp_cn[cn] = 100*np.sum(PPP_vals[:,0]<PPP_vals[:,1])/nsim
    print(ppp_cn[cn])

ppp_cn

HBox(children=(IntProgress(value=0), HTML(value='')))


0.0


HBox(children=(IntProgress(value=0), HTML(value='')))


0.0


HBox(children=(IntProgress(value=0), HTML(value='')))


0.0


HBox(children=(IntProgress(value=0), HTML(value='')))


1.0


array([0., 0., 0., 1.])