In [1]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma, bernoulli
from statsmodels.tsa.stattools import acf
import datetime
import sys
import os

from tqdm.notebook import tqdm
from codebase.plot import * 
from codebase.data import * 
from codebase.file_utils import save_obj, load_obj

from modelresultsbinary import *
%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:
# Run-length settings: one chain, with warmup and sampling phases of equal length.
num_chains = 1
num_warmup = 1000
num_samples = 1000
num_iter = num_warmup + num_samples  # warmup and sampling iterations combined

In [10]:
# Directory of the saved run inspected in this notebook; `data` and `ps` are both read from it.
log_dir = "./log/bin_sim_2factor/20191208_191630_sim0m1/"
data = load_obj('data', log_dir)
ps = load_obj('ps', log_dir)

# Sign of the mean over the first element of ps['beta']
# (presumably the first posterior draw -- confirm against how `ps` was saved).
first_beta = ps['beta'][0]
load_sign = np.sign(first_beta.mean())

# Show which quantities are stored in ps.
ps.keys()

dict_keys(['beta', 'alpha', 'zz', 'Phi_cov', 'yy'])

In [11]:
# Display the 'beta' array stored with the loaded data; the trace-plot cells further down
# pass its entries as `true_value`.
data['beta']

array([[1. , 0. ],
       [0.8, 0. ],
       [0.8, 0. ],
       [0. , 1. ],
       [0. , 0.8],
       [0. , 0.8]])

In [12]:
# Mean of ps['beta'] along axis 0, rounded to two decimals for display.
ps['beta'].mean(axis=0).round(2)

array([[1.  , 0.  ],
       [1.19, 0.  ],
       [1.22, 0.  ],
       [0.  , 1.  ],
       [0.  , 1.1 ],
       [0.  , 1.67]])

In [13]:
# Print the model text saved alongside this run. The context manager closes the
# file handle as soon as the text has been read instead of leaving it open.
with open(log_dir + 'model.txt') as model_file:
    print(model_file.read())

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  int<lower=0, upper=1> DD[N, J];
}

transformed data{
  vector[K] zeros_K = rep_vector(0, K);
  cov_matrix[K] I_K = diag_matrix(rep_vector(1, K));
}

parameters {
  vector[J] alpha;
  matrix[2,K] beta_free; // 2 free eleements per factor
  cov_matrix [K] Phi_cov;
  matrix[N,K] zz;
}

transformed parameters{
  matrix[J,K] beta;
  matrix[N,J] yy;

  for(j in 1:J) {
    for (k in 1:K) beta[j,k] = 0;
  }
  
  // set ones
  for (k in 1:K) beta[1+3*(k-1), k] = 1;
  // set the free elements
  for (k in 1:K) beta[2+3*(k-1) : 3+3*(k-1), k] = beta_free[1:2,k];

  for (n in 1:N) yy[n,] = to_row_vector(alpha) + zz[n,] * beta';
}
  
model {
  to_vector(beta_free) ~ normal(0, 1);
  to_vector(alpha) ~ normal(0, 10);
  Phi_cov ~ inv_wishart(J+4, I_K);
  for (n in 1:N) to_vector(zz[n,]) ~ multi_normal(zeros_K, Phi_cov);
  for (j in 1:J) DD[, j] ~ bernoulli_logit(yy[, j]);
}



In [35]:
# Monte Carlo estimate of response probabilities for one element `m` of ps:
# for each subject, average expit(alpha + z @ beta') over L draws z ~ N(0, Phi_cov[m]).
m = 0    # index into ps['alpha'], ps['beta'] and ps['Phi_cov']
N = data['N']
L = 100  # number of latent draws averaged per subject
pistr = np.empty((N, data['J']))

# These lookups do not depend on the subject, so do them once.
alpha_m = ps['alpha'][m]
beta_m = ps['beta'][m]
Phi_cov_m = ps['Phi_cov'][m]
zeros_K = np.zeros(data['K'])

for subj_i in range(N):
    # rvs squeezes singleton dimensions, so force the (L, K) layout explicitly;
    # this keeps the matrix product below valid when K == 1 or L == 1.
    z_mc = multivariate_normal.rvs(zeros_K, Phi_cov_m, size=L).reshape(L, data['K'])
    # All L linear predictors in one product: (L, K) @ (K, J) -> (L, J).
    ystr = alpha_m + z_mc @ beta_m.T
    pistr[subj_i] = np.mean(expit(ystr), 0)
# A binary replicate could then be drawn with bernoulli.rvs(pistr).

In [38]:
# Shape of the probability matrix filled in the previous cell
# (allocated there as N rows by data['J'] columns).
pistr.shape

(1000, 6)

In [14]:
# Third argument to get_PPP; the progress bar in this cell's output counts 10 steps,
# which matches this value.
nsim_N = 10
# NOTE(review): get_PPP is not defined in this notebook -- presumably it computes
# posterior predictive p-values from `data` and `ps`; confirm in its source module.
PPP_vals = get_PPP(data, ps, nsim_N)



  0%|          | 0/10 [00:00<?, ?it/s][A
 10%|█         | 1/10 [00:05<00:49,  5.50s/it][A
 20%|██        | 2/10 [00:10<00:43,  5.42s/it][A
 30%|███       | 3/10 [00:16<00:39,  5.65s/it][A
 40%|████      | 4/10 [00:21<00:32,  5.40s/it][A
 50%|█████     | 5/10 [00:26<00:26,  5.29s/it][A
 60%|██████    | 6/10 [00:31<00:20,  5.10s/it][A
 70%|███████   | 7/10 [00:36<00:14,  4.98s/it][A
 80%|████████  | 8/10 [00:41<00:10,  5.04s/it][A
 90%|█████████ | 9/10 [00:46<00:05,  5.13s/it][A
100%|██████████| 10/10 [00:51<00:00,  5.12s/it][A


In [16]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
    for k in range(data['K']):
        plots.append(plot_trace(ps['beta'][:,j,k],
             true_value=data['beta'][j,k],
             title = 'Posterior distribution for beta(%s,%s)'%(j,k)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [19]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['K']):
    for k in range(data['K']):
        plots.append(plot_trace(ps['Phi_cov'][:,j,k],
             true_value=data['Phi_cov'][j,k],
             title = 'Posterior distribution for Phi_cov(%s,%s)'%(j,k)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [17]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
        plots.append(plot_trace(ps['alpha'][:,j],
             true_value=data['alpha'][j],
             title = 'Posterior distribution for alpha(%s)'%(j)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [None]:
# Average ps['uu'] over its first axis and show the result as a heatmap.
# NOTE(review): the ps.keys() output shown earlier in this notebook lists only
# 'beta', 'alpha', 'zz', 'Phi_cov' and 'yy' -- there is no 'uu' entry, so this cell
# looks like a leftover from a different model run; confirm before relying on it.
# NOTE(review): plt and sns are not imported explicitly before this cell; presumably
# they arrive through one of the star imports -- verify.
mu = np.mean(ps['uu'],0)
plt.figure(figsize=(6,10))
ax = sns.heatmap(mu, cbar = True )

In [None]:
# Column-wise means of mu, scaled by 1e4 and rounded to two decimals.
col_means_scaled = np.mean(mu, 0) * 1e4
print(np.round(col_means_scaled, 2))

# Bar chart of the column-wise mean absolute value of mu.
col_mean_abs = np.mean(np.abs(mu), 0)
bars = hv.Bars(col_mean_abs).options(color='blue', xrotation=90)
bars.options(fig_inches=8, aspect=3)

In [None]:
# Overall mean of mu for rows before index 900 and for rows from index 900 on, scaled by 1e3.
head_avg = np.mean(mu[:900]) * 1e3
tail_avg = np.mean(mu[900:]) * 1e3
print("Avg 0-900 = %.2f \nAvg 901-1000 = %.2f"%(head_avg, tail_avg))

In [None]:
# Average ps1['uu'] over its first axis and show the result as a heatmap.
# NOTE(review): `ps1` is not defined anywhere in the visible notebook; presumably it is
# a second set of posterior samples loaded in a cell that no longer exists -- confirm.
mu1 = np.mean(ps1['uu'],0)
plt.figure(figsize=(6,10))
ax = sns.heatmap(mu1, cbar = True )

In [None]:
# Column-wise means of mu1, scaled by 1e4 and rounded to two decimals.
col_means1_scaled = np.mean(mu1, 0) * 1e4
print(np.round(col_means1_scaled, 2))

# Bar chart of the column-wise mean absolute value of mu1.
col_mean_abs1 = np.mean(np.abs(mu1), 0)
bars1 = hv.Bars(col_mean_abs1).options(color='blue', xrotation=90)
bars1.options(fig_inches=8, aspect=3)

In [None]:
# Overall mean of mu1 for rows before index 900 and for rows from index 900 on, scaled
# by 1e3. This cell closes the mu1 sequence (heatmap, column summary, block averages),
# mirroring the three cells for mu, so it summarises mu1 rather than repeating the mu numbers.
head_avg1 = np.mean(mu1[:900]) * 1e3
tail_avg1 = np.mean(mu1[900:]) * 1e3
print("Avg 0-900 = %.2f \nAvg 901-1000 = %.2f"%(head_avg1, tail_avg1))

In [None]:
# Column-wise mean of the rows of mu before index 900, scaled by 1e3.
mu[:900].mean(axis=0) * 1e3

In [None]:
# Column-wise mean of the rows of mu from index 900 on, scaled by 1e3.
mu[900:].mean(axis=0) * 1e3

In [None]:
# Rows from index 900 on and columns from index 3 on of data['y'].
# NOTE(review): the keys of `data` are not shown in this notebook -- confirm 'y' exists for this run.
data['y'][900:, 3:]

In [None]:
# Plot the standard exponential density between its 1st and 99th percentiles.
import matplotlib.pyplot as plt
from scipy.stats import expon

fig, ax = plt.subplots(1, 1)
ppf_lo = expon.ppf(0.01)
ppf_hi = expon.ppf(0.99)
x = np.linspace(ppf_lo, ppf_hi, 100)
ax.plot(x, expon.pdf(x), 'r-', lw=5, alpha=0.6, label='expon pdf')

In [None]:
# Draw 1000 exponential variates with scale 1/5 (loc left at its default of 0)
# and report their 0th and 95th percentiles.
x = expon.rvs(scale=1/5, size=1000)
np.percentile(x, [0, 95])

## Residual Analysis

In [None]:
# %%opts Bars {+axiswise} [width=1000, height=300, ]
# Per-row score: average ps1['uu'] over axis 0, take absolute values, then average over axis 1.
row_scores = np.mean(abs(np.mean(ps1['uu'], 0)), 1)

# Keep the original row position as an 'index' column and order by score, largest first.
res = (
    pd.DataFrame(row_scores, columns=['avg_u'])
    .reset_index()
    .sort_values('avg_u', ascending=False)
)

# Bar chart of the 20 rows with the largest score.
hv.Bars(res[:20]).options(color='blue', xrotation=90).options(fig_inches=8, aspect=3)
