In [11]:
import numpy as np
import pandas as pd
import sys
import os
from codebase.file_utils import save_obj, load_obj
from codebase.post_process import samples_to_df, get_post_df, stack_samples
import altair as alt
alt.data_transformers.disable_max_rows()

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
ppp = True
log_dir = "./log/20210223_215340_fdtn_ppp_sc1_m11_f2/"
data = load_obj('data', log_dir)


if ppp: 
    ps = load_obj('ps', log_dir)
    ps.keys()
else: 
    print("\n\nChecking data integrity...\n\n")
    complete_data = load_obj("complete_data", log_dir)

    model_posterior_samples = dict()
    model_posterior_samples[0] = load_obj('ps_0', log_dir)
    model_posterior_samples[1] = load_obj('ps_1', log_dir)
    model_posterior_samples[2] = load_obj('ps_2', log_dir)



In [3]:
data

{'D': array([[0, 0, 0, 0, 0, 1],
        [0, 0, 1, 1, 1, 0],
        [0, 0, 0, 0, 1, 1],
        ...,
        [0, 1, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 1],
        [0, 0, 1, 0, 0, 0]]), 'N': 565, 'J': 6, 'K': 2}

In [4]:
print(open(log_dir+'model.txt').read())

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  int<lower=0, upper=1> DD[N, J];
}

transformed data{
  vector[K] zeros_K = rep_vector(0, K);
  cov_matrix[K] I_K = diag_matrix(rep_vector(1, K));
}

parameters {
  vector[J] alpha;
  vector[3] beta_free1; // 1st factor
  vector[4] beta_free2; // 2nd factor 
  cholesky_factor_corr[K] L_Phi;
  matrix[N,K] zz;
}

transformed parameters{
  matrix[J,K] beta;
  matrix[N,J] yy;

  for(j in 1:J) {
    for (k in 1:K) beta[j,k] = 0;
  }

  beta[1:3, 1] = beta_free1;
  beta[1,2] = beta_free2[1]; // cross loading of first variable to both factors
  beta[4:J, 2] = beta_free2[2:4];

  for (n in 1:N) yy[n,] = to_row_vector(alpha) + zz[n,] * beta';
}

model {
  to_vector(beta_free1) ~ normal(0, 1);
  to_vector(beta_free2) ~ normal(0, 1);
  to_vector(alpha) ~ normal(0, 10);
  L_Phi ~ lkj_corr_cholesky(2);
  for (n in 1:N) to_vector(zz[n,])  ~ multi_normal_cholesky(zeros_K, L_Phi);
  for (j in 1:J) DD[, j] ~ bernoulli_logit(yy[, j]);
}

gene

In [5]:
if ppp:
    num_chains = ps['alpha'].shape[1]
    num_samples = ps['alpha'].shape[0]
    for chain_number in range(ps['alpha'].shape[1]):
        for i in range(num_samples):
            sign1 = np.sign(ps['beta'][i,chain_number,0,0])
            sign2 = np.sign(ps['beta'][i,chain_number,3,1])
            ps['beta'][i,chain_number,:3,0] = ps['beta'][i,chain_number,:3,0] * sign1
            ps['beta'][i,chain_number,3:,1] = ps['beta'][i,chain_number,3:,1] * sign2

            if 'Phi_cov' in ps.keys():
                ps['Phi_cov'][i,chain_number,0,1] = sign1 * sign2 * ps['Phi_cov'][i,chain_number,0,1]
                ps['Phi_cov'][i,chain_number,1,0] = ps['Phi_cov'][i,chain_number,0,1]
    
else:
    num_chains = model_posterior_samples[0]['alpha'].shape[1]
    num_samples = model_posterior_samples[0]['alpha'].shape[0]
    for j in range(3):
        ps = model_posterior_samples[j]    
        for chain_number in range(ps['alpha'].shape[1]):
            for i in range(num_samples):
                sign1 = np.sign(ps['beta'][i,chain_number,0,0])
                sign2 = np.sign(ps['beta'][i,chain_number,3,1])
                ps['beta'][i,chain_number,:3,0] = ps['beta'][i,chain_number,:3,0] * sign1
                ps['beta'][i,chain_number,3:,1] = ps['beta'][i,chain_number,3:,1] * sign2

                if 'Phi_cov' in ps.keys():
                    ps['Phi_cov'][i,chain_number,0,1] = sign1 * sign2 * ps['Phi_cov'][i,chain_number,0,1]
                    ps['Phi_cov'][i,chain_number,1,0] = ps['Phi_cov'][i,chain_number,0,1]
        model_posterior_samples[j] = ps



In [6]:
samples_to_df(ps, 'alpha').head()

Unnamed: 0,index,val,param_name,cn,row
0,0,-0.947889,alpha,0,0
1,1,-1.357554,alpha,0,0
2,2,-1.338031,alpha,0,0
3,3,-1.048977,alpha,0,0
4,4,-1.137323,alpha,0,0


In [10]:
param = 'beta'
df = get_post_df(ps[param])
df_quant = df.groupby(['row', 'col'])[['value']].quantile(0.025).reset_index()
df_quant.rename({'value':'q1'}, axis=1, inplace=True)
df_quant2 = df.groupby(['row', 'col'])[['value']].quantile(0.975).reset_index()
df_quant2.rename({'value':'q2'}, axis=1, inplace=True)

df = df_quant.merge(df_quant2, on=['row', 'col'])
np.round(df,2)

Unnamed: 0,row,col,q1,q2
0,0,0,0.27,1.29
1,0,1,-1.7,1.88
2,1,0,0.85,1.86
3,1,1,0.0,0.0
4,2,0,1.34,3.14
5,2,1,0.0,0.0
6,3,0,0.0,0.0
7,3,1,0.72,1.56
8,4,0,0.0,0.0
9,4,1,0.79,1.6


In [None]:
np.round(np.mean(np.mean(stack_samples(ps['beta']),0),0), 2)

In [13]:
np.round(np.mean(stack_samples(ps['beta'], 4),0), 2)

array([[0.76, 0.65],
       [1.29, 0.  ],
       [2.08, 0.  ],
       [0.  , 1.11],
       [0.  , 1.17],
       [0.  , 1.79]])

In [8]:
df = samples_to_df(ps, 'beta')

c1 = alt.Chart(df[df.cn==0]).mark_line(opacity=0.8).encode(
    alt.X('index:O'),
    alt.Y('val'),
#     alt.Color('cn:N'),
    row = 'row:O',
    column= 'col:O'
).properties(width = 200, height=100, title='Beta') 

c1.interactive()

In [9]:
num_chains = ps['alpha'].shape[1]
num_samples = ps['alpha'].shape[0]
for chain_number in range(ps['alpha'].shape[1]):
    for i in range(num_samples):
        sign1 = np.sign(ps['beta'][i,chain_number,0,0])
        sign2 = np.sign(ps['beta'][i,chain_number,3,1])
        if sign2 == -1:
            print(i, chain_number)#             ps['beta'][i,chain_number,:3,0] = ps['beta'][i,chain_number,:3,0] * sign1
#             ps['beta'][i,chain_number,3:,1] = ps['beta'][i,chain_number,3:,1] * sign2

#             if 'Phi_cov' in ps.keys():
#                 ps['Phi_cov'][i,chain_number,0,1] = sign1 * sign2 * ps['Phi_cov'][i,chain_number,0,1]
#                 ps['Phi_cov'][i,chain_number,1,0] = ps['Phi_cov'][i,chain_number,0,1]

In [10]:
c1 = alt.Chart(samples_to_df(ps, 'alpha')).mark_line(opacity=0.8).encode(
    alt.X('index:O'),
    alt.Y('val'),
    alt.Color('cn:N'),
    row = 'row:O',
    column= 'cn:N'
).properties(width = 200, height=100) 

c1.interactive()