In [1]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma
import datetime
import seaborn as sns
import matplotlib.pyplot as plt


from codebase.plot import * 
from codebase.file_utils import save_obj, load_obj

%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:
# Read back the objects saved under this run's log directory via the project
# helper `load_obj`: the 'stan_data' object and the 'ps' object.
# NOTE(review): `ps` is presumably the dict of posterior draws keyed by
# parameter name (see the keys shown below) — confirm against the fitting script.
log_dir = "./log/20191015_145311_model2_std/"
data = load_obj('stan_data', log_dir)
ps = load_obj('ps', log_dir)
# Display the names of the stored quantities.
ps.keys()

dict_keys(['Marg_cov', 'beta', 'Phi_cov', 'sigma', 'alpha', 'Theta', 'Omega'])

In [5]:
# Sanity check on the stored draws: the first axis is later iterated as the draw index.
ps['alpha'].shape

(1000, 15)

In [6]:
# Sampler settings. NOTE(review): presumably the values used for the saved
# run in `log_dir` — they are not read from the log here, so confirm they match.
num_chains = 1
num_samples = 1000
num_warmup = 1200
# Total iterations per chain = kept draws + warmup draws.
num_iter = num_samples + num_warmup

In [7]:
# Reference loading matrix (J x K), zero everywhere except for one block of
# three consecutive rows per column: rows 0-2 -> column 0, rows 3-5 -> column 1, ...
# The last assignment uses an open-ended slice (12:) with three values, so it
# only succeeds when data['J'] == 15; columns 0..4 must exist, i.e. data['K'] >= 5.
# NOTE(review): the numbers are hard-coded; judging by the dict they are stored
# in below they are presumably Muthen's published estimates — add the source.
mb = np.zeros((data['J'], data['K']))
mb[:3,0] = np.array([0.772, 0.575, 0.503 ])
mb[3:6,1] = np.array([0.704, 0.657, 0.548 ])
mb[6:9,2] = np.array([0.685, 0.702, 0.622])
mb[9:12,3] = np.array([0.791, 0.736, 0.695])
mb[12:,4] = np.array([0.780, 0.738, 0.660])


In [8]:
# Reference values used as `true_value` in the trace plots and in the
# comparison table, keyed by parameter name.
muthen_results = {'beta': mb}


In [28]:
from numpy.linalg import inv, cholesky

# Transform every posterior draw of the loading matrix with that draw's factor
# covariance: beta2[i] = beta[i] @ Phi[i] @ inv(chol(Phi[i])).
#
# The loop runs over the draws actually stored in `ps` instead of the
# hard-coded `num_samples`: with a mismatch the old loop either left rows of
# the `np.empty_like` buffer uninitialised or raised an IndexError.
#
# NOTE(review): numpy's `cholesky` returns the lower-triangular L with
# Phi = L @ L.T, so Phi @ inv(L) = L @ L.T @ inv(L), which does not simplify
# to L. If the goal is loadings on unit-variance factors (beta @ L), the last
# factor would have to be inv(L).T — confirm the intended transform. The
# computation itself is left unchanged here.
beta2 = np.empty_like(ps['beta'])
for i, (beta_i, phi_i) in enumerate(zip(ps['beta'], ps['Phi_cov'])):
    beta2[i] = beta_i @ phi_i @ inv(cholesky(phi_i))

ps['beta2'] = beta2

In [30]:
%%opts Layout [fig_size=200]
# One trace/posterior plot per entry (j, k) of the transformed loadings
# `ps['beta2']`, with the matching entry of the reference matrix passed as
# `true_value`. The plots are collected in a single layout shown in two columns.
# NOTE(review): `plot_trace` and `hv` are not imported explicitly in this
# notebook; presumably both come from `from codebase.plot import *` — confirm.
plots = []
for j in range(data['J']):
    for k in range(data['K']):
        plots.append(plot_trace(ps['beta2'][:,j,k],
             true_value=muthen_results['beta'][j,k],
             title = 'Posterior distribution for beta(%s,%s)'%(j,k)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=True) # use same y-range for all plots?

layout.cols(2)


## Posterior Summaries and Comparison

In [94]:
def flatten_df(df0, val_name, var_name = 'K'):
    """
    Reshape a wide table into long format with 1-based row/column labels.

    Inputs
    ============
    - df0 : DataFrame whose column labels support `+ 1` (e.g. the default
      integer labels of `pd.DataFrame(array)`). It is NOT modified.
    - val_name : name of the output column holding the cell values
    - var_name : name of the output column holding the (shifted) column
      labels of `df0`
    Output
    ============
    - long DataFrame with columns ['J', var_name, val_name], where 'J' is
      the 1-based row position
    """
    # Work on a copy: relabelling the columns and adding 'J' on the caller's
    # frame would change it in place, so a second call on the same frame
    # would shift the labels again and melt the stale 'J' column.
    df = df0.copy()
    df.columns = df.columns + 1
    df['J'] = np.arange(len(df)) + 1
    return df.melt(id_vars=['J'], var_name=var_name, value_name=val_name)

def post_summary(samples):
    """
    Summarise draws element-wise in long format.

    The mean, the median and the 2.5 / 97.5 percentiles are taken over
    axis 0 of `samples`; each resulting table is reshaped with `flatten_df`
    and the statistics are collected as columns of one frame.

    Inputs
    ============
    - samples : array whose first axis indexes the draws; the reduced array
      is handed to `pd.DataFrame`, so the remaining axes must form a table
    Output
    ============
    - DataFrame with columns J, K, mean, median, q2.5, q97.5
    """
    def long_column(table, label):
        # Reshape one statistic table into long format via flatten_df
        return flatten_df(pd.DataFrame(table), label)

    summary = long_column(np.mean(samples, axis=0), 'mean')
    summary['median'] = long_column(np.median(samples, axis=0), 'median')['median']
    for pct in (2.5, 97.5):
        label = 'q%s' % pct
        summary[label] = long_column(np.percentile(samples, pct, axis=0), label)[label]
    return summary


# Summary table (mean, median, 95% interval) for every entry of the transformed loadings.
post_summary(ps['beta2'])

Unnamed: 0,J,K,mean,median,q2.5,q97.5
0,1,1,0.168457,0.179428,-0.061919,0.344140
1,2,1,0.212957,0.220603,-0.064175,0.463872
2,3,1,0.254243,0.279629,-0.080578,0.487219
3,4,1,-0.043701,-0.029666,-0.269398,0.113326
4,5,1,-0.050611,-0.029233,-0.288443,0.110714
5,6,1,-0.066329,-0.036517,-0.381785,0.117273
6,7,1,-0.004989,-0.004439,-0.249502,0.235831
7,8,1,0.002238,0.013069,-0.322896,0.270512
8,9,1,0.016079,0.007341,-0.268542,0.328361
9,10,1,-0.030974,-0.027192,-0.404030,0.352319


In [97]:
# Reference loadings in the same long (J, K) layout as the posterior summary above.
flatten_df(pd.DataFrame(muthen_results['beta']), 'muthen')

Unnamed: 0,J,K,muthen
0,1,1,0.772
1,2,1,0.575
2,3,1,0.503
3,4,1,0.000
4,5,1,0.000
5,6,1,0.000
6,7,1,0.000
7,8,1,0.000
8,9,1,0.000
9,10,1,0.000


In [99]:
def C_to_R(M):
    """
    Send a covariance matrix M to the corresponding
    correlation matrix R
    Inputs
    ============
    - M : covariance matrix of shape (d, d), or a stack of covariance
      matrices of shape (..., d, d); any array-like is accepted
    Output
    ============
    - correlation matrix (or stack of correlation matrices) with the
      same shape as M

    Entries of the diagonal that are zero or negative produce inf/nan
    in the result (numpy warning, no exception).
    """
    M = np.asarray(M)
    # Inverse standard deviations, read from the diagonal of the last two
    # axes so that leading axes can index independent matrices.
    inv_sd = np.diagonal(M, axis1=-2, axis2=-1) ** (-.5)
    # Element-wise form of D^{-1/2} @ M @ D^{-1/2}: scale row i by
    # inv_sd[i] and column j by inv_sd[j].
    R = inv_sd[..., :, None] * M * inv_sd[..., None, :]
    return R


In [101]:
# Convert every posterior draw of the factor covariance matrix to a
# correlation matrix. Iterating over the stored draws (rather than
# range(num_samples)) guarantees that every row of the np.empty_like buffer
# is filled, whatever the number of draws saved in `ps`.
Phi_corr = np.empty_like(ps['Phi_cov'])
for i, phi_cov in enumerate(ps['Phi_cov']):
    Phi_corr[i] = C_to_R(phi_cov)
ps['Phi_corr'] = Phi_corr

In [103]:
# Posterior mean of the factor correlation matrix (average over the draw axis).
np.mean(Phi_corr, axis=0)

array([[ 1.        ,  0.34284032,  0.11054921, -0.01471677, -0.0554204 ],
       [ 0.34284032,  1.        , -0.06551998, -0.06370323,  0.07993937],
       [ 0.11054921, -0.06551998,  1.        , -0.2371694 ,  0.27555868],
       [-0.01471677, -0.06370323, -0.2371694 ,  1.        , -0.11532489],
       [-0.0554204 ,  0.07993937,  0.27555868, -0.11532489,  1.        ]])

In [150]:
# Scratch prototype of one LaTeX table row: cells of the first row of `df`
# joined with ' & '.
# NOTE(review): `df` is not assigned anywhere above this cell; its first
# assignment is in the cell that calls `array_to_latex_table(df, 'z')` below,
# so this cell raises NameError on a fresh top-to-bottom run. It duplicates
# what `array_to_latex_table` does and can probably be removed.
u = np.round(df[0], 3).astype(str)
l = np.repeat(' & ', df.shape[1]).astype(str)
res = "".join(i + j for i, j in zip(u, l))
# Drop the trailing '& ' left by the last delimiter (one trailing space remains).
res = res[:-2]
res
# print(res + '\\\\')

'1.0 & 0.343 & 0.111 & -0.015 & -0.055 '

array([[ 1.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.34284032,  1.        ,  0.        ,  0.        ,  0.        ],
       [ 0.11054921, -0.06551998,  1.        ,  0.        ,  0.        ],
       [-0.01471677, -0.06370323, -0.2371694 ,  1.        ,  0.        ],
       [-0.0554204 ,  0.07993937,  0.27555868, -0.11532489,  1.        ]])

In [176]:
def array_to_latex_table(df, prefix, decimals=None):
    """
    Print a 2-D array as the body rows of a LaTeX table.

    Each printed line has the form
        <prefix><row number> & <cell> & <cell> ... \\\\
    with 1-based row numbers.

    Inputs
    ============
    - df : 2-D array; `df[i]` must be the i-th row
    - prefix : row-label prefix, converted with `str`
    - decimals : optional number of decimals. When None (default) cells are
      rendered with `astype(str)`, which drops trailing zeros; when given,
      floating cells are rounded and every cell is printed with exactly
      that many decimals.
    Output
    ============
    - None; the rows are written to stdout
    """
    for row_idx in range(df.shape[0]):
        values = df[row_idx]
        if np.issubdtype(values.dtype, np.floating):
            if decimals is not None:
                values = np.round(values, decimals)
            # Adding +0.0 maps negative zero to positive zero, so a value
            # that rounds to zero from below is not printed as '-0.0'.
            values = values + 0.0
        if decimals is None:
            cells = values.astype(str)
        else:
            cells = ['%.*f' % (decimals, value) for value in values]
        row = "".join(cell + ' & ' for cell in cells)
        # row[:-2] removes the trailing '& ' of the last cell
        print(str(prefix) + str(row_idx + 1) + ' & '
              + row[:-2] + '\\\\')

# LaTeX rows for the lower triangle of the posterior-mean factor correlation
# matrix, rounded to three decimals (upper triangle is zeroed by np.tril).
df = np.round(np.tril(np.mean(Phi_corr, axis=0)), 3)
array_to_latex_table(df, 'z')


z1 & 1.0 & 0.0 & 0.0 & 0.0 & 0.0 \\
z2 & 0.343 & 1.0 & 0.0 & 0.0 & 0.0 \\
z3 & 0.111 & -0.066 & 1.0 & 0.0 & 0.0 \\
z4 & -0.015 & -0.064 & -0.237 & 1.0 & 0.0 \\
z5 & -0.055 & 0.08 & 0.276 & -0.115 & 1.0 \\


In [183]:
# LaTeX rows for the posterior mean of the transformed loadings, three decimals.
posterior_mean_beta2 = np.mean(ps['beta2'], axis=0)
df = np.round(posterior_mean_beta2, 3)
array_to_latex_table(df, 'y')

y1 & 0.168 & 0.158 & 0.022 & 0.001 & -0.041 \\
y2 & 0.213 & 0.103 & 0.064 & -0.02 & 0.026 \\
y3 & 0.254 & 0.225 & 0.077 & 0.013 & -0.059 \\
y4 & -0.044 & 0.263 & -0.066 & -0.025 & 0.074 \\
y5 & -0.051 & 0.281 & -0.0 & -0.007 & 0.015 \\
y6 & -0.066 & 0.394 & -0.07 & -0.017 & 0.029 \\
y7 & -0.005 & -0.042 & 0.266 & -0.094 & 0.139 \\
y8 & 0.002 & -0.033 & 0.376 & -0.149 & 0.179 \\
y9 & 0.016 & -0.063 & 0.362 & -0.135 & 0.2 \\
y10 & -0.031 & 0.111 & 0.134 & 0.546 & -0.104 \\
y11 & -0.015 & 0.106 & 0.122 & 0.581 & -0.105 \\
y12 & -0.01 & 0.085 & 0.135 & 0.496 & -0.118 \\
y13 & 0.035 & -0.104 & -0.104 & -0.128 & 0.552 \\
y14 & 0.046 & -0.104 & -0.077 & -0.019 & 0.63 \\
y15 & 0.034 & -0.094 & -0.084 & -0.065 & 0.505 \\
