In [2]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma
import datetime
# import seaborn as sns
import matplotlib.pyplot as plt


from codebase.plot import * 
from codebase.data import *
from codebase.file_utils import save_obj, load_obj

%matplotlib inline

%load_ext autoreload
%autoreload 2

In [25]:
# Directory of the saved run analysed in this notebook.
log_dir = "./log/20191022_221654_m3/"

# Fetch the two stored objects with the project's load_obj helper, in the
# same order as before: first 'stan_data' (bound to `data`), then 'ps'.
data, ps = (load_obj(name, log_dir) for name in ('stan_data', 'ps'))

# Show which entries `ps` provides.
ps.keys()

dict_keys(['Marg_cov', 'beta', 'Phi_cov', 'sigma', 'alpha', 'Theta'])

In [26]:
# Average the beta draws over the first (draw) axis and show two decimals.
np.mean(ps['beta'], axis=0).round(2)

array([[ 1.  , -0.08, -0.19, -0.12, -0.14],
       [ 1.36, -0.11,  0.13, -0.01,  0.12],
       [ 2.33,  0.1 ,  0.02,  0.05, -0.02],
       [-0.1 ,  1.  ,  0.03, -0.02,  0.17],
       [ 0.  ,  0.57, -0.06, -0.1 , -0.13],
       [ 0.06,  1.67, -0.03,  0.  , -0.06],
       [-0.01,  0.11,  1.  ,  0.06,  0.11],
       [ 0.09,  0.04,  1.38, -0.02, -0.09],
       [-0.17, -0.34,  0.63, -0.15, -0.03],
       [-0.02,  0.14,  0.08,  1.  ,  0.02],
       [ 0.1 , -0.05, -0.07,  1.  ,  0.04],
       [-0.16, -0.21, -0.08,  0.76, -0.12],
       [-0.07,  0.03, -0.03, -0.17,  1.  ],
       [-0.04, -0.11, -0.02,  0.11,  0.98],
       [ 0.07,  0.1 ,  0.02,  0.02,  0.91]])

In [64]:
# df = pd.DataFrame(index=np.arange(1000))
# for j in range(15):
#     for k in range(5):
#         df[str(1+j)+','+str(1+k)] = ps['beta'][:,j,k]
# df.to_csv("log/plot_data/model2_beta.csv")
# # colsamples = np.array_split(ps['beta'], 5, axis=2)
# # np.squeeze(colsamples[0]).shape

In [27]:
# Run-size constants used by later cells (presumably mirroring the settings
# of the Stan run that produced the saved draws -- confirm against the run log).
num_chains = 1
num_warmup = 1000
num_samples = 1000
# Total iterations = warm-up draws + retained draws.
num_iter = num_warmup + num_samples

In [28]:
# Reference loading matrix of shape (J, K): zero everywhere except one block
# of three rows per column. The provenance of the numbers is not shown in
# this notebook; a later cell stores the matrix as muthen_results['beta'].
mb = np.zeros((data['J'], data['K']))
mb[:3, 0] = [0.772, 0.575, 0.503]
mb[3:6, 1] = [0.704, 0.657, 0.548]
mb[6:9, 2] = [0.685, 0.702, 0.622]
mb[9:12, 3] = [0.791, 0.736, 0.695]
# Open-ended slice: this only works when exactly three rows remain after row 12.
mb[12:, 4] = [0.780, 0.738, 0.660]


In [29]:
# Container for the reference results; keyed like `ps` so the plotting cells
# can look up the matching entry by name.
muthen_results = {'beta': mb}


In [30]:
# Diagonal matrix holding diag(Phi_cov)^(-1/2) for the first draw only
# (a quick look at the scaling matrix used when rescaling the loadings).
np.diag(ps['Phi_cov'][0].diagonal() ** (-.5))

array([[2.5332023 , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 1.46254671, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 1.56002783, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 1.3945933 , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 1.57905743]])

In [31]:
from numpy.linalg import inv, cholesky

# Rescale every loading draw:
#     beta2[i] = beta[i] @ Phi_cov[i] @ diag(Phi_cov[i])^(-1/2)
# NOTE(review): the intended interpretation of beta2 (e.g. some form of
# standardized loading) is not stated in the notebook -- confirm the formula.
#
# The loop runs over the number of draws actually stored in ps['beta'] rather
# than the separate constant num_samples. np.empty_like returns uninitialised
# memory, so a mismatch between the two would previously either leave garbage
# rows in beta2 (more draws than num_samples) or raise IndexError (fewer).
beta2 = np.empty_like(ps['beta'])
for i in range(len(beta2)):
    beta2[i] = ps['beta'][i]  @ ps['Phi_cov'][i] @ np.diag(np.diag(ps['Phi_cov'][i])**(-.5))

ps['beta2'] = beta2

In [32]:
# Average the rescaled loading draws over the draw axis and show two decimals.
np.mean(ps['beta2'], axis=0).round(2)

array([[ 0.26,  0.12, -0.04, -0.04, -0.08],
       [ 0.49,  0.33,  0.28, -0.01,  0.21],
       [ 0.86,  0.61,  0.32,  0.06,  0.2 ],
       [ 0.37,  0.58,  0.27, -0.04,  0.34],
       [ 0.18,  0.26,  0.03, -0.05,  0.03],
       [ 0.62,  0.9 ,  0.28, -0.  ,  0.32],
       [ 0.26,  0.29,  0.61, -0.07,  0.43],
       [ 0.3 ,  0.28,  0.73, -0.13,  0.4 ],
       [-0.07, -0.12,  0.26, -0.16,  0.11],
       [ 0.09,  0.08, -0.04,  0.68, -0.04],
       [ 0.03, -0.01, -0.14,  0.69, -0.11],
       [-0.15, -0.21, -0.25,  0.54, -0.25],
       [ 0.13,  0.27,  0.41, -0.22,  0.7 ],
       [ 0.09,  0.2 ,  0.34, -0.03,  0.62],
       [ 0.21,  0.33,  0.4 , -0.08,  0.65]])

In [76]:
%%opts Layout [fig_size=200]
# One trace plot per (j, k) entry of beta, each annotated with the matching
# entry of muthen_results['beta'] as its reference ("true") value.
# plot_trace and hv are not imported by name in this notebook; presumably they
# come from the star import of codebase.plot -- confirm.
# Note that the titles use the zero-based loop indices j and k.
plots = []
for j in range(data['J']):
    for k in range(data['K']):
        plots.append(plot_trace(ps['beta'][:,j,k],
             true_value=muthen_results['beta'][j,k],
             title = 'Posterior distribution for beta(%s,%s)'%(j,k)).\
                     options(fig_inches=8, aspect=3))
# Collect all J*K plots into a single layout.
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

# Arrange the layout in two columns; as the last expression it is displayed.
layout.cols(2)


In [58]:
# Build one trace plot per (j, k) entry of beta, passing the matching entry of
# muthen_results['beta'] as the reference value. Titles use zero-based indices.
plots = []
for j in range(data['J']):
    for k in range(data['K']):
        plots.append(
            plot_trace(
                ps['beta'][:, j, k],
                true_value=muthen_results['beta'][j, k],
                title='Posterior distribution for beta(%s,%s)' % (j, k),
            ).options(fig_inches=8, aspect=3)
        )

# use same y-range for all plots?
layout = hv.Layout(plots).options(
    show_title=True,
    vspace=.3,
    absolute_scaling=False,
    normalize=False,
)

# Two-column arrangement; displayed as the cell's last expression.
layout.cols(2)


## Posterior Summaries and Comparison

In [33]:
def flatten_df(df0, val_name, var_name = 'K'):
    """
    Reshape a wide (rows x columns) frame into long format.

    Column labels are shifted by one so that they become one-based, a
    one-based row counter is added as column 'J', and the frame is melted so
    that each output row holds one (J, var_name, val_name) triple.

    Inputs
    ============
    - df0      : DataFrame whose column labels support ``+ 1`` (e.g. the
                 default integer labels of ``pd.DataFrame(array)``)
    - val_name : name of the output column holding the cell values
    - var_name : name of the output column holding the shifted column labels
    Output
    ============
    - long-format DataFrame with columns ['J', var_name, val_name]

    The input frame is left untouched: the work is done on a copy, so calling
    the function repeatedly on the same frame gives the same result each time.
    """
    df = df0.copy()
    df.columns = df.columns + 1
    df['J'] = np.arange(len(df)) + 1
    return df.melt(id_vars=['J'], var_name=var_name, value_name=val_name)

def post_summary(samples):
    """
    Summarise posterior draws in one long-format table.

    Each statistic is computed over axis 0 of `samples`, put into a DataFrame
    and reshaped with flatten_df. The first table (the mean) supplies the
    'J' and 'K' columns; the remaining statistics are attached as extra
    columns, relying on flatten_df producing rows in the same order each time.

    Inputs
    ============
    - samples : array of draws with the draw index on axis 0
    Output
    ============
    - DataFrame with columns J, K, mean, median, q2.5, q97.5
    """
    statistics = (
        ('mean', np.mean(samples, axis=0)),
        ('median', np.median(samples, axis=0)),
        ('q2.5', np.percentile(samples, 2.5, axis=0)),
        ('q97.5', np.percentile(samples, 97.5, axis=0)),
    )

    summary = None
    for label, values in statistics:
        long_form = flatten_df(pd.DataFrame(values), label)
        if summary is None:
            summary = long_form
        else:
            summary[label] = long_form[label]
    return summary


# Long-format summary (mean, median, 2.5% and 97.5% percentiles) of the beta draws.
post_summary(ps['beta'])

Unnamed: 0,J,K,mean,median,q2.5,q97.5
0,1,1,1.000000,1.000000,1.000000,1.000000
1,2,1,1.359887,1.345871,1.009030,1.789021
2,3,1,2.325773,2.313680,1.762389,2.959282
3,4,1,-0.104978,-0.107741,-0.273970,0.071718
4,5,1,0.003408,0.006719,-0.169917,0.179348
5,6,1,0.055180,0.057448,-0.140540,0.241522
6,7,1,-0.006046,-0.003086,-0.181321,0.168869
7,8,1,0.088627,0.089523,-0.096265,0.255446
8,9,1,-0.165483,-0.166010,-0.347955,0.016124
9,10,1,-0.020263,-0.020581,-0.202139,0.140472


In [34]:
def array_to_latex_table(df, prefix ):
    """
    Print a 2-D table as the body rows of a LaTeX tabular.

    One line is printed per row: the row label (`prefix` followed by the
    one-based row number), then the row's values converted with
    ``astype(str)``, all separated by ' & ', and finally a space and the LaTeX
    row terminator (two backslashes).

    Inputs
    ============
    - df     : 2-D array-like; despite the name, a NumPy array is the usual
               input. A DataFrame is also accepted and is read row by row.
    - prefix : label prefix for each row (converted with ``str``)
    Output
    ============
    - None; the rows are written to standard output

    Raises ValueError if the input is not two-dimensional.
    """
    # np.asarray makes row iteration well defined for DataFrames too: plain
    # ``df[i]`` on a DataFrame would select column i rather than row i.
    table = np.asarray(df)
    if table.ndim != 2:
        raise ValueError(
            "array_to_latex_table expects a 2-D table, got %d-D" % table.ndim
        )

    for row_number, values in enumerate(table, start=1):
        cells = ' & '.join(values.astype(str))
        print(str(prefix) + str(row_number) + ' & ' + cells + ' \\\\')

In [35]:
# Element-wise average of the Phi_cov draws over the draw axis.
np.asarray(ps['Phi_cov']).mean(axis=0)

array([[ 0.13185458,  0.13509005,  0.07225967,  0.00861295,  0.05562026],
       [ 0.13509005,  0.30718288,  0.10770379, -0.00519122,  0.15438613],
       [ 0.07225967,  0.10770379,  0.31881582, -0.06771743,  0.22598939],
       [ 0.00861295, -0.00519122, -0.06771743,  0.4831963 , -0.07653966],
       [ 0.05562026,  0.15438613,  0.22598939, -0.07653966,  0.47903054]])

In [36]:
# Long-format (J, K, value) view of the reference loadings stored in muthen_results.
flatten_df(pd.DataFrame(muthen_results['beta']), 'muthen')

Unnamed: 0,J,K,muthen
0,1,1,0.772
1,2,1,0.575
2,3,1,0.503
3,4,1,0.000
4,5,1,0.000
5,6,1,0.000
6,7,1,0.000
7,8,1,0.000
8,9,1,0.000
9,10,1,0.000


In [37]:
def C_to_R(M):
    """
    Convert a covariance matrix into its correlation matrix.

    The result is D @ M @ D, where D is the diagonal matrix holding the
    inverse square roots of M's diagonal entries.

    Inputs
    ============
    - M : square covariance matrix (NumPy array)
    Output
    ============
    - correlation matrix of the same shape as M
    """
    variances = np.asarray(M.diagonal())
    inv_sd = np.diag(variances ** (-.5))
    return inv_sd @ M @ inv_sd


In [38]:
# Convert every covariance draw in ps['Phi_cov'] to a correlation matrix.
#
# The loop length is taken from the buffer itself rather than from the
# separate constant num_samples. np.empty_like returns uninitialised memory,
# so a mismatch would previously leave garbage matrices in Phi_corr (more
# draws than num_samples) or raise IndexError (fewer).
Phi_corr = np.empty_like(ps['Phi_cov'])
for i in range(len(Phi_corr)):
    Phi_corr[i] = C_to_R(ps['Phi_cov'][i])
ps['Phi_corr'] = Phi_corr

In [39]:
# Element-wise average of the correlation draws over the draw axis.
Phi_corr.mean(axis=0)

array([[ 1.        ,  0.67396392,  0.35567654,  0.03440152,  0.22042646],
       [ 0.67396392,  1.        ,  0.34343246, -0.01265397,  0.40384355],
       [ 0.35567654,  0.34343246,  1.        , -0.17399935,  0.58252613],
       [ 0.03440152, -0.01265397, -0.17399935,  1.        , -0.15886777],
       [ 0.22042646,  0.40384355,  0.58252613, -0.15886777,  1.        ]])

In [41]:
# NOTE(review): this redefines array_to_latex_table from an earlier cell and
# silently shadows it; keep a single definition once the notebook is tidied.
def array_to_latex_table(df, prefix ):
    """
    Print a 2-D table as the body rows of a LaTeX tabular.

    One line is printed per row: the row label (`prefix` followed by the
    one-based row number), then the row's values converted with
    ``astype(str)``, all separated by ' & ', and finally a space and the LaTeX
    row terminator (two backslashes).

    Inputs
    ============
    - df     : 2-D array-like; despite the name, a NumPy array is the usual
               input. A DataFrame is also accepted and is read row by row.
    - prefix : label prefix for each row (converted with ``str``)
    Output
    ============
    - None; the rows are written to standard output

    Raises ValueError if the input is not two-dimensional.
    """
    # np.asarray makes row iteration well defined for DataFrames too: plain
    # ``df[i]`` on a DataFrame would select column i rather than row i.
    table = np.asarray(df)
    if table.ndim != 2:
        raise ValueError(
            "array_to_latex_table expects a 2-D table, got %d-D" % table.ndim
        )

    for row_number, values in enumerate(table, start=1):
        cells = ' & '.join(values.astype(str))
        print(str(prefix) + str(row_number) + ' & ' + cells + ' \\\\')

# Mean correlation matrix over the draws, upper triangle zeroed, rounded to
# three decimals. Despite its name, `df` holds a NumPy array here.
df = np.round(np.tril(np.mean(Phi_corr, axis=0)), 3)
# Emit the LaTeX rows, labelled z1, z2, ...
array_to_latex_table(df, 'z')


z1 & 1.0 & 0.0 & 0.0 & 0.0 & 0.0 \\
z2 & 0.674 & 1.0 & 0.0 & 0.0 & 0.0 \\
z3 & 0.356 & 0.343 & 1.0 & 0.0 & 0.0 \\
z4 & 0.034 & -0.013 & -0.174 & 1.0 & 0.0 \\
z5 & 0.22 & 0.404 & 0.583 & -0.159 & 1.0 \\


In [43]:
# Mean loading matrix over the draws, rounded to three decimals.
# Despite its name, `df` holds a NumPy array here.
df = np.mean(ps['beta'], axis=0).round(3)
# Emit the LaTeX rows, labelled y1, y2, ...
array_to_latex_table(df, 'y')

y1 & 1.0 & -0.084 & -0.194 & -0.124 & -0.139 \\
y2 & 1.36 & -0.107 & 0.135 & -0.007 & 0.118 \\
y3 & 2.326 & 0.1 & 0.02 & 0.049 & -0.016 \\
y4 & -0.105 & 1.0 & 0.033 & -0.02 & 0.167 \\
y5 & 0.003 & 0.574 & -0.065 & -0.099 & -0.132 \\
y6 & 0.055 & 1.669 & -0.029 & 0.0 & -0.062 \\
y7 & -0.006 & 0.114 & 1.0 & 0.063 & 0.115 \\
y8 & 0.089 & 0.044 & 1.376 & -0.02 & -0.088 \\
y9 & -0.165 & -0.34 & 0.627 & -0.155 & -0.026 \\
y10 & -0.02 & 0.139 & 0.079 & 1.0 & 0.02 \\
y11 & 0.097 & -0.047 & -0.071 & 0.996 & 0.04 \\
y12 & -0.159 & -0.206 & -0.083 & 0.756 & -0.122 \\
y13 & -0.067 & 0.027 & -0.026 & -0.169 & 1.0 \\
y14 & -0.037 & -0.111 & -0.02 & 0.105 & 0.978 \\
y15 & 0.074 & 0.1 & 0.016 & 0.023 & 0.907 \\
