In [2]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma
import datetime
import sys
import os

from codebase.plot import * 
from codebase.file_utils import save_obj, load_obj

%matplotlib inline

%load_ext autoreload
%autoreload 2

In [3]:
# Directory with the saved artefacts of one run (path suggests experiment 1,
# model 2 -- confirm against the run log).
log_dir = "./log/20190909_225014_exp1_model2/"

# Load the data dictionary and the posterior samples, in that order.
# The pickled 'fit' object is intentionally not loaded:
# fit = load_obj('fit', log_dir)
data, ps = [load_obj(obj_name, log_dir) for obj_name in ('data', 'ps')]



In [4]:
# Display the loaded data dictionary (the generating values that the trace
# plots below pass as `true_value`).
data

{'N': 500,
 'K': 2,
 'J': 6,
 'alpha': array([ 1. ,  2. ,  0.3, -0.8,  1. , -1.4]),
 'beta': array([[1. , 0. ],
        [0.2, 0. ],
        [0.6, 0. ],
        [0. , 1. ],
        [0. , 0.5],
        [0. , 0.8]]),
 'sigma_z': array([1.2, 0.7]),
 'Phi_corr': array([[1. , 0.5],
        [0.5, 1. ]]),
 'Phi_cov': array([[1.44, 0.42],
        [0.42, 0.49]]),
 'Marg_cov': array([[2.44  , 0.288 , 0.864 , 0.42  , 0.21  , 0.336 ],
        [0.288 , 1.4976, 0.1728, 0.084 , 0.042 , 0.0672],
        [0.864 , 0.1728, 1.3284, 0.252 , 0.126 , 0.2016],
        [0.42  , 0.084 , 0.252 , 1.13  , 0.245 , 0.392 ],
        [0.21  , 0.042 , 0.126 , 0.245 , 1.1225, 0.196 ],
        [0.336 , 0.0672, 0.2016, 0.392 , 0.196 , 2.2736]]),
 'Theta': array([[1.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 1.44, 0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.81, 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.64, 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 1.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 0.  , 1.96

In [5]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
    plots.append(plot_trace(ps['sigma'][:,j],
         true_value = data['sigma'][j],
         title = 'Posterior distribution for beta(%s)'%(j)).\
                 options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [6]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
    for k in range(data['K']):
        plots.append(plot_trace(ps['beta'][:,j,k],
             true_value = data['beta'][j,k],
             title = 'Posterior distribution for beta(%s,%s)'%(j,k)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [30]:
from numpy.linalg import det, inv


def ff2(yy, model_mu, model_Sigma, p=None, q=5):
    """Discrepancy between the sample covariance of `yy` and `model_Sigma`.

    Computes::

        (n - 1) * (log|Sigma| + trace(S @ inv(Sigma)) - log|S| - p)

    where ``S = np.cov(yy, rowvar=False)`` and ``n`` is the number of rows
    of `yy`.  With `p` equal to the number of columns the value is zero when
    ``Sigma == S``.

    Parameters
    ----------
    yy : array-like, shape (n, d)
        Data matrix, one observation per row.
    model_mu : array-like
        Accepted for interface compatibility; not used in the computation.
    model_Sigma : array-like, shape (d, d)
        Model-implied covariance matrix.
    p : int, optional
        Constant subtracted inside the bracket.  When omitted it is taken
        from the data as the number of columns of `yy`, so the discrepancy
        is correctly centred for any dimension instead of assuming 15
        variables.
    q : int, optional
        Accepted for interface compatibility; not used in the computation.

    Returns
    -------
    float
        The discrepancy value.
    """
    yy = np.asarray(yy)
    sample_S = np.cov(yy, rowvar=False)
    if p is None:
        # Number of observed variables = dimension of the sample covariance.
        p = sample_S.shape[0]

    n_data = yy.shape[0]
    ld_sample = np.log(det(sample_S))
    ld_model = np.log(det(model_Sigma))
    trace_term = np.trace(sample_S @ inv(model_Sigma))
    return (n_data - 1) * (ld_model + trace_term - ld_sample - p)


In [31]:


def compute_D(post_samples, mcmc_iter, pred=True, y=None):
    """Evaluate the `ff2` discrepancy for a single posterior draw.

    Parameters
    ----------
    post_samples : mapping
        Posterior samples.  Must contain 'alpha' and at least one covariance
        entry among 'Sigma', 'Marg_cov2' and 'Marg_cov', each indexable by
        draw number.
    mcmc_iter : int
        Index of the posterior draw to use.
    pred : bool, optional
        If true, draw a replicated data set from a multivariate normal with
        this draw's mean and covariance and score that; otherwise score the
        observed data.
    y : array-like, optional
        Observed data, one observation per row.  Defaults to the notebook
        global ``data['y']`` so existing calls behave as before; pass it
        explicitly to check a model against its own data set.

    Returns
    -------
    float
        Value returned by `ff2` for the replicated or observed data.
    """
    if y is None:
        # Backward-compatible fallback to the notebook-level data dictionary.
        y = data['y']
    y = np.asarray(y)

    # Covariance key priority: 'Sigma', then 'Marg_cov2', then 'Marg_cov'.
    marg_cov = next(
        (key for key in ('Sigma', 'Marg_cov2') if key in post_samples),
        'Marg_cov',
    )

    draw_mean = post_samples['alpha'][mcmc_iter]
    draw_cov = post_samples[marg_cov][mcmc_iter]

    if pred:
        # Replicated data set with as many rows as the observed data.
        y_pred = multivariate_normal.rvs(mean=draw_mean,
                                         cov=draw_cov,
                                         size=y.shape[0])
        return ff2(y_pred, draw_mean, draw_cov)

    return ff2(y, draw_mean, draw_cov)



In [32]:

# Log directories of the three runs being compared (the paths suggest
# models 0, 2 and 3 of experiment 1).
log_dir0 = "./log/20190909_183952_exp1_model0/"
log_dir2 = "./log/20190909_225014_exp1_model2/"
log_dir3 = "./log/20190909_225051_exp1_model3/"

# For each run load the data dictionary first, then the posterior samples.
data0, ps0 = [load_obj(obj_name, log_dir0) for obj_name in ('data', 'ps')]
data2, ps2 = [load_obj(obj_name, log_dir2) for obj_name in ('data', 'ps')]
data3, ps3 = [load_obj(obj_name, log_dir3) for obj_name in ('data', 'ps')]


In [33]:
# Number of draws is taken from ps0, the samples this loop actually indexes
# (it was previously read from the unrelated `ps` object).
mcmc_length = ps0['alpha'].shape[0]

# Column 0: discrepancy of the observed data; column 1: discrepancy of a
# replicated data set, both evaluated at the same posterior draw.
Ds = np.empty((mcmc_length,2))
for mcmc_iter in range(mcmc_length):
    Ds[mcmc_iter,0] = compute_D(ps0, mcmc_iter, pred=False)
    Ds[mcmc_iter,1] = compute_D(ps0, mcmc_iter, pred=True)


# Fraction of draws where the replicated discrepancy exceeds the observed one.
print(np.sum(Ds[:,0] < Ds[:,1]) / mcmc_length)


0.001


In [34]:
# Take the number of draws from ps2 itself instead of relying on a
# `mcmc_length` left over from an earlier cell.
mcmc_length = ps2['alpha'].shape[0]

# Column 0: discrepancy of the observed data; column 1: discrepancy of a
# replicated data set, both evaluated at the same posterior draw.
Ds = np.empty((mcmc_length,2))
for mcmc_iter in range(mcmc_length):
    Ds[mcmc_iter,0] = compute_D(ps2, mcmc_iter, pred=False)
    Ds[mcmc_iter,1] = compute_D(ps2, mcmc_iter, pred=True)


# Fraction of draws where the replicated discrepancy exceeds the observed one.
print(np.sum(Ds[:,0] < Ds[:,1]) / mcmc_length)


0.288


In [35]:
# Take the number of draws from ps3 itself instead of relying on a
# `mcmc_length` left over from an earlier cell.
mcmc_length = ps3['alpha'].shape[0]

# Column 0: discrepancy of the observed data; column 1: discrepancy of a
# replicated data set, both evaluated at the same posterior draw.
Ds = np.empty((mcmc_length,2))
for mcmc_iter in range(mcmc_length):
    Ds[mcmc_iter,0] = compute_D(ps3, mcmc_iter, pred=False)
    Ds[mcmc_iter,1] = compute_D(ps3, mcmc_iter, pred=True)


# Fraction of draws where the replicated discrepancy exceeds the observed one.
print(np.sum(Ds[:,0] < Ds[:,1]) / mcmc_length)


0.004
