In [8]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma
import datetime
import sys
import os

from codebase.plot import * 
from codebase.file_utils import save_obj, load_obj

%matplotlib inline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [30]:
# Directory of the run whose saved objects are inspected below.
log_dir = "./log/20190909_174129_test/"
# Load the saved 'data' and 'ps' objects from that directory, in that order.
# fit = load_obj('fit', log_dir)   # the 'fit' object is intentionally not loaded
data, ps = (load_obj(obj_name, log_dir) for obj_name in ('data', 'ps'))



In [31]:
# Display the object loaded under the name 'data' (rendered as the cell output).
data

{'N': 500,
 'K': 2,
 'J': 6,
 'alpha': array([ 1. ,  2. ,  0.3, -0.8,  1. , -1.4]),
 'beta': array([[1. , 0. ],
        [0.2, 0. ],
        [0.6, 0. ],
        [0. , 1. ],
        [0. , 0.5],
        [0. , 0.8]]),
 'sigma_z': array([1.2, 0.7]),
 'Phi_corr': array([[1. , 0.5],
        [0.5, 1. ]]),
 'Phi_cov': array([[1.44, 0.42],
        [0.42, 0.49]]),
 'Marg_cov': array([[2.44  , 0.288 , 0.864 , 0.42  , 0.21  , 0.336 ],
        [0.288 , 1.4976, 0.1728, 0.084 , 0.042 , 0.0672],
        [0.864 , 0.1728, 1.3284, 0.252 , 0.126 , 0.2016],
        [0.42  , 0.084 , 0.252 , 1.13  , 0.245 , 0.392 ],
        [0.21  , 0.042 , 0.126 , 0.245 , 1.1225, 0.196 ],
        [0.336 , 0.0672, 0.2016, 0.392 , 0.196 , 2.2736]]),
 'Theta': array([[1.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 1.44, 0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.81, 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.64, 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 1.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 0.  , 1.96

In [32]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
    plots.append(plot_trace(ps['alpha'][:,j],
         true_value = data['alpha'][j],
         title = 'Posterior distribution for beta(%s)'%(j)).\
                 options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [24]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
    plots.append(plot_trace(ps['sigma'][:,j],
         true_value = data['sigma'][j],
         title = 'Posterior distribution for beta(%s)'%(j)).\
                 options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [25]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
    for k in range(data['K']):
        plots.append(plot_trace(ps['beta'][:,j,k],
             true_value = data['beta'][j,k],
             title = 'Posterior distribution for beta(%s,%s)'%(j,k)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [21]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
    plots.append(plot_trace(ps['sigma'][:,j],
         title = 'Posterior distribution for sigma(%s)'%(j)).\
                 options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


1.0765569532809025

In [23]:
# Sample covariance of data['y'], treating each column as a variable
# (rowvar=False), then the diagonal of its matrix inverse.
y_cov = np.cov(data['y'], rowvar=False)
y_cov_inv = np.linalg.inv(y_cov)
sigma_prior = np.diag(y_cov_inv)
# Display the resulting vector.
sigma_prior

array([0.75856285, 0.60088576, 1.11658852, 0.54905539, 0.48899349,
       1.33502689, 0.52528143, 0.58246886, 0.51276161, 0.47957459,
       0.47082412, 0.64766155, 0.72839624, 0.50734219, 0.71997683])

In [11]:
# Switch to a different run directory; note that this rebinds `log_dir`
# and `ps` used by earlier cells.
log_dir = "log/fabian_runs2/20190712_220843_model2/"
# Load the saved 'stan_data' and 'ps' objects from that directory, in that order.
d, ps = (load_obj(obj_name, log_dir) for obj_name in ("stan_data", "ps"))

In [12]:
# List the names of the entries stored in the loaded `ps` object.
ps.keys()

dict_keys(['Marg_cov', 'beta', 'Phi_cov', 'sigma', 'sigma_z', 'alpha', 'Theta'])

In [16]:
# For folds 0, 1 and 2: wrap the 'yy' entries of the fold's 'train' and 'test'
# parts in DataFrames and write each to a CSV file inside log_dir
# (log_dir is concatenated directly, so it must end with a path separator).
for fold in range(3):
    fold_data = d[fold]
    df_train, df_test = (pd.DataFrame(fold_data[split]['yy'])
                         for split in ('train', 'test'))
    fold_suffix = str(fold) + ".csv"
    df_train.to_csv(log_dir + "train_fold_" + fold_suffix)
    df_test.to_csv(log_dir + "test_fold_" + fold_suffix)