In [41]:
import numpy as np
import pandas as pd
import pystan
from scipy.stats import norm, multivariate_normal, invwishart, invgamma
import datetime
import sys
import os

from codebase.plot import * 
from codebase.file_utils import save_obj, load_obj

%matplotlib inline

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [42]:
log_dir = "log/fabian_runs/20190712_003321_model2/"

In [43]:
file = open(log_dir+"model.txt", "r") 
print(file.read())
file.close()

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  matrix[N,J] yy;
}

parameters {
  vector<lower=0>[J] sigma;
  vector<lower=0>[K] sigma_z;
  vector[J] alpha;
  matrix[2,K] beta_free; // 2 free eleements per factor
  cholesky_factor_corr[K] Phi_corr_chol;
}

transformed parameters{
  cov_matrix[J] Theta;
  matrix[J,K] beta;
  cov_matrix [K] Phi_cov ;
  cov_matrix[J] Marg_cov;

  Theta = diag_matrix(square(sigma));
  Phi_cov = multiply_lower_tri_self_transpose(diag_pre_multiply(sigma_z, Phi_corr_chol));

  for(j in 1:J) {
    for (k in 1:K) beta[j,k] = 0;
  }

  // set ones
  for (k in 1:K) beta[1+3*(k-1), k] = 1;

  // set the free elements
  for (k in 1:K) beta[2+3*(k-1) : 3+3*(k-1), k] = beta_free[1:2,k];

  Marg_cov = beta * Phi_cov * beta'+ Theta;
}

model {
  to_vector(beta_free) ~ normal(0, 1);
  to_vector(alpha) ~ normal(0, 1);
  sigma ~ cauchy(0,1);
  sigma_z ~ cauchy(0,1);
  Phi_corr_chol ~ lkj_corr_cholesky(2);
  for (n in 1:N){
    yy[n, ] ~ multi_normal(alpha, 

In [44]:
data = load_obj("stan_data", log_dir)

In [45]:
ps = load_obj('ps', log_dir)


In [46]:
%%opts Layout [fig_size=200]
plots = []
for j in range(data['J']):
    for k in range(data['K']):
        plots.append(plot_trace(ps['beta'][:,j,k],
             title = 'Posterior distribution for beta(%s,%s)'%(j,k)).\
                     options(fig_inches=8, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
                                  vspace = .3,
                                  absolute_scaling=False,
                                  normalize=False) # use same y-range for all plots?

layout.cols(2)


In [47]:
# %%output info=True 
plots = []
for j in range(data['K']):
    for k in range(data['K']):
        if j!=k:
            plots.append(plot_trace(ps['Phi_cov'][:,j,k],
            title = 'Posterior distribution for Phi(%s,%s)'%(j,k)).options(fig_inches=10, aspect=3))
layout = hv.Layout(plots).options(show_title = True,
#     tight = True, tight_padding=10,
    vspace = .5,
    absolute_scaling=False, fig_size=100)
layout.cols(2)

In [48]:
from numpy.linalg import det, inv


def ff (yy, model_mu, model_Sigma, p=15, q=5):
    mle_est = dict()
    sample_S = np.cov(yy, rowvar=False)
    sample_m = np.mean(yy, axis=0)
    n_data = yy.shape[0]

    term1 = np.log(det(model_Sigma))
    term2 = inv(model_Sigma) @ (sample_S + (sample_m - model_mu) @  (sample_m - model_mu))
    term3 = np.log(det(sample_S)) + p + q

    ff = 0.5 * n_data * ( term1 + np.trace(term2)) - term3
    
    return ff

ff(data['yy'], ps['alpha'][213], ps['Marg_cov'][213])

8949.605934954701

In [49]:
def compute_D(mcmc_iter, pred=True):
    if pred == True:
        y_pred=multivariate_normal.rvs(mean= ps['alpha'][mcmc_iter],
                        cov=ps['Marg_cov'][mcmc_iter],
                       size = data['yy'].shape[0])
        return ff(y_pred, ps['alpha'][mcmc_iter], ps['Marg_cov'][mcmc_iter])

    else:
        return ff(data['yy'], ps['alpha'][mcmc_iter], ps['Marg_cov'][mcmc_iter])
    

In [50]:
mcmc_length = ps['alpha'].shape[0]
Ds = np.empty((mcmc_length,2))
for mcmc_iter in range(mcmc_length):
    Ds[mcmc_iter,0] = compute_D(mcmc_iter, pred=False)
    Ds[mcmc_iter,1] = compute_D(mcmc_iter, pred=True)

In [51]:
np.sum(Ds[:,0] < Ds[:,1]) / mcmc_length

0.43175