In [1]:
# Project-local helpers from the `codebase` package, plus the numeric/plotting stack.
from codebase.classes import Particles
from codebase.classes_data import Data
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from codebase.ibis import essl, exp_and_normalise, model_phonebook, run_mcmc
from codebase.plot import get_post_df, plot_density
# NOTE(review): model_phonebook is already imported from codebase.ibis above; this
# later import rebinds the name, so the codebase.run_tlk version is the one in effect.
from codebase.run_tlk import model_phonebook
from tqdm import tqdm
from scipy.special import logsumexp
import altair as alt
# Remove Altair's default cap on the number of rows a chart dataset may contain.
alt.data_transformers.disable_max_rows()


# PyStan is needed further down to compile the Stan programs written inline.
import pystan

In [2]:
# Log directory from which the saved data object is read.
log_dir = 'log/20210906_172020_test_prior_m14_copy/'
# Load the object stored under the name 'data' in that directory
# (load_obj is a project helper — presumably deserialises a saved object; confirm).
data = load_obj('data', log_dir)


In [3]:
# Display the raw contents of the loaded data object as the cell output.
data.raw_data

{'random_seed': 0,
 'N': 200,
 'K': 2,
 'J': 6,
 'alpha': array([0., 0., 0., 0., 0., 0.]),
 'beta': array([[1. , 0. ],
        [0.8, 0. ],
        [0.8, 0. ],
        [0. , 1. ],
        [0. , 0.8],
        [0. , 0.8]]),
 'sigma_z': array([0.80622577, 0.80622577]),
 'Phi_corr': array([[1. , 0.2],
        [0.2, 1. ]]),
 'Phi_cov': array([[0.65, 0.13],
        [0.13, 0.65]]),
 'Marg_cov': array([[1.    , 0.52  , 0.52  , 0.13  , 0.104 , 0.104 ],
        [0.52  , 1.    , 0.416 , 0.104 , 0.0832, 0.0832],
        [0.52  , 0.416 , 1.    , 0.104 , 0.0832, 0.0832],
        [0.13  , 0.104 , 0.104 , 1.    , 0.52  , 0.52  ],
        [0.104 , 0.0832, 0.0832, 0.52  , 1.    , 0.416 ],
        [0.104 , 0.0832, 0.0832, 0.52  , 0.416 , 1.    ]]),
 'Theta': array([[0.35 , 0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.584, 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.584, 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.35 , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.584, 

## Run MCMC

In [4]:
# Container for the posterior draws of every run, keyed by batch label ('batch1', ...).
posterior_samples = {}


In [5]:
## Batch 1: saved model number 7
model_num = 7

# Load the model object saved for this model number.
compiled_dir = 'log/compiled_models/model' + str(model_num) + '/'
sm = load_obj('sm', compiled_dir)

# Look the model up once and pull the three name lists out of the same entry.
phonebook_entry = model_phonebook(model_num)
param_names = phonebook_entry['param_names']
latent_names = phonebook_entry['latent_names']
stan_names = phonebook_entry['stan_names']

# Sampler settings forwarded to run_mcmc as keyword arguments.
mcmc_settings = dict(
    num_samples=1000,
    num_warmup=1000,
    adapt_engaged=True,
    num_chains=1,
    log_dir='./log/20210906_172026_test_prior_m7_copy/',
)
fit_run = run_mcmc(data.get_stan_data(), sm, **mcmc_settings)

# Extract the draws for param_names with permuted=False and file them under 'batch1'.
ps = fit_run.extract(permuted=False, pars=param_names)
posterior_samples['batch1'] = ps

In [6]:
## Batch 2: saved model number 14
model_num = 14

# Load the model object saved for this model number.
compiled_dir = 'log/compiled_models/model' + str(model_num) + '/'
sm = load_obj('sm', compiled_dir)

# Look the model up once and pull the three name lists out of the same entry.
phonebook_entry = model_phonebook(model_num)
param_names = phonebook_entry['param_names']
latent_names = phonebook_entry['latent_names']
stan_names = phonebook_entry['stan_names']

# Sampler settings forwarded to run_mcmc as keyword arguments.
mcmc_settings = dict(
    num_samples=1000,
    num_warmup=1000,
    adapt_engaged=True,
    num_chains=1,
    log_dir='./log/20210906_172020_test_prior_m14_copy/',
)
fit_run = run_mcmc(data.get_stan_data(), sm, **mcmc_settings)

# Extract the draws for param_names with permuted=False and file them under 'batch2'.
ps = fit_run.extract(permuted=False, pars=param_names)
posterior_samples['batch2'] = ps

In [9]:
# Batch 3: Stan program written inline; `sigma` gets a cauchy(0, c0) prior with c0 = 5.
model_code = """
data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  matrix[N,J] y;
  vector[J] sigma_prior;
}

transformed data{
  vector[J] zeros = rep_vector(0, J);
  cov_matrix[K] I_K = diag_matrix(rep_vector(1, K));
  real<lower=0> c0 = 5;
}

parameters {
  vector<lower=0>[J] sigma;
  vector[J] alpha;
  matrix[2,K] beta_free; // 2 free eleements per factor
  cov_matrix [K] Phi_cov;
}

transformed parameters{
  vector<lower=0>[J] sigma_square;
  cov_matrix[J] Theta;
  matrix[J,K] beta;
  cov_matrix[J] Marg_cov;
  
  sigma_square = square(sigma);
  
  Theta = diag_matrix(sigma_square);

  for(j in 1:J) {
    for (k in 1:K) beta[j,k] = 0;
    }

  // set ones
  for (k in 1:K) beta[1+3*(k-1), k] = 1;

  // set the free elements
  for (k in 1:K) beta[2+3*(k-1) : 3+3*(k-1), k] = beta_free[1:2,k];

  Marg_cov = beta * Phi_cov * beta'+ Theta;

}

model {
  to_vector(beta_free) ~ normal(0, 1);
  to_vector(alpha) ~ normal(0, 10);
  to_vector(sigma) ~ cauchy(0, c0);
  Phi_cov ~ inv_wishart(J+4, I_K);
  for (n in 1:N){
    y[n, ] ~ multi_normal(alpha,  Marg_cov);
  }
  
}

"""

# Compile the inline Stan program, then sample with the same settings as the other runs.
sm = pystan.StanModel(model_code=model_code, verbose=False)
fit_run = run_mcmc(
    data.get_stan_data(),
    sm,
    num_samples = 1000,
    num_warmup = 1000,
    adapt_engaged=True,
    num_chains = 1,
    log_dir = './log/20210906_172020_test_prior_m14_copy/'
)
# NOTE(review): param_names is not set in this cell; it carries over from whichever
# earlier cell last assigned it — confirm those names exist in this inline model.
ps = fit_run.extract(permuted=False, pars=param_names)
# Stored under 'batch3'.
posterior_samples['batch3'] = ps

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_9e2194e3c189cb96a811f763877d2eb9 NOW.


In [13]:
# Batch 4: Stan program written inline; `sigma` gets a uniform(0, c0) prior with c0 = 10.
# Fix: `sigma` is now declared with an upper bound of c0 so that its declared support
# matches the support of the uniform prior. With only <lower=0>, the sampler can
# propose sigma > c0, where the uniform density is zero and the proposal is rejected.
model_code = """
data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  matrix[N,J] y;
  vector[J] sigma_prior;
}

transformed data{
  vector[J] zeros = rep_vector(0, J);
  cov_matrix[K] I_K = diag_matrix(rep_vector(1, K));
  real<lower=0> c0 = 10;
}

parameters {
  vector<lower=0, upper=c0>[J] sigma; // bounds match the uniform(0, c0) prior
  vector[J] alpha;
  matrix[2,K] beta_free; // 2 free eleements per factor
  cov_matrix [K] Phi_cov;
}

transformed parameters{
  vector<lower=0>[J] sigma_square;
  cov_matrix[J] Theta;
  matrix[J,K] beta;
  cov_matrix[J] Marg_cov;
  
  sigma_square = square(sigma);
  
  Theta = diag_matrix(sigma_square);

  for(j in 1:J) {
    for (k in 1:K) beta[j,k] = 0;
    }

  // set ones
  for (k in 1:K) beta[1+3*(k-1), k] = 1;

  // set the free elements
  for (k in 1:K) beta[2+3*(k-1) : 3+3*(k-1), k] = beta_free[1:2,k];

  Marg_cov = beta * Phi_cov * beta'+ Theta;

}

model {
  to_vector(beta_free) ~ normal(0, 1);
  to_vector(alpha) ~ normal(0, 10);
  to_vector(sigma) ~ uniform(0, c0);
  Phi_cov ~ inv_wishart(J+4, I_K);
  for (n in 1:N){
    y[n, ] ~ multi_normal(alpha,  Marg_cov);
  }
  
}

"""

# Compile the inline Stan program, then sample with the same settings as the other runs.
sm = pystan.StanModel(model_code=model_code, verbose=False)
fit_run = run_mcmc(
    data.get_stan_data(),
    sm,
    num_samples = 1000,
    num_warmup = 1000,
    adapt_engaged=True,
    num_chains = 1,
    log_dir = './log/20210906_172020_test_prior_m14_copy/'
)
# NOTE(review): param_names is not set in this cell; it carries over from whichever
# earlier cell last assigned it — confirm those names exist in this inline model.
ps = fit_run.extract(permuted=False, pars=param_names)
# Stored under 'batch4'.
posterior_samples['batch4'] = ps

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_a8c8db8143e6f03a926b48d0ed3e2de2 NOW.


## Plot results from all four runs

In [14]:
# Compare the posterior draws of `beta` across the four runs, tagging each run's
# frame with a `source` label before stacking them for plotting.
param = 'beta'

# (key in posterior_samples, label written to the `source` column)
batch_labels = [
    ('batch1', 'inv-gamma-data-dependent'),
    ('batch2', 'inv-gamma-(0.1,0.1)'),
    ('batch3', 'cauchy-(5)'),
    ('batch4', 'uniform-(0,10)'),
]

labelled_frames = []
for batch_key, label in batch_labels:
    frame = get_post_df(np.squeeze(posterior_samples[batch_key][param]))
    frame['source'] = label
    labelled_frames.append(frame)

plot_density(pd.concat(labelled_frames))

In [15]:
# Compare the posterior draws of `alpha` across the four runs, tagging each run's
# frame with a `source` label before stacking them for plotting.
param = 'alpha'

# (key in posterior_samples, label written to the `source` column)
batch_labels = [
    ('batch1', 'inv-gamma-data-dependent'),
    ('batch2', 'inv-gamma-(0.1,0.1)'),
    ('batch3', 'cauchy-(5)'),
    ('batch4', 'uniform-(0,10)'),
]

labelled_frames = []
for batch_key, label in batch_labels:
    frame = get_post_df(np.squeeze(posterior_samples[batch_key][param]))
    frame['source'] = label
    labelled_frames.append(frame)

plot_density(pd.concat(labelled_frames))