In [4]:
from codebase.classes import Particles
from codebase.classes_data import Data
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from codebase.ibis import essl, exp_and_normalise, model_phonebook, run_mcmc, run_stan_model, remove_chain_dim
from codebase.plot import get_post_df, plot_density
from codebase.run_tlk import model_phonebook
from tqdm import tqdm
from scipy.special import logsumexp
import altair as alt
alt.data_transformers.disable_max_rows()

import pystan

In [5]:
# Directory of the run analysed in this notebook; the saved objects loaded in
# the cells below ('data', 'ps_hmc', 'particles', ...) are all read from it.
log_dir = 'log/fabian_binary/20210830_024713_1f_binsim/'
# Object saved under the name 'data' in that directory.
data = load_obj('data', log_dir)

In [6]:

# Print the Stan program stored with this run. The path is derived from
# `log_dir` (which already ends with '/') so this cell always shows the model
# belonging to the run whose data were loaded above, instead of repeating the
# run directory as a second hard-coded literal.
with open(log_dir + 'model.txt', 'r') as file:
    print(file.read())

data {
  int<lower=1> N;
  int<lower=1> K;
  int<lower=1> J;
  int<lower=0, upper=1> D[N, J];
}

parameters {
  vector[J] alpha;
  matrix[J,K] beta;
  matrix[N,K] z;
}

transformed parameters{
  matrix[N,J] y;
  for (n in 1:N) y[n,] = to_row_vector(alpha) + z[n,] * beta';
}
  
model {
  to_vector(beta) ~ normal(0, 1);
  to_vector(alpha) ~ normal(0, 10);
  to_vector(z) ~ normal(0, 1);
  for (j in 1:J) D[, j] ~ bernoulli_logit(y[, j]);
}



In [13]:
# Display the `raw_data` attribute of the loaded data object (the recorded
# output shows a dict with 'random_seed', 'N', 'J', 'K', 'alpha', 'beta', 'z').
data.raw_data

{'random_seed': 2,
 'N': 200,
 'J': 6,
 'K': 1,
 'alpha': array([-0.53,  0.35, -1.4 , -1.4 , -0.96, -2.33]),
 'beta': array([1., 1., 1., 1., 1., 1.]),
 'z': array([-4.16757847e-01, -5.62668272e-02, -2.13619610e+00,  1.64027081e+00,
        -1.79343559e+00, -8.41747366e-01,  5.02881417e-01, -1.24528809e+00,
        -1.05795222e+00, -9.09007615e-01,  5.51454045e-01,  2.29220801e+00,
         4.15393930e-02, -1.11792545e+00,  5.39058321e-01, -5.96159700e-01,
        -1.91304965e-02,  1.17500122e+00, -7.47870949e-01,  9.02525097e-03,
        -8.78107893e-01, -1.56434170e-01,  2.56570452e-01, -9.88779049e-01,
        -3.38821966e-01, -2.36184031e-01, -6.37655012e-01, -1.18761229e+00,
        -1.42121723e+00, -1.53495196e-01, -2.69056960e-01,  2.23136679e+00,
        -2.43476758e+00,  1.12726505e-01,  3.70444537e-01,  1.35963386e+00,
         5.01857207e-01, -8.44213704e-01,  9.76147160e-06,  5.42352572e-01,
        -3.13508197e-01,  7.71011738e-01, -1.86809065e+00,  1.73118467e+00,
        

## Run MCMC

In [7]:

# Disabled on purpose: compiles model_7.stan with PyStan and stores the
# compiled model as 'sm_hmc' in log_dir. Uncomment only to recompile.
# with open(
#     'codebase/stancode/models/hmc/model_7.stan',
#     'r'
#     ) as file:
#     model_code = file.read()

#     sm = pystan.StanModel(model_code=model_code, verbose=False)
# save_obj(sm, 'sm_hmc', log_dir)

In [8]:
# Disabled on purpose: runs the compiled 'sm_hmc' model (1000 warmup + 1000
# samples, 1 chain) and stores the extracted 'beta'/'alpha' draws as 'ps_hmc'
# in log_dir. The stored 'ps_hmc' is what the next cell loads.
# # sm = load_obj('sm', 'log/compiled_models/model8/')
# sm = load_obj('sm_hmc', log_dir)

# fit_run = run_stan_model(
#     data = data.get_stan_data(),
#     compiled_model = sm,
#     num_samples = 1000, 
#     num_warmup = 1000,
#     num_chains = 1,
# #     initial_values={'beta':[1,1,1,1,1,1]},
#     adapt_engaged=True,
#     )

# param_names = ['beta', 'alpha']

# ps = fit_run.extract(
#         permuted=False, pars=param_names)

# save_obj(ps, 'ps_hmc', log_dir)


In [9]:
# Parameters whose stored HMC draws are compared with IBIS further down.
param_names = ['beta', 'alpha']

# Load the stored draws, then hand them to remove_chain_dim together with the
# number of draws (length of the first axis of the 'beta' array).
# NOTE(review): presumably this drops the chain axis left by
# extract(permuted=False) - confirm against codebase.ibis.
ps_with_chains = load_obj('ps_hmc', log_dir)
nsim = ps_with_chains['beta'].shape[0]
ps = remove_chain_dim(ps_with_chains, param_names, num_samples=nsim)


## Run IBIS

In [14]:
# Load the object saved as 'particles' in log_dir (existing IBIS results).
# Later cells read its `.particles` and `.weights` attributes and call
# `.resample_particles()` on it.

ibis = load_obj('particles', log_dir)




In [15]:
gen_model = 0
model_num = 1

# Weighted average over the particle axis (axis 0) of each listed parameter,
# using the particle weights after exp_and_normalise. The weights do not
# depend on the parameter, so they are computed once before the loop.
w = exp_and_normalise(ibis.weights)

for name in ('alpha',):
    samples = np.squeeze(ibis.particles[name])
    estimate = np.average(samples, axis=0, weights=w)
    print('\n\nEstimate')
    print(np.round(estimate, 2))



Estimate
[-0.4   0.38 -2.   -1.23 -0.82 -2.43]


## Plot both results (batch MCMC vs. IBIS)

In [16]:
# The plots and quantiles below treat the particles as an unweighted sample,
# so the weighted particle set is resampled first and the result is taken
# from `ibis.particles`.
# NOTE(review): presumably resampling is random and updates `ibis` in place,
# so re-running this cell may change the downstream plots - confirm.
ibis.resample_particles()
particles = ibis.particles


In [17]:
## fix the sign of the loadings
# The model is unchanged when beta and z flip sign together (both have
# symmetric zero-mean priors), so the sampled loadings can come out with
# either sign. For every particle whose first loading beta[0, 0] is negative,
# flip the sign of the whole first column of beta.
first_loading = particles['beta'][:, 0, 0]
indx = np.where(np.sign(first_loading) == -1)
particles['beta'][indx[0], :, 0] *= -1

In [29]:
# Overlay the posterior densities of the loadings from the batch MCMC draws
# (`ps`) and from the resampled IBIS particles (`particles`).
# This cell only needs `ps` and `particles`; it must not touch `plot_data`,
# which is created later by the credible-interval cells.
param = 'beta'
df = get_post_df(ps[param])
df['source'] = 'Batch MCMC'
df2 = get_post_df(particles[param])
df2['source'] = 'IBIS'

(
    plot_density(pd.concat([df, df2]))
    .configure_legend(
        titleFontSize=16,
        labelFontSize=14,
        symbolStrokeWidth=6,
    )
    .configure_axis(
        labelFontSize=14,
        titleFontSize=16,
    )
)

In [26]:

# Overlay the posterior densities of the intercepts from the batch MCMC draws
# (`ps`) and from the resampled IBIS particles (`particles`).
# This cell only needs `ps` and `particles`; it must not touch `plot_data`,
# which is created later by the credible-interval cells.
param = 'alpha'

# get_post_df is given a (draws, items, 1) array here. The shape is derived
# from the arrays themselves rather than hard-coded as (1000, 6, 1), so the
# cell keeps working when the number of draws or items changes.
# Assumes axis 0 indexes the draws, as in the original (1000, 6, 1) reshape.
mcmc_draws = ps[param]
ibis_draws = particles[param]

df = get_post_df(mcmc_draws.reshape((mcmc_draws.shape[0], -1, 1)))
df['source'] = 'Batch MCMC'
df2 = get_post_df(ibis_draws.reshape((ibis_draws.shape[0], -1, 1)))
df2['source'] = 'IBIS'

(
    plot_density(pd.concat([df, df2]))
    .configure_legend(
        titleFontSize=16,
        labelFontSize=14,
        symbolStrokeWidth=6,
    )
    .configure_axis(
        labelFontSize=14,
        titleFontSize=16,
    )
)


## Model Evidence

In [38]:
# Load the object saved as 'log_lklhds' and display exp(logsumexp(.)) of it,
# i.e. the sum of the exponentiated entries.
# NOTE(review): if 'log_lklhds' holds incremental log-likelihoods (one per
# observation), the log evidence would be their plain sum rather than their
# logsumexp - confirm what is stored before quoting this number.
model_evidence = load_obj('log_lklhds', log_dir)
np.exp(logsumexp(model_evidence))


13.768321131133504

## Scoring Rule

In [39]:
# Load the array saved as 'scoring_rule' in log_dir and display it in full.
scoring_rule =  load_obj('scoring_rule', log_dir)
scoring_rule

array([ 4.24206012,  4.98589029,  1.53843547,  5.62990764,  2.91994778,
        1.55473607, 10.08072747,  3.12374603,  1.69567043,  1.42757441,
        5.14925813,  4.77063591,  7.20095276,  3.48429837,  2.3615303 ,
        1.73564772,  2.36474206,  9.74171766,  3.69372223,  3.57621437,
        1.77191405,  1.62939905,  2.31975876,  2.37754834,  2.209308  ,
        1.46499705,  2.25082749,  1.37487784,  2.1193821 ,  3.62222163,
        3.24254283,  6.76548349,  2.16233251,  2.02670544,  2.57214604,
        4.02412558,  3.45852012,  4.85150488,  4.78439599,  5.94398507,
        1.62028275,  4.55538208,  1.54473909,  6.01539108,  7.10826161,
        1.59026454,  4.01557133,  3.38956643,  3.20015568,  4.35215713,
        3.93387218,  1.5050481 ,  1.4355665 ,  3.88193193,  2.04432954,
        2.83112383,  2.74813403,  5.77772305,  5.43725367,  1.5886309 ,
        1.5726457 ,  3.31867811,  3.7275261 ,  3.31762623,  4.92008477,
        2.82345801,  1.97470375,  2.75573198,  2.69316875,  2.77

### Credible intervals (95%)


In [32]:
# 95% intervals of the IBIS loadings, one facet per loading, with the value
# stored under 'beta' in the raw data overlaid as a red point.
param = 'beta'
df = get_post_df(particles[param])

# 2.5% and 97.5% quantiles of every (row, col) entry.
by_entry = df.groupby(['row', 'col'])[['value']]
df_quant = by_entry.quantile(0.025).reset_index().rename(columns={'value': 'q1'})
df_quant2 = by_entry.quantile(0.975).reset_index().rename(columns={'value': 'q2'})

# `columns=` is used because passing the axis positionally to DataFrame.drop
# is rejected by pandas >= 2.0 (deprecated since 1.3).
df = df_quant.merge(df_quant2, on=['row', 'col']).drop(columns=['col'])

# One reference value per row, keyed by the same 0-based 'row' as `df`.
dd = pd.DataFrame(data.raw_data['beta'], columns=['data'])
dd['row'] = np.arange(dd.shape[0])

plot_data = df.merge(dd, on=['row'])
plot_data['row'] = plot_data['row'] + 1  # 1-based labels for the facets
plot_data['index'] = 'Loading(' + plot_data.row.astype(str) + ')'


# Interval from q1 to q2 drawn as a horizontal bar.
c1 = alt.Chart(plot_data).mark_bar(opacity=0.6).encode(
        alt.X('q1', title=None),
        alt.X2('q2', title=None))

# Reference value drawn on top of the interval.
c2 = alt.Chart(plot_data).mark_point(opacity=1, color='red').encode(
        alt.X('data', title=None)
)


(c1+c2).facet(
    facet=alt.Facet('index:N', title=None),
    title='',
    columns=1
).configure_axis(
    labelFontSize=14,
).configure_title(
    fontSize=16
).configure_header(labelFontSize=16)


In [37]:
# 95% intervals of the IBIS intercepts, one facet per intercept, with the
# value stored under 'alpha' in the raw data overlaid as a red point.
param = 'alpha'
df = get_post_df(particles[param])

# 2.5% and 97.5% quantiles of every (row, col) entry.
by_entry = df.groupby(['row', 'col'])[['value']]
df_quant = by_entry.quantile(0.025).reset_index().rename(columns={'value': 'q1'})
df_quant2 = by_entry.quantile(0.975).reset_index().rename(columns={'value': 'q2'})

df = df_quant.merge(df_quant2, on=['row', 'col'])
# Assigned by position: relies on the merged rows being in the same order as
# the entries of raw_data['alpha'].
df['data'] = data.raw_data['alpha']
plot_data = df
plot_data['col'] += 1  # 1-based labels for the facets
plot_data['index'] = 'Intercept(' + plot_data.col.astype(str) + ')'


# Interval from q1 to q2 drawn as a horizontal bar.
c1 = alt.Chart(plot_data).mark_bar(opacity=0.6).encode(
    x=alt.X('q1', title=None),
    x2=alt.X2('q2', title=None),
)

# Reference value drawn on top of the interval.
c2 = alt.Chart(plot_data).mark_point(opacity=1, color='red').encode(
    x=alt.X('data', title=None),
)


(c1 + c2).facet(
    facet=alt.Facet('index:N', title=None),
    title='',
    columns=1,
).configure_axis(
    labelFontSize=14,
).configure_title(
    fontSize=16,
).configure_header(labelFontSize=16)
