In [12]:
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
)
from codebase.plot import plot_density, plot_line, get_post_df
import altair as alt
from codebase.ibis import compile_model, run_stan_model, remove_chain_dim
from codebase.classes_data import Data
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from pdb import set_trace

# Lift Altair's row limit so the charts below can embed every posterior draw
# (the registry state is echoed in the cell output).
alt.data_transformers.disable_max_rows()


DataTransformerRegistry.enable('default')

In [2]:

# Run configuration: task label, generating-model switch and an optional
# pre-existing log directory to read from instead of creating a new one.
task_handle = 'hmc'
gen_model = 0
existing_directory = None

# Reuse the supplied directory when there is one; otherwise create a fresh
# folder for this task.
if existing_directory is not None:
    # presumably path_backslash normalises the trailing separator -- TODO confirm
    log_dir = path_backslash(existing_directory)
    print("\n\nReading from existing directory: %s" % log_dir)
else:
    log_dir = make_folder(task_handle)
    print("\n\nCreating new directory: %s" % log_dir)




Creating new directory: ./log/20210202_143409_hmc/


## Generate data

In [3]:

# Build the data set for this run (200 observations, fixed seed), generate it,
# and store the resulting object in the log directory as 'complete_data'.
exp_data = Data(name=task_handle, model_num=1, size=200, random_seed=2)
exp_data.generate()

save_obj(exp_data, 'complete_data', log_dir)


In [5]:
# Display the dictionary that is later passed to the sampler as `data`
# (the output below shows keys N, J, K and the matrix D).
exp_data.get_stan_data()

{'N': 200, 'J': 6, 'K': 1, 'D': array([[0, 1, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0],
        ...,
        [1, 1, 0, 0, 1, 0],
        [1, 1, 1, 0, 1, 0],
        [1, 1, 1, 0, 0, 0]])}

In [4]:
# Compile the Stan model and keep the compiled object in `sm`; save=True is
# passed together with log_dir.
# NOTE(review): the first positional argument is presumably the model number
# (it matches model_num=1 used for the data); the meaning of the second
# positional argument (False) is not visible here -- confirm in codebase.ibis.
sm = compile_model(1, False, log_dir, save=True)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_10eb87fe19502ca55d1427f6cbf03411 NOW.


In [6]:
# Sampler settings, defined once and passed to run_stan_model below.
# Previously the call hard-coded num_samples=100 / num_warmup=50 while
# num_warmup was declared as 10 here, so the declared value never matched the
# run. num_warmup is now 50, the value the sampler was actually given;
# num_samples is also what remove_chain_dim() receives further down.
num_warmup = 50
num_samples = 100

# Parameters pulled out of the fit in the following cells.
param_names = ['beta', 'alpha', 'zz', 'yy']

# Single-chain run on the generated data with adaptation switched on and no
# user-supplied initial values or inverse metric.
fit_run = run_stan_model(
    data=exp_data.get_stan_data(),
    compiled_model=sm,
    num_samples=num_samples,
    num_warmup=num_warmup,
    num_chains=1,
    initial_values=None,
    inv_metric=None,
    adapt_engaged=True,
)

In [23]:
# Pull the draws for the parameters of interest out of the fit, unpermuted;
# the chain axis is stripped by remove_chain_dim in a later cell.
ps = fit_run.extract(pars=param_names, permuted=False)

In [24]:
# ps = load_obj('mcmc_post_samples', log_dir)

In [25]:
# Strip the chain dimension from the extracted draws.
# NOTE(review): this rebinds `ps` from itself, so re-running only this cell
# feeds already-processed samples back in -- re-run the extract cell first.
ps = remove_chain_dim(ps, param_names, num_samples)

In [26]:
# Sanity check on the processed draws: the output below is (100, 6),
# presumably (num_samples, J).
ps['alpha'].shape

(100, 6)

## Or load existing directory

In [27]:
# log_dir = '/log/20210122_173348_mcmc_ibis_adapt9/'
# ps = load_obj( 'mcmc_post_samples', log_dir)
# ps['beta'] = ps['beta'][num_warmup:].copy()

## Post process loadings for sign flips

In [28]:
# nsim = ps['beta'].shape[0]
# nrows = ps['beta'].shape[1]
# for n in range(nsim):
#     for i in range(nrows):
#         sign = np.sign(ps['beta'][n,0])
#         ps['beta'][n] = sign * ps['beta'][n,]

## Plot MCMC samples

In [29]:
# Line plot of the posterior draws for `param`, labelled with their source.
param = 'beta'
df = get_post_df(ps[param]).assign(source='hmc')
plot_line(df, height=100)

In [31]:
# Density plot of the posterior draws for `param`, labelled with their source.
param = 'beta'
df = get_post_df(ps[param]).assign(source='hmc')
plot_density(df, height=100)

In [32]:
# Density plot of the posterior draws for `param`, labelled with their source.
param = 'alpha'
df = get_post_df(ps[param]).assign(source='hmc')
plot_density(df, height=100)

## Plot quantiles

In [43]:
param = 'beta'
df = get_post_df(ps[param])

# 2.5% and 97.5% quantiles of `value` for every (row, col) entry; both tables
# come from the same grouping and are then joined side by side.
by_entry = df.groupby(['row', 'col'])[['value']]
df_quant = by_entry.quantile(0.025).reset_index().rename(columns={'value': 'q1'})
df_quant2 = by_entry.quantile(0.975).reset_index().rename(columns={'value': 'q2'})

df = df_quant.merge(df_quant2, on=['row', 'col'])
df['source'] = 'hmc'

# simple quantile chart: one bar from q1 to q2 per (row, col) facet
interval_bars = alt.Chart(df).mark_bar(opacity=0.6)
c1 = interval_bars.encode(
    alt.X('q1', title=None),
    alt.X2('q2', title=None),
    alt.Row('row'),
    alt.Column('col'),
    alt.Color('source'),
)
c1

## Chart overlaying quantiles with real data

In [44]:
# Rebuild the quantile table from df_quant / df_quant2 rather than from the
# `df` left behind by the previous cell. This cell drops the row/col columns
# from `df`, so deriving the new `df` from the old one made a second run of
# the cell fail on `df.row`; starting from the untouched quantile tables makes
# the cell safe to re-run and leaves `df` in the same final state.
df = df_quant.merge(df_quant2, on=['row', 'col'])
df['index'] = 'r_' + df.row.astype(str) + '.c_' + df.col.astype(str)
df = df.loc[:, ['index', 'q1', 'q2']]
df['source'] = 'hmc'

# Reference values stored on the data object, keyed the same way as the
# quantiles. They form a single column, hence col = 0 for every entry; the
# row labels follow the actual number of values instead of a hard-coded 6.
dd = pd.DataFrame(exp_data.raw_data['beta'], columns=['data'])
dd['col'] = 0
dd['row'] = np.arange(len(dd))
dd['index'] = 'r_' + dd.row.astype(str) + '.c_' + dd.col.astype(str)
dd = dd.loc[:, ['index', 'data']]

# One row per entry: quantile interval plus the matching reference value.
plot_data = df.merge(dd, on=['index'])

In [45]:
# Interval bars from q1 to q2 on a fixed x-range of [-2, 2].
x_scale = alt.Scale(domain=[-2, 2])
c1 = alt.Chart(plot_data).mark_bar(opacity=0.6).encode(
    alt.X('q1', title=None, scale=x_scale),
    alt.X2('q2', title=None),
    alt.Color('source'),
)

# Red points marking the `data` column of plot_data.
c2 = alt.Chart(plot_data).mark_point(opacity=1, color='red').encode(
    alt.X('data', title=None),
)

# Layer points over bars and stack one facet per entry in a single column.
layered = c1 + c2
layered.facet('index', columns=1)
