In [1]:
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
)
from codebase.plot import plot_density, plot_line, get_post_df
import altair as alt
from codebase.classes_data import Data
from run_mcmc import run_mcmc
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from pdb import set_trace

# Lift Altair's limit on the number of rows a chart may embed, so that the
# data frames plotted in this notebook are accepted regardless of their size.
alt.data_transformers.disable_max_rows()


DataTransformerRegistry.enable('default')

In [2]:

# Run configuration: the task label names the log folder; set
# `existing_directory` to a previous run's folder to reuse it instead of
# creating a fresh one.
task_handle = 'mcmc_ibis_adapt9'
gen_model = 0
existing_directory = None

if existing_directory is not None:
    # Reuse an earlier run's folder, passed through path_backslash first.
    log_dir = path_backslash(existing_directory)
    print("\n\nReading from existing directory: %s" % log_dir)
else:
    # No folder supplied: create a new one for this task.
    log_dir = make_folder(task_handle)
    print("\n\nCreating new directory: %s" % log_dir)




Creating new directory: ./log/20210122_174928_mcmc_ibis_adapt9/


## Generate data and run MCMC

In [3]:

# Generate the data set for this run and store it alongside the run's logs.
data_settings = dict(
    name=task_handle,
    model_num=1,
    size=200,
    random_seed=2,
)
exp_data = Data(**data_settings)
exp_data.generate()

save_obj(exp_data, 'complete_data', log_dir)


In [4]:
# Sampler settings, collected in one place and then handed to run_mcmc.
num_warmup = 200
param_names = ['beta', 'alpha']
latent_names = ['z', 'y_latent']

mcmc_settings = dict(
    stan_data=exp_data.get_stan_data(),
    nsim_mcmc=400,
    num_warmup=num_warmup,
    model_num=7,
    bundle_size=100,
    gen_model=gen_model,
    param_names=param_names,
    latent_names=latent_names,
    log_dir=log_dir,
)
ps = run_mcmc(**mcmc_settings)

# Persist the samples so later sessions can reload them instead of re-running.
save_obj(ps, 'mcmc_post_samples', log_dir)


100%|██████████| 400/400 [16:03<00:00,  2.41s/it]


In [5]:
# ps = load_obj('mcmc_post_samples', log_dir)

In [6]:
# Quick check of the array holding the 'beta' samples; the output recorded
# for this run is (400, 6, 1).
ps['beta'].shape

(400, 6, 1)

## Or load existing directory

In [7]:
# Optional alternative to running the sampler: reload the samples saved by an
# earlier run. Left commented out; uncomment and point log_dir at that run.
# NOTE(review): the folder created by this notebook is printed as './log/...',
# while the path below starts with '/log/...' — confirm the intended location.
# log_dir = '/log/20210122_173348_mcmc_ibis_adapt9/'
# ps = load_obj( 'mcmc_post_samples', log_dir)
# ps['beta'] = ps['beta'][num_warmup:].copy()

## Post-process loadings for sign flips

In [8]:
# Sign-align the loadings draw by draw: every draw is multiplied by the sign
# of its first row, so that the first row is non-negative in all draws.
#
# This replaces a double loop whose inner index was never used (it repeated
# the same whole-draw update once per row) with a single vectorised step.
beta = ps['beta']
nsim = beta.shape[0]   # number of draws (kept: other cells may read it)
nrows = beta.shape[1]  # number of loading rows (kept for the same reason)

# Sign of the first row of each draw; slicing with :1 keeps that axis so the
# result broadcasts across all rows of the same draw.
anchor_sign = np.sign(beta[:, :1])

# A first loading of exactly 0 has sign 0, and multiplying by it would wipe
# out the whole draw. Such draws are left untouched instead.
anchor_sign = np.where(anchor_sign == 0, 1, anchor_sign)

# Multiply in place so ps['beta'] remains the same array object as before.
beta *= anchor_sign

## Plot MCMC samples

In [9]:
# Line plot of the 'beta' samples, tagged with their source.
param = 'beta'
df = get_post_df(ps[param]).assign(source='mcmc')
plot_line(df, height=100)

INFO:numexpr.utils:Note: NumExpr detected 24 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [10]:
# Density plot of the 'beta' samples, tagged with their source.
param = 'beta'
df = get_post_df(ps[param]).assign(source='mcmc')
plot_density(df, height=100)

In [11]:
# Density plot of the 'alpha' samples, tagged with their source.
param = 'alpha'
df = get_post_df(ps[param]).assign(source='mcmc')
plot_density(df, height=100)

## Plot quantiles

In [12]:
# Lower (2.5%) and upper (97.5%) quantiles of the 'beta' samples for every
# (row, col) entry, joined into one frame with columns q1 and q2.
param = 'beta'
df = get_post_df(ps[param])
entry_keys = ['row', 'col']

df_quant = (
    df.groupby(entry_keys)[['value']]
    .quantile(0.025)
    .reset_index()
    .rename(columns={'value': 'q1'})
)
df_quant2 = (
    df.groupby(entry_keys)[['value']]
    .quantile(0.975)
    .reset_index()
    .rename(columns={'value': 'q2'})
)

df = df_quant.merge(df_quant2, on=entry_keys).assign(source='mcmc')

# simple quantile chart: one bar from q1 to q2 per (row, col) panel
c1 = (
    alt.Chart(df)
    .mark_bar(opacity=0.6)
    .encode(
        x=alt.X('q1', title=None),
        x2=alt.X2('q2', title=None),
        row=alt.Row('row'),
        column=alt.Column('col'),
        color=alt.Color('source'),
    )
)
c1

## Chart overlaying quantiles with real data

In [13]:
# Give every entry a single 'r_<row>.c_<col>' label, then keep only the label,
# the two quantiles and the source tag.
row_label = 'r_' + df['row'].astype(str)
col_label = '.c_' + df['col'].astype(str)
df['index'] = row_label + col_label

kept_columns = ['index', 'q1', 'q2']
df = df.loc[:, kept_columns].assign(source='mcmc')
df

Unnamed: 0,index,q1,q2,source
0,r_0.c_0,0.447711,2.004634,mcmc
1,r_1.c_0,0.363865,1.659466,mcmc
2,r_2.c_0,0.108041,1.501151,mcmc
3,r_3.c_0,0.199182,1.525545,mcmc
4,r_4.c_0,0.412196,1.742846,mcmc
5,r_5.c_0,0.339257,1.811514,mcmc


In [14]:
# Interval bars (q1 to q2), one panel per entry label, laid out two per row.
c1 = (
    alt.Chart(df)
    .mark_bar(opacity=0.6)
    .encode(
        x=alt.X('q1', title=None),
        x2=alt.X2('q2', title=None),
        color=alt.Color('source'),
    )
)
c1.facet(facet='index', columns=2)

    

In [15]:
# Reference values for 'beta' taken from the generated data object, labelled
# with the same 'r_<row>.c_<col>' scheme as the quantile frame so the two can
# be merged on 'index'.
# NOTE(review): presumably these are the values the data were simulated
# from — verify against the Data class.
dd = pd.DataFrame(exp_data.raw_data['beta'], columns=['data'])
dd['col'] = 0
# Number the rows from the frame's own length rather than a hard-coded 6, so
# the cell keeps working if the number of loadings changes.
dd['row'] = np.arange(len(dd))
dd['index'] = 'r_' + dd.row.astype(str)+'.c_'+dd.col.astype(str)
dd = dd.loc[:,['index', 'data']]
dd

Unnamed: 0,index,data
0,r_0.c_0,1.0
1,r_1.c_0,0.7
2,r_2.c_0,0.8
3,r_3.c_0,0.5
4,r_4.c_0,0.9
5,r_5.c_0,0.6


In [16]:
# Pair each entry's quantiles with its reference value via the shared label.
plot_data = pd.merge(df, dd, on='index')

In [17]:
# Layer 1: interval bars from q1 to q2 on a fixed [-2, 2] x-axis.
c1 = (
    alt.Chart(plot_data)
    .mark_bar(opacity=0.6)
    .encode(
        x=alt.X('q1', title=None, scale=alt.Scale(domain=[-2, 2])),
        x2=alt.X2('q2', title=None),
        color=alt.Color('source'),
    )
)

# Layer 2: a red point at the reference value from the 'data' column.
c2 = (
    alt.Chart(plot_data)
    .mark_point(opacity=1, color='red')
    .encode(x=alt.X('data', title=None))
)

# One panel per entry label, stacked in a single column.
(c1 + c2).facet(facet='index', columns=1)
