In [1]:
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
)
from codebase.plot import plot_density, plot_line, get_post_df
import altair as alt
from codebase.classes_data import Data
from run_mcmc import run_mcmc_from_start
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from pdb import set_trace
from codebase.ibis import post_process_sign

# Turn off Altair's row-count limit on chart data so the charts below can be
# built directly from the posterior-sample DataFrames.
alt.data_transformers.disable_max_rows()


DataTransformerRegistry.enable('default')

In [2]:

# Run configuration. Leave `existing_directory` as None to start a fresh run,
# or point it at a previous log directory to reuse that run's outputs.
task_handle = 'mcmc_rasch_dense/'
gen_model = 0
existing_directory = None

# Resolve the directory every later cell reads from and writes to.
if existing_directory is not None:
    # Reuse a previous run; the path is passed through path_backslash first
    # (presumably to normalise the trailing separator -- confirm in file_utils).
    log_dir = path_backslash(existing_directory)
    print("\n\nReading from existing directory: %s" % log_dir)
else:
    # Fresh run: make_folder returns the path of the directory it created.
    log_dir = make_folder(task_handle)
    print("\n\nCreating new directory: %s" % log_dir)
    




Creating new directory: ./log/20210303_194136_mcmc_rasch_dense//


## Generate data

In [3]:

# Generate the experiment's data set. The seed is fixed (random_seed=2) so the
# same data can be regenerated on a re-run.
exp_data = Data(name=task_handle, model_num=1, size=100, random_seed=2)

# generate() is called for its effect on exp_data; the object is then saved
# under the name 'complete_data' in the run's log directory.
exp_data.generate()
save_obj(exp_data, 'complete_data', log_dir)


In [4]:
# Run the MCMC sampler on the generated data and keep the posterior samples.
# `param_names` / `latent_names` are handed to the sampler as the names of the
# parameter and latent quantities (see run_mcmc_from_start for their exact use).
param_names = ['beta', 'alpha']
latent_names = ['z', 'y']
# NOTE(review): `gen_model` is hard-coded to False here even though the
# configuration cell defines a `gen_model` variable (currently 0) -- confirm
# whether the call should read that variable instead.
# NOTE(review): the data were generated with model_num=1 but the sampler is run
# with model_num=3 -- confirm the two numbers are meant to differ.
ps = run_mcmc_from_start(
    stan_data=exp_data.get_stan_data(),
    mcmc_nsim=100,
    mcmc_adapt_nsim=1,
    model_num=3,
    bundle_size=50,
    gen_model=False,
    param_names=param_names,
    latent_names=latent_names,
    log_dir=log_dir,
    hmc_adapt_nsim=500,
    hmc_post_adapt_nsim=5
)

# Persist the samples so later sessions can reload them with load_obj instead
# of re-running the sampler (the recorded run took about a minute).
save_obj(ps, 'mcmc_post_samples', log_dir)


100%|██████████| 100/100 [01:02<00:00,  1.61it/s]


In [5]:
# ps = load_obj('mcmc_post_samples', log_dir)

In [6]:
# Sanity check on the stored 'beta' samples; the recorded run shows (100, 6, 1),
# whose leading 100 matches mcmc_nsim.
ps['beta'].shape

(100, 6, 1)

## Or load existing directory

## Best runs 

Best results so far are stored in `./log/20210301_123541_mcmc_debug/`


In [7]:
# log_dir = './log/20210301_123541_mcmc_debug/'
# ps = load_obj( 'mcmc_post_samples', log_dir)
## ps['beta'] = ps['beta'][num_warmup:].copy()

## Post-process loadings for sign flips

In [8]:

# Replace the samples with their sign-corrected version.
# NOTE(review): `ps` is rebound from itself, so re-running this cell feeds
# already-processed samples back into post_process_sign -- confirm the helper
# is idempotent, or keep the raw samples under a separate name.
ps = post_process_sign(ps)

## Plot MCMC samples

In [9]:
# Line plot of the 'beta' samples, tagged with the sampler that produced them.
param = 'beta'
df = get_post_df(ps[param]).assign(source='mcmc')
plot_line(df, height=100)

INFO:numexpr.utils:Note: NumExpr detected 24 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [10]:
# Density plot of the 'beta' samples, tagged with the sampler that produced them.
param = 'beta'
df = get_post_df(ps[param]).assign(source='mcmc')
plot_density(df, height=100)

In [11]:
# Density plot of the 'alpha' samples, tagged with the sampler that produced them.
param = 'alpha'
df = get_post_df(ps[param]).assign(source='mcmc')
plot_density(df, height=100)

## Plot quantiles

## Chart overlaying the 2.5%–97.5% posterior quantile interval with the values in `exp_data.raw_data`

In [12]:
# 2.5%-97.5% posterior interval for every 'beta' entry, overlaid with the value
# stored for that entry in exp_data.raw_data.
param = 'beta'
df = get_post_df(ps[param])

# A single groupby pass yields both interval ends: passing a list of quantiles
# adds an index level holding the quantile, which unstack() turns into two
# columns that are then named q1 (2.5%) and q2 (97.5%).
df_quant = (
    df.groupby(['row', 'col'])['value']
    .quantile([0.025, 0.975])
    .unstack()
    .rename(columns={0.025: 'q1', 0.975: 'q2'})
    .reset_index()
)

# One row per entry, keyed by an 'r_<row>.c_<col>' label used for the join and
# the facets. assign() builds a new frame, so no column is written into a slice.
df = df_quant.assign(
    index='r_' + df_quant['row'].astype(str) + '.c_' + df_quant['col'].astype(str),
    source='mcmc',
)[['index', 'q1', 'q2', 'source']]

# Reference values laid out as a single column (col 0) with one row per entry,
# mirroring the (n_draws, 6, 1) shape of ps['beta']. The row labels follow the
# length of the data instead of a hard-coded 6.
dd = pd.DataFrame(exp_data.raw_data[param], columns=['data'])
dd['col'] = 0
dd['row'] = np.arange(len(dd))
dd['index'] = 'r_' + dd['row'].astype(str) + '.c_' + dd['col'].astype(str)
dd = dd[['index', 'data']]

plot_data = df.merge(dd, on=['index'])

# Interval drawn as a horizontal bar spanning q1..q2.
c1 = alt.Chart(plot_data).mark_bar(opacity=0.6).encode(
    alt.X('q1', title=None, scale=alt.Scale(domain=[-2, 2])),
    alt.X2('q2', title=None),
    alt.Color('source'),
)

# Reference value drawn as a red point on the same axis.
c2 = alt.Chart(plot_data).mark_point(opacity=1, color='red').encode(
    alt.X('data', title=None),
)

# One facet per entry, stacked in a single column.
(c1 + c2).facet('index', columns=1)


In [13]:
# 2.5%-97.5% posterior interval for every 'alpha' entry, overlaid with the value
# stored for that entry in exp_data.raw_data.
param = 'alpha'
df = get_post_df(ps[param])

# A single groupby pass yields both interval ends: passing a list of quantiles
# adds an index level holding the quantile, which unstack() turns into two
# columns that are then named q1 (2.5%) and q2 (97.5%).
df_quant = (
    df.groupby(['row', 'col'])['value']
    .quantile([0.025, 0.975])
    .unstack()
    .rename(columns={0.025: 'q1', 0.975: 'q2'})
    .reset_index()
)

# One row per entry, keyed by an 'r_<row>.c_<col>' label used for the join and
# the facets. These are MCMC samples, so the legend label is 'mcmc'; the earlier
# version overwrote it with 'smc2', a leftover from another notebook.
df = df_quant.assign(
    index='r_' + df_quant['row'].astype(str) + '.c_' + df_quant['col'].astype(str),
    source='mcmc',
)[['index', 'q1', 'q2', 'source']]

# Reference values laid out as a single row (row 0) with one column per entry.
# NOTE(review): this assumes get_post_df labels the 'alpha' entries along `col`;
# confirm against the shape of ps['alpha']. The column labels follow the length
# of the data instead of a hard-coded 6.
dd = pd.DataFrame(exp_data.raw_data[param], columns=['data'])
dd['col'] = np.arange(len(dd))
dd['row'] = 0
dd['index'] = 'r_' + dd['row'].astype(str) + '.c_' + dd['col'].astype(str)
dd = dd[['index', 'data']]

plot_data = df.merge(dd, on=['index'])

# Interval drawn as a horizontal bar spanning q1..q2.
c1 = alt.Chart(plot_data).mark_bar(opacity=0.6).encode(
    alt.X('q1', title=None, scale=alt.Scale(domain=[-4, 4])),
    alt.X2('q2', title=None),
    alt.Color('source'),
)

# Reference value drawn as a red point on the same axis.
c2 = alt.Chart(plot_data).mark_point(opacity=1, color='red').encode(
    alt.X('data', title=None),
)

# One facet per entry, stacked in a single column.
(c1 + c2).facet('index', columns=1)
