In [1]:
from codebase.classesmcmc import Data, MCMC
import  pystan
import argparse
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from codebase.ibis import essl, exp_and_normalise, run_mcmc
from scipy.special import expit
from tqdm.notebook import tqdm


In [21]:
# Directory of the run being inspected; the load_obj calls in this notebook
# read their objects from here.
log_dir = './log/20201128_174019_db2/'
# Object stored under the name 'data'. Later cells read
# expdata.raw_data['beta'] and expdata.raw_data['alpha'] from it as the
# reference values plotted against the posterior summaries.
expdata = load_obj('data', log_dir)

In [22]:
# Alternative run directory, currently disabled:
# log_dir = './log/mcmc_hmc_test/'
# Object stored under the name 'mcmc_post_samples' in log_dir (presumably the
# MCMC posterior samples -- confirm). Later cells index it with the keys
# 'accs', 'beta' and 'alpha'.
ps = load_obj('mcmc_post_samples', log_dir)

In [23]:
# Divide every entry of ps['accs'] by 500 and display the result.
# NOTE(review): 500 is presumably the number of MCMC proposals behind each
# acceptance count, which would make these acceptance rates -- confirm, and
# consider replacing the literal with a named constant.
ps['accs']/500.

array([0.722, 0.762, 0.84 , 0.788, 0.868, 0.796, 0.812, 0.826, 0.742,
       0.816, 0.854, 0.852, 0.806, 0.866, 0.87 , 0.87 , 0.804, 0.686,
       0.798, 0.87 , 0.84 , 0.844, 0.806, 0.834, 0.836, 0.812, 0.782,
       0.868, 0.688, 0.87 , 0.898, 0.802, 0.878, 0.82 , 0.806, 0.578,
       0.896, 0.848, 0.816, 0.866, 0.822, 0.814, 0.878, 0.83 , 0.816,
       0.896, 0.834, 0.852, 0.84 , 0.812, 0.866, 0.826, 0.892, 0.77 ,
       0.904, 0.85 , 0.794, 0.808, 0.796, 0.83 , 0.716, 0.806, 0.778,
       0.91 , 0.81 , 0.844, 0.844, 0.832, 0.864, 0.756, 0.754, 0.806,
       0.848, 0.866, 0.778, 0.85 , 0.864, 0.838, 0.838, 0.83 , 0.816,
       0.898, 0.832, 0.88 , 0.816, 0.71 , 0.844, 0.814, 0.882, 0.876,
       0.79 , 0.876, 0.808, 0.87 , 0.792, 0.846, 0.832, 0.896, 0.882,
       0.858])

In [24]:
import altair as alt
import pandas as pd
# Lift Altair's default row limit so the full set of draws can be embedded.
alt.data_transformers.disable_max_rows()

# Trace plots of ps['beta']: one small panel per column of the squeezed array.
# assumes np.squeeze(ps['beta']) is 2-D with one row per draw -- TODO confirm
plot_data = pd.DataFrame(np.squeeze(ps['beta']))
# Row position, used as the x axis of each trace.
plot_data['id'] = np.arange(len(plot_data))
# Long format: one row per (id, col) pair with the draw in 'value'.
# `var_name` is documented as a scalar; a one-element list is rejected by
# recent pandas releases for non-MultiIndex columns, so pass the plain string.
plot_data = plot_data.melt(id_vars=['id'], var_name='col')

mcmc_chart = alt.Chart(plot_data).mark_line(
    opacity = 1,
    strokeWidth = 1,
).encode(
    alt.Y('value', title=None),
    alt.X('id:O',
          title=None
         )
).properties(width=200, height=100)

# Small multiples, three panels per row, split by original column label.
(mcmc_chart).facet(
    alt.Facet('col'),
    columns=3
)


In [28]:
# Summary of ps['beta'] along axis 0: mean plus the 2.5% and 97.5% quantiles,
# tabulated next to the sign-flipped reference values expdata.raw_data['beta'].
b = np.squeeze(ps['beta'])
estb = np.mean(b, axis=0)
quantb = np.quantile(b, q=[0.025, 0.975], axis=0)
# First row is the lower quantile, second row the upper one.
qb1, qb2 = quantb
df2 = (
    pd.DataFrame(estb, columns=['estimate'])
    .assign(q1=qb1, q2=qb2, realdata=-expdata.raw_data['beta'])
    .reset_index()  # exposes the row position as an 'index' column for plotting
)
df2

Unnamed: 0,index,estimate,q1,q2,realdata
0,0,-0.853673,-1.870635,0.021564,-1.0
1,1,-0.71838,-1.657713,0.061971,-0.7
2,2,-0.703872,-1.652508,0.060644,-0.8
3,3,-0.877547,-1.755539,-0.115629,-0.5
4,4,-0.688504,-1.489132,0.167456,-0.9
5,5,-0.387557,-1.251399,0.426717,-0.6


In [29]:
# Layered summary chart of df2, one row per value of the 'index' column.
# Bar: horizontal span from column 'q1' to column 'q2'.
c1 = alt.Chart(df2).mark_bar(opacity=0.6).encode(
    alt.X('q1'),
    alt.X2('q2'),
    alt.Y('index:N'),
)
# Blue point: column 'estimate'.
c2 = alt.Chart(df2).mark_point(opacity=1, color='blue').encode(
    alt.X('estimate'),
    alt.Y('index:N'),
)
# Red point: column 'realdata'.
c3 = alt.Chart(df2).mark_point(opacity=1, color='red').encode(
    alt.X('realdata'),
    alt.Y('index:N'),
)
c1 + c2 + c3

# Legend: red = real data, blue = MCMC samples (summary estimate)

In [30]:
# Summary of ps['alpha'] along axis 0: mean plus the 2.5% and 97.5% quantiles,
# tabulated next to the reference values expdata.raw_data['alpha'].
b = np.squeeze(ps['alpha'])
estb = np.mean(b, axis=0)
quantb = np.quantile(b, q=[0.025, 0.975], axis=0)
# First row is the lower quantile, second row the upper one.
qb1, qb2 = quantb
df2 = (
    pd.DataFrame(estb, columns=['estimate'])
    .assign(q1=qb1, q2=qb2, realdata=expdata.raw_data['alpha'])
    .reset_index()  # exposes the row position as an 'index' column for plotting
)


# Layered summary chart of df2, one row per value of the 'index' column.
# Bar: horizontal span from column 'q1' to column 'q2'.
c1 = alt.Chart(df2).mark_bar(opacity=0.6).encode(
    alt.X('q1'),
    alt.X2('q2'),
    alt.Y('index:N'),
)
# Blue point: column 'estimate'.
c2 = alt.Chart(df2).mark_point(opacity=1, color='blue').encode(
    alt.X('estimate'),
    alt.Y('index:N'),
)
# Red point: column 'realdata'.
c3 = alt.Chart(df2).mark_point(opacity=1, color='red').encode(
    alt.X('realdata'),
    alt.Y('index:N'),
)
c1 + c2 + c3

# Legend: red = real data, blue = MCMC samples (summary estimate)

In [19]:
def compile_model(model_num, log_dir, save=True):
    """
    Compile one of the 1-factor Stan programs with PyStan.

    Inputs
    ============
    - model_num : identifier substituted into the file name
        ``model_<model_num>.stan`` under
        ``./codebase/stancode/models/1factor/``
    - log_dir : directory handed to ``save_obj`` when ``save`` is true
    - save : if true, store the compiled model under the name 'sm'

    Output
    ============
    - the object returned by ``pystan.StanModel``
    """
    stan_dir = './codebase/stancode/models/1factor/'
    stan_file = '%smodel_%s.stan' % (stan_dir, model_num)

    with open(stan_file, 'r') as handle:
        program = handle.read()

    compiled = pystan.StanModel(model_code=program, verbose=False)

    if not save:
        return compiled

    save_obj(compiled, 'sm', log_dir)
    return compiled


def run_stan_model(
    data,
    compiled_model,
    num_samples,
    num_warmup,
    num_chains,
    ):
    """
    Call ``compiled_model.sampling`` with this notebook's fixed settings.

    Inputs
    ============
    - data : passed through unchanged as the ``data`` argument
    - compiled_model : object exposing a ``sampling`` method
        (e.g. the result of ``compile_model``)
    - num_samples : draws requested in addition to the warmup draws
    - num_warmup : passed as ``warmup``
    - num_chains : passed as ``chains``

    Output
    ============
    - whatever ``compiled_model.sampling`` returns
    """
    sampler_control = dict(
        metric="diag_e",  # diag_e/dense_e
        adapt_delta=0.999,
        max_treedepth=25,
        adapt_engaged=True,
    )

    # `iter` is the total count: requested draws plus warmup.
    total_iterations = num_samples + num_warmup

    return compiled_model.sampling(
        data=data,
        iter=total_iterations,
        warmup=num_warmup,
        chains=num_chains,
        # init=0,  (disabled)
        control=sampler_control,
        n_jobs=1,
    )

def flatten_corr(a, offset = 0):
    """
    Flatten the upper triangle of a [K, K] matrix into a 1-D array,
    reading the selected entries row by row.

    Inputs
    ============
    - a : matrix to flatten, should be of dimension [K, K]
    - offset : diagonal to start from; 0 (the default) keeps the main
        diagonal, 1 keeps only the elements strictly above it
    Output
    ============
    - an array of size [M,], where M is the number of elements on or
        above the ``offset``-th diagonal
    """
    rows, cols = np.triu_indices(a.shape[0], k=offset)
    return a[rows, cols]
