In [13]:
from codebase.classesmcmc import Data, MCMC
import  pystan
import argparse
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
import altair as alt 
import pandas as pd

##

In [14]:
def compile_model(model_num, log_dir, save=True):
    """
    Compile the Stan program of one of the 1-factor models.

    Inputs
    ============
    - model_num : identifier substituted into the file name
        `model_<model_num>.stan` inside the 1factor models folder
    - log_dir : directory handed to `save_obj` when `save` is true
    - save : if true, store the compiled model under the name 'sm'
    Output
    ============
    -  the compiled `pystan.StanModel`
    """
    stan_dir = './codebase/stancode/models/1factor/'
    stan_file = '%smodel_%s.stan' % (stan_dir, model_num)

    with open(stan_file, 'r') as handle:
        stan_source = handle.read()

    compiled = pystan.StanModel(model_code=stan_source, verbose=False)
    if not save:
        return compiled

    # Persist the compiled model so later sessions can load it
    # instead of recompiling.
    save_obj(compiled, 'sm', log_dir)
    return compiled


def run_stan_model(
    data,
    compiled_model,
    num_samples, 
    num_warmup,
    num_chains,
    ):
    """
    Draw samples from a compiled Stan model.

    Inputs
    ============
    - data : data passed unchanged to `compiled_model.sampling`
    - compiled_model : object exposing a `sampling` method
        (e.g. a compiled `pystan.StanModel`)
    - num_samples : number of iterations kept after warmup
    - num_warmup : number of warmup iterations
    - num_chains : number of chains
    Output
    ============
    -  whatever `compiled_model.sampling` returns
    """
    sampler_settings = dict(
        data=data,
        # Stan's `iter` counts warmup and kept iterations together.
        iter=num_samples + num_warmup,
        warmup=num_warmup,
        chains=num_chains,
        # `init=0` is currently disabled (not passed to sampling).
        control=dict(
            metric="diag_e",  # diag_e/dense_e
            adapt_delta=0.999,
            max_treedepth=25,
            adapt_engaged=True,
        ),
        n_jobs=1,
    )
    return compiled_model.sampling(**sampler_settings)

def flatten_corr(a, offset = 0):
    """
    Flatten the upper triangle of a [K, K] matrix to a [M,] array.

    With the default `offset = 0` the diagonal is included, so
    M = K * (K + 1) / 2. With `offset = 1` only the elements
    strictly above the diagonal are kept, so M = K * (K - 1) / 2.
    Elements come out in row-major order.

    Inputs
    ============
    - a : matrix to flatten out, should be of dimension [K, K];
        any array-like (e.g. nested lists) is accepted
    - offset : first diagonal to include, forwarded to
        `np.triu_indices` as `k`
    Output
    ============
    -  an array of size [M,]
    """
    # asanyarray leaves ndarrays (and subclasses) untouched and lets
    # plain nested lists be indexed with the triangle indices below.
    a = np.asanyarray(a)
    return a[np.triu_indices(a.shape[0], k=offset)]


In [15]:
# Directory of an earlier run. The cells below save the generated data into it
# and load the compiled model ('sm') from it.
log_dir = './log/20201120_193025_standebug/'
# Disabled alternative: create a fresh folder with make_folder, or reuse an
# existing one.
# NOTE(review): `args` is not defined in the cells shown here, so the `else`
# branch would need adapting before this is re-enabled.
# existing_directory = None
# task_handle  = 'standebug'
# if existing_directory is None:
#     log_dir = make_folder(task_handle)  
#     print("\n\nCreating new directory: %s" % log_dir)

# else:
#     log_dir = args.existing_directory
#     log_dir = path_backslash(log_dir)
#     print("\n\nReading from existing directory: %s" % log_dir)


In [16]:
# Generate the "1factor" dataset with a fixed random seed and save it to
# log_dir under the name 'data'.
# NOTE(review): data_sim is presumably the number of simulated observations
# and the literal 1 a model/variant number — confirm against the Data class.
data_sim = 1000
expdata = Data("1factor", 1, data_sim, random_seed=4)
expdata.generate()
save_obj(expdata, 'data', log_dir)

In [17]:
# expdata = load_obj('data', log_dir)

In [18]:
# compile_model(0, log_dir)

In [19]:
# Load the object stored as 'sm' in log_dir (compile_model saves the compiled
# Stan model under that name). This requires that it was saved there by an
# earlier run.
sm = load_obj('sm', log_dir)

In [20]:
# Sample from the loaded model on the generated data: a single chain with
# 5000 warmup iterations followed by 5000 kept iterations
# (run_stan_model passes iter = num_samples + num_warmup).
fit_run = run_stan_model(
    data = expdata.get_stan_data(),
    compiled_model = sm,
    num_samples = 5000, 
    num_warmup = 5000,
    num_chains = 1
    )

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


In [21]:
# Parameters to pull out of the fit. Of these, only 'betabeta' is used in the
# cells visible below.
param_names = ['beta', 'alpha', 'zz', 'betabeta']

# permuted=False: per the PyStan 2 docs the draws keep their sampling order
# and the chains are not merged — TODO confirm for the installed version.
ps = fit_run.extract(
        permuted=False, pars=param_names)

In [22]:
# Posterior draws of 'betabeta'. Extracted with permuted=False, PyStan 2 lays
# them out as [iterations, chains, K, K] (TODO confirm for the installed
# version). Pool all chains into a single draw axis explicitly: np.squeeze
# only dropped the chain axis when there was exactly one chain, and would
# also have dropped any other length-1 axis.
b = ps['betabeta']
b = b.reshape((-1,) + b.shape[-2:])

# Posterior mean and 2.5% / 97.5% quantiles per matrix entry, each flattened
# to the upper triangle so the three arrays line up element by element.
estb = flatten_corr(np.mean(b, 0))
quantb = np.quantile(b, q=[0.025, 0.975], axis=0)
qb1 = flatten_corr(quantb[0])
qb2 = flatten_corr(quantb[1])


In [23]:
# One row per flattened upper-triangle entry: posterior mean, the two
# quantiles, and the same entry of outer(beta, beta) computed from the
# beta stored in the generated data.
true_beta = expdata.raw_data['beta']
df = pd.DataFrame({
    'estimate': estb,
    'q1': qb1,
    'q2': qb2,
    'realdata': flatten_corr(np.outer(true_beta, true_beta)),
}).reset_index()  # exposes the row number as an 'index' column
df.head()

Unnamed: 0,index,estimate,q1,q2,realdata
0,0,1.0,1.0,1.0,1.0
1,1,0.386703,0.153222,0.740604,0.7
2,2,0.659457,0.305369,1.218804,0.8
3,3,0.4331,0.162042,0.826646,0.5
4,4,0.781381,0.400535,1.361392,0.9


In [24]:
# One row per entry of df:
#   bar        -> span from q1 to q2
#   blue point -> estimate (MCMC samples)
#   red point  -> realdata (real data)
row = 'index:N'
base = alt.Chart(df)

c1 = base.mark_bar(opacity=0.6).encode(x='q1', x2='q2', y=row)
c2 = base.mark_point(opacity=1, color='blue').encode(x='estimate', y=row)
c3 = base.mark_point(opacity=1, color='red').encode(x='realdata', y=row)

c1 + c2 + c3