In [39]:
from codebase.classesmcmc import Data, MCMC
import  pystan
import argparse
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
import altair as alt 
import pandas as pd
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

##

In [27]:
def compile_model(model_num, log_dir, save=True):
    """
    Compile the 1-factor Stan program stored in
    `model_<model_num>.stan`.

    Inputs
    ============
    - model_num : identifier substituted into the
        Stan file name
    - log_dir : directory handed to `save_obj` when
        `save` is true
    - save : when true, store the compiled model
        under the name 'sm' via `save_obj`
    Output
    ============
    -  the compiled `pystan.StanModel`
    """
    path_to_stan = './codebase/stancode/models/1factor/'
    stan_file = '%smodel_%s.stan'%(path_to_stan, model_num)

    with open(stan_file, 'r') as source:
        model_code = source.read()

    sm = pystan.StanModel(model_code=model_code, verbose=False)

    # Nothing to persist: hand the compiled model straight back.
    if not save:
        return sm

    save_obj(sm, 'sm', log_dir)
    return sm


def run_stan_model(
    data,
    compiled_model,
    num_samples,
    num_warmup,
    num_chains,
    ):
    """
    Draw samples from an already compiled Stan model.

    Inputs
    ============
    - data : passed unchanged as the `data` argument
        of `compiled_model.sampling`
    - compiled_model : object exposing a `sampling`
        method
    - num_samples : number of iterations to run after
        warmup
    - num_warmup : number of warmup iterations
    - num_chains : number of chains
    Output
    ============
    -  whatever `compiled_model.sampling` returns
    """
    # `sampling` is given the total iteration count, warmup included.
    total_iterations = num_samples + num_warmup

    sampler_control = {
        "metric": "diag_e",  # diag_e/dense_e
        "adapt_delta": 0.999,
        "max_treedepth": 25,
        "adapt_engaged": True,
    }

    # No `init` is passed, so the sampler's own default initialisation applies.
    return compiled_model.sampling(
        data=data,
        iter=total_iterations,
        warmup=num_warmup,
        chains=num_chains,
        control=sampler_control,
        n_jobs=1,
    )

def flatten_corr(a, offset = 0):
    """
    Pull the upper triangle of a [K, K] matrix
    out into a flat [M,] array.

    Inputs
    ============
    - a : matrix to flatten, should be
        of dimension [K,K]
    - offset : diagonal to start from; 0 (the default)
        keeps the main diagonal, 1 starts just
        above it
    Output
    ============
    -  an array of size [M,], elements taken
        row by row
    """
    size = a.shape[0]
    rows, cols = np.triu_indices(size, k=offset)
    return a[rows, cols]


In [28]:
# Directory used by the cells below to save and reload run artifacts
# (the simulated data and the compiled Stan model). It is pinned to an
# existing run folder rather than created fresh.
log_dir = './log/20201120_184708_standebug/'
# Disabled alternative kept for reference: create a new folder with
# make_folder(task_handle), or reuse a supplied one. NOTE(review): the
# `else` branch reads `args`, which no visible cell defines.
# existing_directory = None
# task_handle  = 'standebug'
# if existing_directory is None:
#     log_dir = make_folder(task_handle)  
#     print("\n\nCreating new directory: %s" % log_dir)

# else:
#     log_dir = args.existing_directory
#     log_dir = path_backslash(log_dir)
#     print("\n\nReading from existing directory: %s" % log_dir)


In [29]:
# Build and generate a "1factor" dataset with a fixed random seed, then store
# it in log_dir under the name 'data' so it can be reloaded with
# load_obj('data', log_dir).
# data_sim is the third positional argument of Data(...) — presumably the
# number of simulated observations; TODO confirm against codebase.classesmcmc.
data_sim = 1000
expdata = Data("1factor", 1, data_sim, random_seed=4)
expdata.generate()
save_obj(expdata, 'data', log_dir)

In [30]:
# expdata = load_obj('data', log_dir)

In [31]:
# sm = compile_model(1, log_dir)

In [32]:
# Reuse the compiled model stored under 'sm' in log_dir. compile_model(...,
# save=True) is what writes that object, so it must have been run once for
# this log_dir before this cell can succeed.
sm = load_obj('sm', log_dir)

In [33]:
# Single chain with equal-length warmup and sampling phases; run_stan_model
# passes num_samples + num_warmup to the sampler as its total `iter`.
fit_run = run_stan_model(
    data = expdata.get_stan_data(),
    compiled_model = sm,
    num_samples = 3000, 
    num_warmup = 3000,
    num_chains = 1
    )

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


In [34]:
# Quantities to pull out of the fit. Only 'beta' and 'betabeta' are read by
# the cells visible below.
param_names = ['beta', 'alpha', 'zz', 'betabeta']

# NOTE(review): permuted=False is expected to keep the draws in sampling order
# with a separate chain axis; the np.squeeze calls below rely on that axis
# having length 1 (num_chains = 1) — confirm against the PyStan extract docs.
ps = fit_run.extract(
        permuted=False, pars=param_names)

## Check $\beta \beta'$ values

In [35]:
# Summarise the 'betabeta' draws: mean and 2.5% / 97.5% quantiles over the
# first axis, each flattened to its upper triangle (diagonal included).
b = np.squeeze(ps['betabeta'])
estb = flatten_corr(b.mean(axis=0))
quantb = np.quantile(b, q=[0.025, 0.975], axis=0)
qb1, qb2 = (flatten_corr(bound) for bound in quantb)

# 'realdata' is the outer product of the stored beta with itself, flattened
# the same way so rows line up with the estimates.
df = pd.DataFrame({
    'estimate': estb,
    'q1': qb1,
    'q2': qb2,
    'realdata': flatten_corr(
        np.outer(expdata.raw_data['beta'], expdata.raw_data['beta'])
    ),
}).reset_index()
df.head()

Unnamed: 0,index,estimate,q1,q2,realdata
0,0,1.633169,0.60993,3.654211,1.0
1,1,0.500683,0.211528,0.866774,0.7
2,2,0.859132,0.451698,1.362737,0.8
3,3,0.569283,0.225306,0.999295,0.5
4,4,1.030382,0.553913,1.662351,0.9


In [36]:
# Horizontal bar from q1 to q2 for every row of df.
c1 = alt.Chart(df).mark_bar(opacity=0.6).encode(
    x=alt.X('q1'),
    x2=alt.X2('q2'),
    y=alt.Y('index:N'),
)

# Blue point: the 'estimate' column.
c2 = alt.Chart(df).mark_point(opacity=1, color='blue').encode(
    x=alt.X('estimate'),
    y=alt.Y('index:N'),
)

# Red point: the 'realdata' column.
c3 = alt.Chart(df).mark_point(opacity=1, color='red').encode(
    x=alt.X('realdata'),
    y=alt.Y('index:N'),
)

# Layer the three charts on shared axes.
c1 + c2 + c3

# red: real values (from `expdata.raw_data`)
# blue: mean of the MCMC samples; bars span the 2.5%–97.5% quantiles of the samples

## Check $\beta$ values

In [37]:
# Summarise the 'beta' draws: mean and 2.5% / 97.5% quantiles over the
# first axis, next to the stored beta values.
b = np.squeeze(ps['beta'])
estb = b.mean(axis=0)
quantb = np.quantile(b, q=[0.025, 0.975], axis=0)
qb1, qb2 = quantb[0], quantb[1]

df2 = (
    pd.DataFrame({'estimate': estb, 'q1': qb1, 'q2': qb2})
    .assign(realdata=expdata.raw_data['beta'])
    .reset_index()
)
df2

Unnamed: 0,index,estimate,q1,q2,realdata
0,0,1.245144,0.78098,1.911599,1.0
1,1,0.408575,0.172484,0.665631,0.7
2,2,0.699722,0.397859,1.03491,0.8
3,3,0.461318,0.208599,0.741131,0.5
4,4,0.836368,0.519705,1.23023,0.9
5,5,0.631918,0.286806,1.023826,0.6


In [38]:
# Horizontal bar from q1 to q2 for every row of df2.
c1 = alt.Chart(df2).mark_bar(opacity=0.6).encode(
    x=alt.X('q1'),
    x2=alt.X2('q2'),
    y=alt.Y('index:N'),
)

# Blue point: the 'estimate' column.
c2 = alt.Chart(df2).mark_point(opacity=1, color='blue').encode(
    x=alt.X('estimate'),
    y=alt.Y('index:N'),
)

# Red point: the 'realdata' column.
c3 = alt.Chart(df2).mark_point(opacity=1, color='red').encode(
    x=alt.X('realdata'),
    y=alt.Y('index:N'),
)

# Layer the three charts on shared axes.
c1 + c2 + c3

# red: real values (from `expdata.raw_data`)
# blue: mean of the MCMC samples; bars span the 2.5%–97.5% quantiles of the samples

## Mixing 

In [48]:
# Long-format table of the beta draws for the trace plots: one row per
# (draw index, beta element) pair with columns 'index', 'col' and 'value'.
# NOTE(review): np.squeeze is assumed to leave a 2-D (draws, elements) array,
# which holds for the single-chain run above — confirm before using more chains.
beta_draws = np.squeeze(ps['beta'])

df3 = (
    pd.DataFrame(
        beta_draws,
        # Name the columns col1..colK from the data itself instead of
        # hard-coding six elements.
        columns=['col' + str(i) for i in range(1, beta_draws.shape[1] + 1)],
    )
    .reset_index()
    # var_name must be a scalar for ordinary (non-MultiIndex) columns;
    # recent pandas rejects a list here.
    .melt(id_vars=['index'], var_name='col')
)

In [53]:
# Trace of the sampled values against the draw index, drawn as a thin line.
mcmc_chart = alt.Chart(df3).mark_line(opacity=1, strokeWidth=1).encode(
    x=alt.X('index:O', title=None),
    y=alt.Y('value', title=None),
).properties(height=100, width=200)

# One panel per value of 'col', three panels to a row.
mcmc_chart.facet(alt.Facet('col'), columns=3)