In [1]:
from codebase.classes_ibis_lvm import ParticlesLVM
from codebase.classes_data import Data
from codebase.ibis import model_phonebook, essl
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from codebase.ibis import essl, exp_and_normalise, run_stan_model
from run_ibis import run_ibis

from tqdm import tqdm
from scipy.special import logsumexp
import altair as alt
from codebase.plot import plot_density, get_post_df, plot_correlations
# Turn off Altair's max-rows check so the (large) posterior-draw frames below can be plotted.
alt.data_transformers.disable_max_rows()


DataTransformerRegistry.enable('default')

## 1 factor Sim EZ

In [77]:
# Experiment configuration.
#   task_handle : label given to the simulated data set (passed as Data(name=...)).
#   gen_model   : compile-vs-load flag, falsy here; the IBIS cell re-assigns it before use.
task_handle, gen_model = 'ez_lvm', 0

# All results are read from / written to this fixed run directory.
# To start a fresh run instead, create a new one with `make_folder(task_handle)`;
# to reuse a different existing directory, normalise its path with `path_backslash(...)`.
log_dir = './log/20201230_171819_ez_lvm/'

## Create Data

In [7]:
# generate data
exp_data = Data(
    name = task_handle, 
    model_num = 1, 
    size = 200,
    random_seed = 6
    )
    
exp_data.generate()

In [45]:
# Display the dictionary that is handed to Stan as `data` in the runs below.
exp_data.get_stan_data()

{'N': 200, 'J': 6, 'K': 1, 'D': array([[1, 1, 0, 0, 0, 0],
        [0, 1, 0, 1, 1, 0],
        [0, 0, 0, 0, 1, 0],
        ...,
        [0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0],
        [0, 1, 0, 1, 1, 0]])}

In [47]:
# particles = ParticlesLVM(
#     name = 'ibis_lvm',
#     model_num = 7,
#     size = 100,
#     bundle_size=50,
#     param_names = ['param_names'],
#     latent_names = ['latent_names'],
#     latent_model_num= 1
# )
# particles.set_log_dir(log_dir)
# # particles.compile_model()
# particles.compile_prior_model()


## Run HMC

In [24]:
# Compile the 1-factor Stan model with PyStan and cache the compiled object
# in `log_dir` under the name 'sm_hmc'.
import pystan

# Read the Stan program; the file is only needed for the read itself.
with open('codebase/stancode/models/1factor/model_1.stan', 'r') as file:
    model_code = file.read()

# Compilation happens after the file has been closed.
sm = pystan.StanModel(model_code=model_code, verbose=False)
save_obj(sm, 'sm_hmc', log_dir)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_7b691d096ece9442c80a4dc5e87eb167 NOW.


In [48]:
# Sample from the compiled model `sm` on the simulated data:
# a single chain with 1000 warm-up iterations and 1000 samples.
# (To reuse a cached model instead of recompiling: sm = load_obj('sm_hmc', log_dir).)
hmc_settings = dict(
    num_samples=1000,
    num_warmup=1000,
    num_chains=1,
    adapt_engaged=True,
    # initial_values={'beta':[0,0,0,0,0,0]},
)
fit_run = run_stan_model(
    data=exp_data.get_stan_data(),
    compiled_model=sm,
    **hmc_settings
)

In [49]:
# Pull the draws of the listed parameters out of the fit (unpermuted)
# and store them in `log_dir` as 'ps_hmc'.
param_names = ['beta', 'alpha', 'betabeta']
ps = fit_run.extract(pars=param_names, permuted=False)
save_obj(ps, 'ps_hmc', log_dir)

In [8]:
# Restore the HMC draws saved as 'ps_hmc', so the sampling cells need not be re-run.
ps = load_obj( 'ps_hmc', log_dir)

In [9]:
# `beta` as stored in the data set's raw data (presumably the value used in the simulation),
# shown for comparison with the posterior means below.
exp_data.raw_data['beta']

array([1. , 0.7, 0.8, 0.5, 0.9, 0.6])

In [11]:
# Mean of the `beta` draws over the first axis, rounded to two decimals.
np.mean(ps['beta'], axis=0).round(2)

array([[0.58, 1.72, 0.69, 0.81, 0.89, 0.93]])

In [52]:
# Density plot of the HMC draws for one parameter.
param = 'beta'
# get_post_df is given the draws as a 4-D array of shape (1000, 1, 6, 1).
hmc_draws = ps[param].reshape(1000, 1, 6, 1)
df = get_post_df(hmc_draws)
df['source'] = 'hmc'
plot_density(df)
# Optional companion panel (disabled):
# & plot_correlations(ibis['jitter_corrs'][param], height = 150)

## Run IBIS

In [5]:
# load existing results

# log_dir = 'log/20201218_172340_ez_comp/'
# particles = load_obj('particles', log_dir)

In [None]:
# Run IBIS over the simulated data one observation at a time, checkpointing the
# particle system to `log_dir` after every step.

# --- configuration (each setting assigned exactly once) ---
gen_model = False        # True: compile the models; False: load already-compiled ones
model_num = 7
degeneracy_limit = 0.5   # rejuvenate when essl(weights) < degeneracy_limit * particles.size

# Look the model up once; the same entry supplies both name lists.
model_info = model_phonebook(model_num)
param_names = model_info['param_names']
latent_names = model_info['latent_names']

# Per-parameter correlation between particle values before and after each jitter
# step; entries stay 0 for steps at which no jitter was performed.
jitter_corrs = {p: np.zeros(exp_data.size) for p in param_names}

particles = ParticlesLVM(
    name = 'ibis_lvm',
    model_num = model_num,
    size = 1000,
    bundle_size=100,
    param_names = param_names,
    latent_names = latent_names,
    latent_model_num= 1
)
particles.set_log_dir(log_dir)
if gen_model:
    particles.compile_prior_model()
    particles.compile_model()
else:
    particles.load_prior_model()
    particles.load_model()

# NaN marks steps that have not been reached, so an interrupted run never leaves
# uninitialised memory in the array that is saved afterwards.
log_lklhds = np.full(exp_data.size, np.nan)


particles.sample_prior_particles(exp_data.get_stan_data()) # sample prior particles
particles.reset_weights() # set weights to 0


for t in tqdm(range(exp_data.size)):
    # Weight the particles with observation t.
    # NOTE: the data getters are called afresh for every use on purpose; whether the
    # particle methods modify the dict they receive is not visible from here.
    particles.sample_latent_variables(exp_data.get_stan_data_at_t(t))
    particles.get_bundle_weights(exp_data.get_stan_data_at_t(t))

    # Average the latent weights over axis 1 (presumably the bundle axis -- TODO confirm).
    particles.incremental_weights = np.mean(np.squeeze(particles.latent_weights), axis=1)
    log_lklhds[t] =  particles.get_loglikelihood_estimate()

    # Rejuvenate when essl(weights) falls below the limit, except after the final
    # observation, where there is no further step to prepare for.
    if (essl(particles.weights) < degeneracy_limit * particles.size) and (t+1) < exp_data.size:

        particles.resample_particles()

        # Re-draw the latent variables using all data seen so far (up to t+1).
        particles.sample_latent_variables(exp_data.get_stan_data_upto_t(t+1))
        particles.get_bundle_weights(exp_data.get_stan_data_upto_t(t+1))

        particles.sample_latent_particles_star(exp_data.get_stan_data_upto_t(t+1))
        particles.sample_latent_var_given_theta(exp_data.get_stan_data_upto_t(t+1))

        # Snapshot the parameters so the effect of the jitter step can be measured.
        pre_jitter = dict()
        for p in param_names:
            pre_jitter[p] = particles.particles[p].flatten()

        particles.jitter(exp_data.get_stan_data_upto_t(t+1))

        # Correlation between pre- and post-jitter values, per parameter.
        for p in param_names:
            jitter_corrs[p][t] = np.corrcoef(pre_jitter[p],particles.particles[p].flatten())[0,1]

        particles.reset_weights()
    else:
        particles.update_weights()

    # Checkpoint after every step so an interrupted run can be inspected later.
    save_obj(t, 't', log_dir)
    save_obj(particles, 'particles', log_dir)




  0%|          | 0/200 [00:00<?, ?it/s][A[A

  0%|          | 1/200 [00:09<33:04,  9.97s/it][A[A

  1%|          | 2/200 [00:53<1:05:38, 19.89s/it][A[A

  2%|▏         | 3/200 [01:03<56:05, 17.08s/it]  [A[A

  2%|▏         | 4/200 [01:13<49:11, 15.06s/it][A[A

  2%|▎         | 5/200 [02:35<1:53:57, 35.06s/it][A[A

  3%|▎         | 6/200 [02:45<1:29:19, 27.63s/it][A[A

  4%|▎         | 7/200 [04:28<2:41:04, 50.07s/it][A[A

  4%|▍         | 8/200 [04:38<2:01:48, 38.07s/it][A[A

  4%|▍         | 9/200 [04:48<1:34:25, 29.66s/it][A[A

  5%|▌         | 10/200 [07:10<3:20:25, 63.29s/it][A[A

  6%|▌         | 11/200 [07:20<2:28:50, 47.25s/it][A[A

  6%|▌         | 12/200 [10:04<4:17:59, 82.34s/it][A[A

  6%|▋         | 13/200 [10:14<3:09:20, 60.75s/it][A[A

  7%|▋         | 14/200 [10:24<2:21:28, 45.64s/it][A[A

  8%|▊         | 15/200 [13:44<4:43:10, 91.84s/it][A[A

  8%|▊         | 16/200 [13:54<3:26:30, 67.34s/it][A[A

  8%|▊         | 17/200 [14:05<2:33:

In [29]:
# Persist the current IBIS state to `log_dir`: the particle system, the last
# loop index and the per-step log-likelihood estimates (saved in this order).
ibis_state = {
    'particles': particles,
    't': t,
    'log_lklhds': log_lklhds,
}
for file_name, obj in ibis_state.items():
    save_obj(obj, file_name, log_dir)

## Plot Both results

In [90]:
# Reload the checkpointed particle system and print the loop index saved with it.
particles = load_obj('particles', log_dir)
last_saved_t = load_obj('t', log_dir)
print(last_saved_t)

103


In [91]:
# The particles must be resampled before plotting to get rid of the weights.
# NOTE: this changes `particles` in place, so every re-run of this cell resamples again.
# particles = ibis['particles']
particles.resample_particles()

In [92]:
# Mean of the (squeezed) IBIS `beta` particles over the first axis.
np.squeeze(particles.particles['beta']).mean(axis=0)

array([ 0.02164631,  0.03562466,  0.00404236,  0.02752854, -0.01366537,
       -0.0247474 ])

In [93]:
# Overlay the HMC and IBIS densities for `beta`.
param = 'beta'

# HMC draws, reshaped to the 4-D layout (1000, 1, 6, 1) given to get_post_df.
df = get_post_df(ps[param].reshape(1000, 1, 6, 1))
df['source'] = 'hmc'

# IBIS particles are passed to get_post_df as stored.
df2 = get_post_df(particles.particles[param])
df2['source'] = 'ibis'

both_sources = pd.concat([df, df2])
plot_density(both_sources)
# Optional companion panel (disabled):
# & plot_correlations(ibis['jitter_corrs'][param], height = 150)

In [76]:
# Overlay the HMC and IBIS densities for `alpha`.
param = 'alpha'

# HMC draws are passed to get_post_df without reshaping here.
df = get_post_df(ps[param])
df['source'] = 'hmc'

df2 = get_post_df(particles.particles[param])
df2['source'] = 'ibis'

both_sources = pd.concat([df, df2])
plot_density(both_sources)

In [54]:
# Compile the IBIS Stan program (model_7) with PyStan.
# NOTE: this rebinds `sm`, replacing whatever compiled model the name held before.
import pystan

# Read the Stan program; the file is only needed for the read itself.
with open('./codebase/stancode/models/ibis/model_7.stan', 'r') as file:
    model_code = file.read()

# Compilation happens after the file has been closed.
sm = pystan.StanModel(model_code=model_code, verbose=False)
# Caching is left disabled (presumably so the stored 'sm_hmc' is not overwritten):
# save_obj(sm, 'sm_hmc', log_dir)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_2a37a1fbf0634d8c97eba3ed5b0bc46d NOW.


In [59]:
# Build the input for the model_7 check: a copy of the regular Stan data plus the
# latent values `z` from the raw data, supplied under the key 'zz' as a column.
stan_test_data = exp_data.get_stan_data().copy()
# Use the data set's own size rather than a hard-coded 200, so the cell stays
# correct if the `size` passed to Data(...) is changed.
stan_test_data['zz'] = exp_data.raw_data['z'].copy().reshape((exp_data.size, 1))

In [60]:
# Sample from the compiled model `sm` on `stan_test_data` (regular data plus 'zz'):
# a single chain with 1000 warm-up iterations and 1000 samples.
hmc_settings = dict(
    num_samples=1000,
    num_warmup=1000,
    num_chains=1,
    adapt_engaged=True,
    # initial_values={'beta':[0,0,0,0,0,0]},
)
fit_run = run_stan_model(
    data=stan_test_data,
    compiled_model=sm,
    **hmc_settings
)

In [61]:
# Extract the unpermuted draws of `beta` and `alpha` from the latest fit
# (this rebinds `ps`) and show the mean of `beta` over the first axis.
param_names = ['beta', 'alpha']
ps = fit_run.extract(pars=param_names, permuted=False)
np.squeeze(ps['beta']).mean(axis=0)

array([0.86766037, 0.74254563, 0.65507357, 0.74236766, 0.73155055,
       0.52425928])

In [64]:
# `beta` as stored in the data set's raw data (presumably the value used in the simulation),
# shown next to the posterior means for comparison.
exp_data.raw_data['beta']


array([1. , 0.7, 0.8, 0.5, 0.9, 0.6])

In [65]:
# Mean of the `beta` draws over the first axis, rounded to two decimals.
np.mean(ps['beta'], axis=0).round(2)

array([[[0.87],
        [0.74],
        [0.66],
        [0.74],
        [0.73],
        [0.52]]])