In [1]:
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
)
from codebase.plot import plot_density, plot_line, get_post_df
import altair as alt
from codebase.classes_data import Data
from codebase.ibis import exp_and_normalise
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from codebase.ibis import post_process_sign
from pdb import set_trace


# Lift Altair's row cap on embedded datasets for the charts built later in this notebook.
alt.data_transformers.disable_max_rows()


DataTransformerRegistry.enable('default')

In [7]:
# Run configuration: set `existing_directory` to a path to reuse earlier results,
# or leave it as None to have a new log folder created for this task.
task_handle = 'smc2_mcmc_init'
gen_model = 0
existing_directory = None

if existing_directory is not None:
    # Reuse a previous run; path_backslash is applied to the supplied path.
    log_dir = path_backslash(existing_directory)
    print("\n\nReading from existing directory: %s" % log_dir)
else:
    # Fresh run: make_folder returns the directory to log into.
    log_dir = make_folder(task_handle)
    print("\n\nCreating new directory: %s" % log_dir)
    




Creating new directory: ./log/20210222_222524_smc2_mcmc_init/


In [9]:
# NOTE(review): hard-coded path that overrides the `log_dir` computed in the
# directory-setup cell; it pins the notebook to one specific earlier run.
log_dir = './log/20210222_222524_smc2_mcmc_init/'

In [3]:
from codebase.classes_smc2 import ParticlesLVM
from codebase.ibis import model_phonebook, essl
from codebase.mcmc_tlk_latent import (
    gen_latent_weights_master
)
import numpy as np
from tqdm import tqdm
from codebase.file_utils import (
    save_obj,
    load_obj,
)
from scipy.special import logsumexp
from pdb import set_trace


def run_smc2(
    exp_data,
    model_num,
    size,
    bundle_size,
    gen_model,
    log_dir,
    initial_particles,
    degeneracy_limit=0.5,
    name="smc2",
):
    """Run the SMC^2 sampler over ``exp_data``, starting from supplied particles.

    Parameters
    ----------
    exp_data
        Data object exposing ``size`` and the ``get_stan_data*`` accessors
        used below.
    model_num
        Model identifier passed to ``model_phonebook`` and ``ParticlesLVM``.
    size
        Number of particles (``ParticlesLVM(size=...)``).
    bundle_size
        Forwarded to ``ParticlesLVM`` as ``bundle_size``.
    gen_model
        If truthy, the prior and main models are compiled; otherwise loaded.
    log_dir
        Directory given to ``particles.set_log_dir`` and to every
        ``save_obj`` checkpoint.
    initial_particles
        Mapping with ``'beta'`` and ``'alpha'`` entries, each indexable by
        particle number ``0 .. size-1``; these overwrite the prior draws.
    degeneracy_limit
        Fraction of ``particles.size``; when ``essl(weights)`` falls below
        ``degeneracy_limit * particles.size`` the particles are resampled
        and jittered.
    name
        Currently unused: the particle system is always named
        ``"ibis_lvm"``. NOTE(review): confirm whether this should be
        forwarded to ``ParticlesLVM``.

    Returns
    -------
    dict
        Keys ``"particles"``, ``"log_lklhds"``, ``"marg_lklhd"`` and
        ``"jitter_corrs"``. ``log_lklhds[t]`` is ``-inf`` for time steps
        that were not processed; ``jitter_corrs[p][t]`` is ``0`` for steps
        where no jitter took place.
    """
    phonebook = model_phonebook(model_num)
    param_names = phonebook["param_names"]
    latent_names = phonebook["latent_names"]

    # Pre/post-jitter correlation per parameter, one slot per time step.
    jitter_corrs = {p: np.zeros(exp_data.size) for p in param_names}

    particles = ParticlesLVM(
        name="ibis_lvm",
        model_num=model_num,
        size=size,
        bundle_size=bundle_size,
        param_names=param_names,
        latent_names=latent_names,
        latent_model_num=1,
    )
    particles.set_log_dir(log_dir)
    if gen_model:
        particles.compile_prior_model()
        particles.compile_model()
    else:
        particles.load_prior_model()
        particles.load_model()

    # Filled with -inf (not np.empty) so that time steps skipped by the loop
    # below contribute exp(-inf) = 0 to the log-sum-exp instead of whatever
    # happened to be in uninitialised memory.
    log_lklhds = np.full(exp_data.size, -np.inf)

    particles.initialize_particles()
    particles.initialize_bundles(exp_data.get_stan_data())
    particles.initialize_latent_var_given_theta(exp_data.get_stan_data())
    particles.sample_prior_particles(exp_data.get_stan_data())  # sample prior particles

    # Replace the prior draws with the caller-supplied starting values.
    for m in range(particles.size):
        particles.particles[m].particles['beta'] = initial_particles['beta'][m]
        particles.particles[m].particles['alpha'] = initial_particles['alpha'][m]

    particles.reset_weights()  # set weights to 0
    particles.initialize_counter(exp_data.get_stan_data())

    # NOTE(review): the loop starts at t = 50; presumably this matches the
    # number of observations used to produce `initial_particles` -- confirm.
    first_t = 50
    for t in tqdm(range(first_t, exp_data.size)):
        particles.sample_latent_bundle_at_t(t, exp_data.get_stan_data_at_t2(t))
        particles.get_theta_incremental_weights_at_t(exp_data.get_stan_data_at_t(t))
        log_lklhds[t] = particles.get_loglikelihood_estimate()
        particles.update_weights()

        # Resample + jitter when the ESS drops below the caller's threshold,
        # except at the final time step.
        degenerate = essl(particles.weights) < degeneracy_limit * particles.size
        if degenerate and (t + 1) < exp_data.size:
            particles.add_ess(t)
            particles.resample_particles_bundles()

            particles.gather_latent_variables_up_to_t(
                t + 1,
                exp_data.get_stan_data_upto_t(t + 1)
            )

            particles.jitter_bundles_and_pick_one(exp_data.get_stan_data_upto_t(t + 1))
            particles.check_latent_particles_are_distinct()

            # Snapshot parameters before the jitter move ...
            pre_jitter = {
                p: particles.extract_particles_in_numpy_array(p).flatten()
                for p in param_names
            }

            particles.jitter(exp_data.get_stan_data_upto_t(t + 1))

            # ... and record their correlation with the post-jitter values.
            for p in param_names:
                jitter_corrs[p][t] = np.corrcoef(
                    pre_jitter[p], particles.extract_particles_in_numpy_array(p).flatten()
                )[0, 1]

            particles.reset_weights()
            particles.check_particles_are_distinct()

        # Checkpoint after every time step so a long run can be inspected.
        save_obj(t, "t", log_dir)
        save_obj(particles, "particles", log_dir)
        save_obj(jitter_corrs, "jitter_corrs", log_dir)
        save_obj(log_lklhds, "log_lklhds", log_dir)

    print("\n\n")
    # NOTE(review): exp(logsumexp(x)) equals the SUM of the per-step
    # likelihood estimates; confirm this is the intended estimator.
    marg_lklhd = np.exp(logsumexp(log_lklhds))
    print("Marginal Likelihood %.5f" % marg_lklhd)
    save_obj(marg_lklhd, "marg_lklhd", log_dir)

    output = dict()
    output["particles"] = particles
    output["log_lklhds"] = log_lklhds
    output["marg_lklhd"] = marg_lklhd
    output["jitter_corrs"] = jitter_corrs
    return output


In [10]:

# Simulate the data set (fixed seed) and store it in the run directory.
exp_data = Data(name='smc2', model_num=1, size=200, random_seed=2)
exp_data.generate()
save_obj(exp_data, 'data', log_dir)

# Model number recorded for the fitting steps.
model_num = 2

In [10]:
from run_mcmc import run_mcmc

# Sampler settings for the MCMC run on the data up to t = 50.
num_warmup = 200
num_samples = 1000
gen_model = False

param_names = ['beta', 'alpha']
latent_names = ['z', 'y']

mcmc_settings = dict(
    stan_data=exp_data.get_stan_data_upto_t(50),
    nsim_mcmc=num_samples,
    num_warmup=num_warmup,
    model_num=2,
    bundle_size=100,
    gen_model=gen_model,
    param_names=param_names,
    latent_names=latent_names,
    log_dir=log_dir,
    adapt_nsim=100,
)
ps = run_mcmc(**mcmc_settings)

# Persist the draws so later cells can reload them without re-running MCMC.
save_obj(ps, 'mcmc_post_samples', log_dir)


100%|██████████| 1000/1000 [10:26<00:00,  1.60it/s]


In [11]:
# Reload the MCMC draws saved under 'mcmc_post_samples' in the run directory.
ps = load_obj('mcmc_post_samples', log_dir)


In [12]:
# Keep every second MCMC draw of each parameter as SMC^2 starting values.
init = {key: ps[key][::2] for key in ('beta', 'alpha')}


##

In [13]:
# Run SMC^2 on the simulated data, seeded with the thinned MCMC draws in `init`.
smc2 = run_smc2(
    exp_data=exp_data,
    log_dir=log_dir,
    initial_particles=init,
    model_num=2,
    size=500,
    bundle_size=100,
    gen_model=False,
    degeneracy_limit=0.5,
    name="smc2",
)

100%|██████████| 150/150 [5:20:08<00:00, 128.06s/it]  




Marginal Likelihood inf





In [27]:
# Store the full run_smc2 output dict in the run directory.
save_obj(smc2, 'smc2', log_dir)


In [25]:
# Particle system returned by run_smc2 under the "particles" key.
particles = smc2['particles']

In [28]:
# Inspect the 'beta' values held by the particles as a NumPy array.
particles.extract_particles_in_numpy_array('beta')

array([[[-0.37181044],
        [-0.55540187],
        [-0.22507687],
        [-0.42723373],
        [-0.35100037],
        [-0.7741572 ]],

       [[ 0.27469449],
        [ 0.50722065],
        [ 0.00519058],
        [ 0.20961738],
        [ 0.72375394],
        [ 0.06089776]],

       [[-0.0537538 ],
        [ 0.5692004 ],
        [ 0.4988369 ],
        [ 0.03802511],
        [ 0.16497868],
        [ 0.66510702]],

       ...,

       [[-0.09447956],
        [ 0.04612141],
        [-0.53054904],
        [-0.16416096],
        [-0.19252079],
        [ 0.19850734]],

       [[ 0.11716904],
        [ 0.61301755],
        [ 0.41272381],
        [ 0.39393501],
        [ 0.34902675],
        [ 0.63890587]],

       [[-0.25609193],
        [ 0.71862435],
        [ 0.43928804],
        [ 0.13744339],
        [ 0.10633901],
        [ 0.28081014]]])

In [29]:
# Pre/post-jitter correlations checkpointed by run_smc2. Entries stay at 0 for
# time steps where no jitter happened (the array is zero-initialised and only
# written inside the resampling branch).
corrs = load_obj('jitter_corrs', log_dir)
corrs['beta']

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.68731486, 0.        ,
       0.        , 0.63884241, 0.        , 0.        , 0.62719202,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.53890825, 0.        , 0.628479  , 0.        ,
       0.66951228, 0.        , 0.        , 0.71119511, 0.     

In [30]:
# Acceptance rates recorded for particle 0, restricted to the entries selected
# by the boolean mask from get_threshold_ess_indicator().
particles.get_acceptance_rate_for_particle_m(0)[particles.get_threshold_ess_indicator()]

  return (accs/trials)


array([1.        , 0.8       , 0.91666667, 0.82608696, 1.        ,
       0.76190476, 0.8       , 1.        , 0.94444444, 0.94117647,
       1.        , 0.73333333, 1.        , 0.92307692, 0.91666667,
       1.        , 1.        , 1.        , 0.875     , 0.85714286,
       1.        , 1.        , 0.75      , 1.        , 1.        ,
       1.        ])

In [27]:
# Boolean mask from the particle system (same call used to filter acceptance rates).
# The original line was a syntax error (unclosed bracket around a string literal).
particles.get_threshold_ess_indicator()

array([False,  True,  True,  True, False,  True, False, False,  True,
       False, False,  True,  True, False, False, False,  True,  True,
       False, False, False,  True,  True, False, False,  True, False,
       False, False, False, False,  True, False, False, False,  True,
       False, False, False,  True, False, False, False, False, False,
        True, False, False, False, False,  True, False, False, False,
       False,  True, False, False, False, False, False,  True, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [31]:
# NOTE(review): this mutates `particles` in place, so re-running the cell
# resamples again. Presumably done so the particles extracted below can be
# treated as equally weighted -- confirm.
particles.resample_particles_bundles()

# The remaining steps of run_smc2's resample-and-jitter branch are left
# disabled here (they would also need `t`, which is not defined at notebook level).
# particles.gather_latent_variables_up_to_t(
#     t+1, 
#     exp_data.get_stan_data_upto_t(t+1)
# )

# particles.jitter_bundles_and_pick_one(exp_data.get_stan_data_upto_t(t + 1))
# particles.check_latent_particles_are_distinct()

# particles.jitter(exp_data.get_stan_data_upto_t(t + 1))

In [32]:
# Collect the particle values of every model parameter, keyed by parameter name.
ps_smc2 = {
    param_name: particles.extract_particles_in_numpy_array(param_name)
    for param_name in particles.param_names
}


In [33]:
# Apply the project's sign post-processing to the extracted samples.
# NOTE(review): presumably resolves sign indeterminacy of the loadings -- confirm.
ps_smc2 = post_process_sign(ps_smc2)


## Plot

In [34]:
# Density plot of the SMC2 samples for 'beta'.
param = 'beta'
df = get_post_df(ps_smc2[param]).assign(source='smc2')
plot_density(df, height=100)

INFO:numexpr.utils:Note: NumExpr detected 24 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [35]:
# Density plot of the SMC2 samples for 'alpha'.
param = 'alpha'
df = get_post_df(ps_smc2[param]).assign(source='smc2')
plot_density(df, height=100)

## Plot SMC2 posterior quantile intervals

In [36]:
param = 'beta'

# 2.5% and 97.5% quantiles of the samples for every (row, col) entry.
by_entry = get_post_df(ps_smc2[param]).groupby(['row', 'col'])[['value']]
df_quant = by_entry.quantile(0.025).reset_index().rename(columns={'value': 'q1'})
df_quant2 = by_entry.quantile(0.975).reset_index().rename(columns={'value': 'q2'})

df = df_quant.merge(df_quant2, on=['row', 'col'])
df['source'] = 'smc2'

# Simple quantile chart: one bar from q1 to q2 per entry.
c1 = (
    alt.Chart(df)
    .mark_bar(opacity=0.6)
    .encode(
        alt.X('q1', title=None),
        alt.X2('q2', title=None),
        alt.Row('row'),
        alt.Column('col'),
        alt.Color('source'),
    )
)
c1

In [37]:
# Join the SMC2 quantile bounds with the values stored in
# exp_data.raw_data['beta'], keyed by an "r_<row>.c_<col>" label.
#
# `df` keeps its row/col columns (only an 'index' column is added), so this
# cell can be re-run without failing on a missing `df.row`.
df = df.assign(index='r_' + df.row.astype(str) + '.c_' + df.col.astype(str))
intervals = df.loc[:, ['index', 'q1', 'q2']].assign(source='smc2')

dd = pd.DataFrame(exp_data.raw_data['beta'], columns=['data'])
dd['col'] = 0
# One row label per entry of the data, rather than a hard-coded count of 6.
dd['row'] = np.arange(len(dd))
dd['index'] = 'r_' + dd.row.astype(str) + '.c_' + dd.col.astype(str)
dd = dd.loc[:, ['index', 'data']]

plot_data = intervals.merge(dd, on=['index'])

In [38]:
# Both layers draw from the same frame: bars span q1..q2, red points mark `data`.
base = alt.Chart(plot_data)

c1 = base.mark_bar(opacity=0.6).encode(
    x=alt.X('q1', title=None, scale=alt.Scale(domain=[-2, 2])),
    x2=alt.X2('q2', title=None),
    color=alt.Color('source'),
)

c2 = base.mark_point(opacity=1, color='red').encode(
    x=alt.X('data', title=None),
)

# One facet per entry label, stacked in a single column.
(c1 + c2).facet('index', columns=1)


In [39]:
# Reload the marginal-likelihood value saved by run_smc2 (shown as `inf` for this run).
load_obj('marg_lklhd', log_dir)

inf