In [1]:
from codebase.classes import Particles
from codebase.classes_data import Data
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from codebase.ibis import essl, exp_and_normalise, model_phonebook, run_mcmc
from codebase.plot import get_post_df, plot_density
from codebase.run_tlk import model_phonebook
from tqdm import tqdm
from scipy.special import logsumexp
import altair as alt
# Lift Altair's default cap on the number of rows a chart's dataset may contain.
alt.data_transformers.disable_max_rows()


import pystan

## Model Legend

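* m5 = AZ (labelled `CFA2` in the plots below)
* m7 = EZ (labelled `CFA1` in the plots below)
* m9 = EFA
* m10 = EFA-C (labelled `EFA.C` in the plots below)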

## Load Model Evidence

In [2]:
# Saved-run directory for every model key.
# The runs under log/fabian_sim/EZ/ are an alternative set of inputs, kept for reference:
#   'm5':  'log/fabian_sim/EZ/20210912_182218_ezsim_m5/'
#   'm7':  'log/fabian_sim/EZ/20210912_182221_ezsim_m7/'
#   'm9':  'log/fabian_sim/EZ/20210912_182227_ezsim_m9/'
#   'm10': 'log/fabian_sim/EZ/20210912_182229_ezsim_m10/'
log_dirs = {
    'm5': 'log/fabian_sim/CL/20210829_134729_sim_m5/',
    'm7': 'log/fabian_sim/CL/20210829_134725_sim_m7/',
    'm9': 'log/fabian_sim/CL/20210829_134730_sim_m9/',
    'm10': 'log/fabian_sim/CL/20210829_134734_sim_m10/',
}

model_nums = [5, 7, 9, 10]

# Load the stored 'log_lklhds' series of each run, skip its first 30 entries and
# accumulate the remainder into a running total, keyed 'm5', 'm7', ...
# NOTE(review): the offset 30 presumably skips an initial warm-up batch — confirm.
bf = {
    key: np.cumsum(load_obj('log_lklhds', log_dirs[key])[30:])
    for key in ('m' + str(num) for num in model_nums)
}



In [3]:
# Load the stored 'data' object from the m5 run directory and display its raw contents.
data = load_obj('data', log_dirs['m5'])
data.raw_data


{'random_seed': 0,
 'N': 200,
 'K': 2,
 'J': 6,
 'alpha': array([0., 0., 0., 0., 0., 0.]),
 'beta': array([[1. , 0. ],
        [0.8, 0.3],
        [0.8, 0. ],
        [0. , 1. ],
        [0.3, 0.8],
        [0.3, 0.8]]),
 'sigma_z': array([0.80622577, 0.80622577]),
 'Phi_corr': array([[1. , 0.2],
        [0.2, 1. ]]),
 'Phi_cov': array([[0.65, 0.13],
        [0.13, 0.65]]),
 'Marg_cov': array([[1.    , 0.559 , 0.52  , 0.13  , 0.299 , 0.299 ],
        [0.559 , 1.    , 0.4472, 0.299 , 0.4069, 0.4069],
        [0.52  , 0.4472, 1.    , 0.104 , 0.2392, 0.2392],
        [0.13  , 0.299 , 0.104 , 1.    , 0.559 , 0.559 ],
        [0.299 , 0.4069, 0.2392, 0.559 , 1.    , 0.5369],
        [0.299 , 0.4069, 0.2392, 0.559 , 0.5369, 1.    ]]),
 'Theta': array([[0.35  , 0.    , 0.    , 0.    , 0.    , 0.    ],
        [0.    , 0.4631, 0.    , 0.    , 0.    , 0.    ],
        [0.    , 0.    , 0.584 , 0.    , 0.    , 0.    ],
        [0.    , 0.    , 0.    , 0.35  , 0.    , 0.    ],
        [0.    , 0. 

In [4]:
# Relabel the per-model cumulative series with the names used in the plots.
plot_labels = {'m5': 'CFA2', 'm7': 'CFA1', 'm9': 'EFA', 'm10': 'EFA.C'}
cum_evidence = pd.DataFrame.from_dict(bf).rename(columns=plot_labels)

# One column per competitor holding CFA2 minus that competitor. The series in
# `bf` were sliced from position 30, so the index column `t` starts at 30 too.
contrasts = pd.DataFrame(
    {'t': np.arange(30, len(cum_evidence) + 30)},
    index=cum_evidence.index,
)
for rival in ('EFA', 'EFA.C', 'CFA1'):
    contrasts['CFA2 / ' + rival] = cum_evidence['CFA2'] - cum_evidence[rival]

# Long format (t, Model, value) for plotting.
bfdf = contrasts.melt(id_vars='t', var_name='Model')

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [5]:
# Line chart of the log Bayes factor contrasts in `bfdf`, one line per contrast.
width, height = 600, 400

# x-axis is fixed to [0, 200]; y-axis shows the melted 'value' column.
x_enc = alt.X('t:Q', title='Index i', scale=alt.Scale(domain=[0, 200]))
y_enc = alt.Y('value:Q', title='Log Bayes Factor')
colour_enc = alt.Color('Model')

lines = alt.Chart(bfdf).mark_line(strokeWidth=2).encode(x_enc, y_enc, colour_enc)

c = (
    lines
    .properties(width=width, height=height)
    .configure_legend(titleFontSize=16, labelFontSize=14, symbolStrokeWidth=6)
    .configure_axis(labelFontSize=14, titleFontSize=16)
    .interactive()
)

c

In [6]:
# For each model: take the cumulative series in `bf`, drop its first 30 entries,
# and store exp(logsumexp(...)) of what remains.
# NOTE(review): `bf[key]` was already sliced with [30:] and cumulatively summed
# when it was loaded, so this skips a further 30 entries and log-sum-exps running
# totals rather than raw log-likelihoods — confirm this is the intended estimate.
model_evidence = {
    key: np.exp(logsumexp(bf[key][30:]))
    for key in ('m' + str(num) for num in model_nums)
}

model_evidence

{'m5': 4.394606790445718e-108,
 'm7': 7.723572896058449e-109,
 'm9': 4.709361544015453e-109,
 'm10': 1.9651160945145556e-108}

## Scoring Rules


In [7]:
# load existing results
scrl = dict()
for model in model_nums:
    scrl['m'+str(model)] = np.cumsum(load_obj('scoring_rule', log_dirs['m'+str(model)])[31:])


In [8]:
# Relabel the per-model cumulative scores with the names used in the plots.
plot_labels = {'m5': 'CFA2', 'm7': 'CFA1', 'm9': 'EFA', 'm10': 'EFA.C'}
cum_score = pd.DataFrame.from_dict(scrl).rename(columns=plot_labels)

# One column per competitor holding that competitor's cumulative score minus CFA2's.
# NOTE(review): `t` starts at 30 although `scrl` was sliced from position 31 — confirm.
score_gaps = pd.DataFrame(
    {'t': np.arange(30, len(cum_score) + 30)},
    index=cum_score.index,
)
for rival in ('EFA', 'EFA.C', 'CFA1'):
    score_gaps[rival + ' - CFA2'] = cum_score[rival] - cum_score['CFA2']

# Long format (t, Model, value) for plotting.
scrldf = score_gaps.melt(id_vars='t', var_name='Model')


In [9]:
# Line chart of the cumulative score differences in `scrldf`, one line per contrast.
width, height = 600, 400

# x-axis is fixed to [0, 200]; y-axis shows the melted 'value' column.
x_enc = alt.X('t:Q', title='Index i', scale=alt.Scale(domain=[0, 200]))
y_enc = alt.Y('value:Q', title='Variogram Score')
colour_enc = alt.Color('Model')

lines = alt.Chart(scrldf).mark_line(strokeWidth=2).encode(x_enc, y_enc, colour_enc)

c = (
    lines
    .properties(width=width, height=height)
    .configure_legend(titleFontSize=16, labelFontSize=14, symbolStrokeWidth=6)
    .configure_axis(labelFontSize=14, titleFontSize=16)
    .interactive()
)

c
