In [1]:
from codebase.classes import Particles
from codebase.classes_data import Data
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from codebase.ibis import essl, exp_and_normalise, model_phonebook, run_mcmc
from codebase.plot import get_post_df, plot_density
from codebase.run_tlk import model_phonebook
from tqdm import tqdm
from scipy.special import logsumexp
import altair as alt
alt.data_transformers.disable_max_rows()


import pystan

## Model Legend

* m6 = AZ
* m11 = EZ
* m9 = EFA
* m10 = EFA.C

In [2]:
## 500 particles
# check on mon 18.42
# 6 676
# 9 394
# 10 425
# 11 676




log_dirs = dict()
log_dirs['m6'] = 'log/fabian_batch/20210910_003532_big5_batch_m6_b3/'
log_dirs['m11'] = 'log/fabian_batch/20210910_003728_big5_batch_m11_b3/'
log_dirs['m10'] = 'log/fabian_batch/20210910_003713_big5_batch_m10/'
log_dirs['m9'] = 'log/fabian_batch/20210910_003631_big5_batch_m9/'


for i in [6, 9, 10, 11]:
    print(i, load_obj('t', log_dirs['m'+str(i)]))

6 676
9 394
10 425
11 676


## Load Model Evidence

In [3]:
log_dirs = dict()
log_dirs['m6'] = 'log/fabian_real_muthen/20210829_201302_smallbig5_m6/'
log_dirs['m11'] = 'log/fabian_real_muthen/20210829_201308_smallbig5_m11/'
log_dirs['m10'] = 'log/fabian_real_muthen/20210829_201450_smallbig5_m10/'
log_dirs['m9'] = 'log/fabian_real_muthen/20210829_201305_smallbig5_m9/'
model_nums=[6, 9, 10, 11]

# load existing results
bf = dict()
for model in model_nums:
    bf['M'+str(model)] = np.cumsum(load_obj('log_lklhds', log_dirs['m'+str(model)])[30:])


In [4]:
bfdf = pd.DataFrame.from_dict(bf)
bfdf.columns = ['CFA2', 'EFA', 'EFA.C', 'CFA1']
bfdf['t']  = np.arange(30, len(bfdf)+30)
for i in ['EFA', 'EFA.C', 'CFA1']:
    bfdf['CFA2 / '+str(i)] = bfdf['CFA2']-bfdf[str(i)]
bfdf.drop(['CFA2', 'EFA', 'EFA.C', 'CFA1'], axis=1, inplace=True)
bfdf = bfdf.melt(id_vars='t', var_name='Model')


INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [5]:
width = 600
height = 400
c = alt.Chart(bfdf).mark_line(
    strokeWidth = 2,
    ).encode(
    alt.X('t:Q', title='Index i', scale=alt.Scale(domain=[0,200])),
    alt.Y('value:Q', title='Log Bayes Factor'),
    alt.Color('Model')
    ).properties(
    width=width, height=height
    ).configure_legend(
    titleFontSize=16,
    labelFontSize=14,
    symbolStrokeWidth=6
    ).configure_axis(
    labelFontSize=14,
    titleFontSize=16
).interactive()

c

In [6]:
np.log(100)

4.605170185988092

In [7]:
model_evidence = dict()
for m in ['M'+str(i) for i in model_nums]:
    model_evidence[m] = np.sum(bf[m][30:])

model_evidence

{'M6': -270498.91701128986,
 'M9': -274869.4968816739,
 'M10': -274082.96219079744,
 'M11': -272528.92134763754}

## Scoring Rules


In [8]:
# load existing results
scrl = dict()
for model in model_nums:
    scrl['m'+str(model)] = np.cumsum(load_obj('scoring_rule', log_dirs['m'+str(model)])[31:])


In [9]:
scrldf = pd.DataFrame.from_dict(scrl)
scrldf.columns = ['CFA2', 'EFA', 'EFA.C', 'CFA1']
scrldf['t']  = np.arange(30, len(scrldf)+30)
for i in ['EFA', 'EFA.C', 'CFA1']:
    scrldf[str(i)+' - CFA2'] = scrldf[str(i)]-scrldf['CFA2']
scrldf.drop(['CFA2', 'EFA', 'EFA.C', 'CFA1'], axis=1, inplace=True)
scrldf = scrldf.melt(id_vars='t', var_name='Model')


In [10]:
width = 600
height = 400
c = alt.Chart(scrldf).mark_line(
    strokeWidth = 2,
    ).encode(
    alt.X('t:Q', title='Index i', scale=alt.Scale(domain=[0,200])),
    alt.Y('value:Q', title='Variogram Score'),
    alt.Color('Model')
    ).properties(
    width=width, height=height
    ).configure_legend(
    titleFontSize=16,
    labelFontSize=14,
    symbolStrokeWidth=6
    ).configure_axis(
    labelFontSize=14,
    titleFontSize=16
).interactive()

c
