In [1]:
from codebase.classes import Particles
from codebase.classes_data import Data
import pandas as pd
import numpy as np
from codebase.file_utils import (
    save_obj,
    load_obj,
    make_folder,
    path_backslash
)
from codebase.ibis import essl, exp_and_normalise, model_phonebook, run_mcmc
from codebase.plot import get_post_df, plot_density
from codebase.run_tlk import model_phonebook
from tqdm import tqdm
from scipy.special import logsumexp
import altair as alt
alt.data_transformers.disable_max_rows()


import pystan

## Model Legend

* m5 = AZ
* m7 = EZ
* m9 = EFA
* m10 = EFA-C

## Load Model Evidence

In [2]:
log_dirs = dict()
log_dirs['m5'] = 'log/fabian_sim/EZ/20210912_182218_ezsim_m5/'
log_dirs['m7'] = 'log/fabian_sim/EZ/20210912_182221_ezsim_m7/'
log_dirs['m9'] = 'log/fabian_sim/EZ/20210912_182227_ezsim_m9/'
# log_dirs['m10'] = 'log/fabian_sim/EZ/20210912_182229_ezsim_m10/'
log_dirs['m15'] = 'log/fabian_sim/EZ/20210917_221719_ezsim_1factor_m9/'
log_dirs['m16'] = 'log/fabian_sim/EZ/20210917_221843_ezsim_3factor_m9/'


model_nums = [5,7, 9, 15, 16]

# load existing results
bf = dict()

for model in model_nums:
    bf['m'+str(model)] = np.cumsum(load_obj('log_lklhds', log_dirs['m'+str(model)])[30:])



In [3]:
data = load_obj('data', log_dirs['m5'])
data.raw_data


{'random_seed': 0,
 'N': 200,
 'K': 2,
 'J': 6,
 'alpha': array([0., 0., 0., 0., 0., 0.]),
 'beta': array([[1. , 0. ],
        [0.8, 0. ],
        [0.8, 0. ],
        [0. , 1. ],
        [0. , 0.8],
        [0. , 0.8]]),
 'sigma_z': array([0.80622577, 0.80622577]),
 'Phi_corr': array([[1. , 0.2],
        [0.2, 1. ]]),
 'Phi_cov': array([[0.65, 0.13],
        [0.13, 0.65]]),
 'Marg_cov': array([[1.    , 0.52  , 0.52  , 0.13  , 0.104 , 0.104 ],
        [0.52  , 1.    , 0.416 , 0.104 , 0.0832, 0.0832],
        [0.52  , 0.416 , 1.    , 0.104 , 0.0832, 0.0832],
        [0.13  , 0.104 , 0.104 , 1.    , 0.52  , 0.52  ],
        [0.104 , 0.0832, 0.0832, 0.52  , 1.    , 0.416 ],
        [0.104 , 0.0832, 0.0832, 0.52  , 0.416 , 1.    ]]),
 'Theta': array([[0.35 , 0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.584, 0.   , 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.584, 0.   , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.35 , 0.   , 0.   ],
        [0.   , 0.   , 0.   , 0.   , 0.584, 

In [4]:
bfdf = pd.DataFrame.from_dict(bf)
bfdf.rename({
    'm5':'AZ', 
    'm7':'EZ',
    'm9':'EFA2',
#     'm10':'EFA2.C',
    'm15':'EFA1',
    'm16':'EFA3'
}, axis=1, inplace=True)

bfdf['t']  = np.arange(30, len(bfdf)+30)
for i in ['EFA2', 'EFA1', 'AZ', 'EFA3']:
    bfdf['EZ / '+str(i)] = bfdf['EZ']-bfdf[str(i)]
bfdf.drop(['EFA2', 'EFA1', 'EZ', 'EFA3', 'AZ'], axis=1, inplace=True)
bfdf = bfdf.melt(id_vars='t', var_name='Model')

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


In [5]:
width = 600
height = 400
c = alt.Chart(bfdf).mark_line(
    strokeWidth = 2,
    ).encode(
    alt.X('t:Q', title='Index i', scale=alt.Scale(domain=[0,200])),
    alt.Y('value:Q', title='Log Bayes Factor'),
    alt.Color('Model')
    ).properties(
    width=width, height=height
    ).configure_legend(
    titleFontSize=16,
    labelFontSize=14,
    symbolStrokeWidth=6
    ).configure_axis(
    labelFontSize=14,
    titleFontSize=16
).interactive()

c

In [14]:
bfdf = pd.DataFrame.from_dict(bf)
bfdf.rename({
    'm5':'AZ', 
    'm7':'EZ',
    'm9':'EFA2',
#     'm10':'EFA2.C',
    'm15':'EFA1',
    'm16':'EFA3'
}, axis=1, inplace=True)

bfdf['t']  = np.arange(30, len(bfdf)+30)
for i in ['EFA1', 'EFA3']:
    bfdf['EFA2 / '+str(i)] = bfdf['EFA2']-bfdf[str(i)]
bfdf.drop(['EFA2', 'EFA1', 'EZ', 'EFA3', 'AZ'], axis=1, inplace=True)
bfdf = bfdf.melt(id_vars='t', var_name='Model')

width = 600
height = 400
c = alt.Chart(bfdf).mark_line(
    strokeWidth = 2,
    ).encode(
    alt.X('t:Q', title='Index i', scale=alt.Scale(domain=[0,200])),
    alt.Y('value:Q', title='Log Bayes Factor'),
    alt.Color('Model')
    ).properties(
    width=width, height=height
    ).configure_legend(
    titleFontSize=16,
    labelFontSize=14,
    symbolStrokeWidth=6
    ).configure_axis(
    labelFontSize=14,
    titleFontSize=16
)
c

In [17]:
bfdf = pd.DataFrame.from_dict(bf)
bfdf.rename({
    'm5':'AZ', 
    'm7':'EZ',
    'm9':'EFA2',
#     'm10':'EFA2.C',
    'm15':'EFA1',
    'm16':'EFA3'
}, axis=1, inplace=True)

bfdf['t']  = np.arange(30, len(bfdf)+30)
for i in ['AZ', 'EFA2']:
    bfdf['EZ / '+str(i)] = bfdf['EZ']-bfdf[str(i)]
bfdf.drop(['EFA2', 'EFA1', 'EZ', 'EFA3', 'AZ'], axis=1, inplace=True)
bfdf = bfdf.melt(id_vars='t', var_name='Model')

width = 600
height = 400
c = alt.Chart(bfdf).mark_line(
    strokeWidth = 2,
    ).encode(
    alt.X('t:Q', title='Index i', scale=alt.Scale(domain=[0,200])),
    alt.Y('value:Q', title='Log Bayes Factor'),
    alt.Color('Model')
    ).properties(
    width=width, height=height
    ).configure_legend(
    titleFontSize=16,
    labelFontSize=14,
    symbolStrokeWidth=6
    ).configure_axis(
    labelFontSize=14,
    titleFontSize=16
)
c

In [6]:
model_evidence = dict()
for m in ['m'+str(i) for i in model_nums]:
    model_evidence[m] = bf[m][-1]

model_evidence

{'m5': -1331.267511407236,
 'm7': -1330.9822492129754,
 'm9': -1333.6589205474331,
 'm15': -1391.8250687157076,
 'm16': -1340.209421550288}

In [7]:
for m in ['m'+str(i) for i in [5,9,15,16]]:
    a = np.exp(bf['m7'][-1]-bf[m][-1])
    print(m, "%.1f"%a)

m5 1.3
m9 14.5
m15 265277718317492757027880960.0
m16 10169.7


In [8]:
for m in ['m'+str(i) for i in [15, 16]]:
    a = np.exp(bf['m9'][-1]-bf[m][-1])
    print(a)

1.8248920661015861e+25
699.594584914351


In [9]:
for m in ['m'+str(i) for i in [16]]:
    a = np.exp(bf['m9'][-1]-bf[m][-1])
    print(a)

699.594584914351


## Scoring Rules


In [10]:
# load existing results
scrl = dict()
for model in model_nums:
    scrl['m'+str(model)] = np.cumsum(load_obj('scoring_rule', log_dirs['m'+str(model)])[31:])


In [11]:
scrldf = pd.DataFrame.from_dict(scrl)
scrldf.rename({
    'm5':'AZ', 
    'm7':'EZ',
    'm9':'EFA2',
    'm10':'EFA2.C',
    'm15':'EFA1',
    'm16':'EFA3'
}, axis=1, inplace=True)

scrldf['t']  = np.arange(30, len(scrldf)+30)
for i in ['EFA2', 'EFA1', 'AZ', 'EFA3', 'EFA2.C']:
    scrldf[str(i)+' - EZ'] = scrldf[str(i)]-scrldf['EZ']
scrldf.drop(['EFA2', 'EFA1', 'EZ', 'EFA3', 'EFA2.C', 'AZ'], axis=1, inplace=True)
scrldf = scrldf.melt(id_vars='t', var_name='Model')


KeyError: 'EFA2.C'

In [None]:
width = 600
height = 400
c = alt.Chart(scrldf).mark_line(
    strokeWidth = 2,
    ).encode(
    alt.X('t:Q', title='Index i', scale=alt.Scale(domain=[0,200])),
    alt.Y('value:Q', title='Variogram Score'),
    alt.Color('Model')
    ).properties(
    width=width, height=height
    ).configure_legend(
    titleFontSize=16,
    labelFontSize=14,
    symbolStrokeWidth=6
    ).configure_axis(
    labelFontSize=14,
    titleFontSize=16
).interactive()

c
