In [15]:
import sys
import os
import equinox as eqx
import jax.tree_util as jtu
import pandas as pd
import sqlalchemy
import seaborn as sns
import matplotlib.pyplot as plt
import pub_ready_plots
from pub_ready_plots import get_mpl_rcParams
sys.path.append("../../..")
from lib.ml.experiment import Experiment 
from lib.base import Config
from lib.utils import load_config 
from lib.ehr import TVxEHR

rc_params, fig_width_in, fig_height_in = pub_ready_plots.get_mpl_rcParams(
    width_frac=1,  # between 0 and 1
    height_frac=0.2,  # between 0 and 1
    layout="jmlr"  # or "iclr", "neurips", "poster-portrait", "poster-landscape"
)
rc_params['figure.constrained_layout.use'] = True
plt.rcParams.update(rc_params)



RESULTS_DIR = "aki_evals"
CONFIGS_DIR = f"{RESULTS_DIR}/configs"
db_name = [#f"{RESULTS_DIR}/seg_evals.sqlite",
           # f"{RESULTS_DIR}/seg_emergency_evals.sqlite",
           f"{RESULTS_DIR}/seg_auto.sqlite",
          f"{RESULTS_DIR}/imp_baselines.sqlite"
]





In [16]:
MISSINGNESS_DATASET = "/home/asem/GP/ehr-data/g0g1_missingness_data"


## 30-Nov Note

From this Notebook, we will generate the results of ICNN (temporal section).
It will also feed into the Koopman imputation results.

In [30]:

def model_name(exp):
    if 'inicenodelite_' in exp:
        return 'eICE-NODE'
    if 'inicenodeliteicnn_' in exp:
        return 'ODE-ICNN'
    if 'inicenodelitemech_' in exp:
        return 'mICE-NODE'
    if 'inicenodelitestochastic_' in exp:
        return 'sICE-NODE'
    if 'gruodebayes' in exp:
        return 'GRU-ODE-Bayes'
    if 'ingru' in exp:
        return 'Intervals-GRU'
    if 'inkoopman' in exp:
        return 'Koopman'
    if 'inicenodeliteicnn' in exp:
        return 'ODE-ICNN'
    if 'rect' in exp.lower():
        return 'RectiLinear'
    if 'innaivesequentialgru' in exp:
        return 'Sequential-GRU'
    if 'AutoODEICNN' in exp:
        return 'iODE-ICNN'
    if 'AutoKoopmanICNN' in exp:
        return 'iKoopman-ICNN'
    if 'iter' in exp:
        return 'iter_imputer'
    if 'knn' in exp:
        return 'knn_imputer'
    if 'mean' in exp:
        return 'mean_imputer'
    if 'zero' in exp:
        return 'zero_imputer'
        
    assert False, f"Model not captured in {exp}"

def loss_name(exp):
    if 'bce' in exp:
        return 'bce'
        
    return 'mse'


def batch_size(exp):
    if 'rectilinear' in exp or 'innaivesequentialgru' in exp:
        return 2

    if 'B2' in exp:
        return 2
    else:
        return 64

    assert False

def icenode_dyn(exp):
    if 'Koopman' in exp:
        return 'NA'
    if 'grudyn' in exp:
        return 'gru'
    if 'ODE' in exp:
        return 'mlp'
    else:
        return 'NA'

def optimiser(exp):
    if 'adam' in exp:
        return 'adam'
    if 'lamb' in exp:
        return 'lamb'
    if 'novo' in exp:
        return 'novograd'
    return 'adam'
        


def sql2dataframe(db):
    engine = sqlalchemy.create_engine("sqlite:///%s" % db, execution_options={"sqlite_raw_colnames": True},
                                     connect_args={'timeout': 5})
    
    df = {name: pd.read_sql_table(name, engine) for name in 
          ('evaluation_runs', 'evaluation_status', 'experiments', 'metrics', 'results')}
    df['results']
    
    metrics = df['metrics'].rename(columns={'name': 'metric', 'id': 'metric_id'})
    eval_runs = df['evaluation_runs'].rename(columns={'id': 'evaluation_id'})
    experiments = df['experiments'].rename(columns={'name': 'experiment', 'id': 'experiment_id'})
    eval_status = df['evaluation_status'].rename(columns={'id': 'status_id', 'name': 'status'})
    
    res = pd.merge(df['results'], metrics, left_on='metric_id', right_on='metric_id', how='left')
    res = pd.merge(res, eval_runs, left_on='evaluation_id', right_on='evaluation_id', how='left')
    res = pd.merge(res, experiments, left_on='experiment_id', right_on='experiment_id', how='left')
    res = pd.merge(res, eval_status, left_on='status_id', right_on='status_id', how='left')
    res['step'] = res.snapshot.str.extract('(\d+)').astype(int)

    res = res.sort_values(['experiment_id', 'step'])
    res['last_max'] = float('nan')
    res['last_min'] = float('nan')
    res['is_max'] = False
    res['is_min'] = False
    res['max'] = float('nan')
    res['min'] = float('nan')
    
    for exp, exp_df in res.groupby('experiment_id'):
        for metric, metric_df in exp_df.groupby('metric'):
            index = metric_df.index
            res.loc[index, 'last_max'] = metric_df['value'].cummax()
            res.loc[index, 'last_min'] = metric_df['value'].cummin()
            res.loc[index, 'is_max'] = metric_df['value'] == res.loc[index, 'last_max']
            res.loc[index, 'is_min'] = metric_df['value'] == res.loc[index, 'last_min']
            res.loc[index, 'max'] = metric_df['value'].max()
            res.loc[index, 'min'] = metric_df['value'].min()
            
    
    res = res[[col for col in res.columns if 'id' not in col]]

    res['model'] = res.experiment.map(model_name)
    res['loss'] = res.experiment.map(loss_name)
    res['batch_size'] = res.experiment.map(batch_size)
    res['icenode_dyn'] = res.experiment.map(icenode_dyn)
    res['optimiser'] = res.experiment.map(icenode_dyn)
    return res


def experiment_mincost_snapshots(dataframe, 
                                 cost_metrics = ['MSE.imputation']):
    def experiment_step_min_cost(experiment_df):
        return pd.Series({'best_step': experiment_df.loc[experiment_df['value'].idxmin(), 'step']})
        
    results2 = dataframe[dataframe.metric.isin(cost_metrics)]
    results2 = results2[['experiment', 'step', 'value']].groupby(['experiment', 'step']).sum().reset_index()
    results2 = results2.groupby('experiment').apply(experiment_step_min_cost).reset_index()
    return results2.set_index('experiment')['best_step'].to_dict()




In [31]:
results = pd.concat([sql2dataframe(db) for db in db_name])

In [32]:
results.model.unique()

array(['iODE-ICNN', 'iKoopman-ICNN', 'iter_imputer', 'knn_imputer',
       'mean_imputer', 'zero_imputer', 'RectiLinear'], dtype=object)

In [33]:
results.metric.unique()

array(['MSE.forecast', 'r2.forecast', 'MICRO-AVG(r2).forecast',
       'MACRO-AVG(r2)*.forecast', 'MSE.imputation', 'r2.imputation',
       'MICRO-AVG(r2).imputation', 'MACRO-AVG(r2)*.imputation',
       'blood_chemistry.albumin.MSE.forecast',
       'blood_chemistry.aniongap.MSE.forecast',
       'blood_chemistry.bicarbonate.MSE.forecast',
       'blood_chemistry.bun.MSE.forecast',
       'blood_chemistry.calcium.MSE.forecast',
       'blood_chemistry.chloride.MSE.forecast',
       'blood_chemistry.creatinine.MSE.forecast',
       'blood_chemistry.globulin.MSE.forecast',
       'blood_chemistry.glucose.MSE.forecast',
       'blood_chemistry.potassium.MSE.forecast',
       'blood_chemistry.sodium.MSE.forecast',
       'blood_chemistry.total_protein.MSE.forecast',
       'blood_diff.atypical_lymphocytes.MSE.forecast',
       'blood_diff.bands.MSE.forecast',
       'blood_diff.basophils.MSE.forecast',
       'blood_diff.basophils_abs.MSE.forecast',
       'blood_diff.eosinophils.MSE.fore

In [34]:
best_experiment_snapshot = experiment_mincost_snapshots(results)

In [35]:
results

Unnamed: 0,value,metric,created_at,updated_at,snapshot,experiment,status,step,last_max,last_min,is_max,is_min,max,min,model,loss,batch_size,icenode_dyn,optimiser
41616,0.131115,MSE.forecast,2024-10-03 23:16:39.836469,2024-10-04 05:42:38.103329,step0240.eqx,AutoODEICNN_novo_mlpdyn_B2,FINISHED,240,0.131115,0.131115,True,True,0.176915,0.085957,iODE-ICNN,mse,2,mlp,mlp
41617,-0.080133,r2.forecast,2024-10-03 23:16:39.836469,2024-10-04 05:42:38.103329,step0240.eqx,AutoODEICNN_novo_mlpdyn_B2,FINISHED,240,-0.080133,-0.080133,True,True,0.291881,-0.457432,iODE-ICNN,mse,2,mlp,mlp
41618,-0.256129,MICRO-AVG(r2).forecast,2024-10-03 23:16:39.836469,2024-10-04 05:42:38.103329,step0240.eqx,AutoODEICNN_novo_mlpdyn_B2,FINISHED,240,-0.256129,-0.256129,True,True,0.176501,-0.694904,iODE-ICNN,mse,2,mlp,mlp
41619,-14.232583,MACRO-AVG(r2)*.forecast,2024-10-03 23:16:39.836469,2024-10-04 05:42:38.103329,step0240.eqx,AutoODEICNN_novo_mlpdyn_B2,FINISHED,240,-14.232583,-14.232583,True,True,-1.070675,-14.232583,iODE-ICNN,mse,2,mlp,mlp
41620,0.141519,MSE.imputation,2024-10-03 23:16:39.836469,2024-10-04 05:42:38.103329,step0240.eqx,AutoODEICNN_novo_mlpdyn_B2,FINISHED,240,0.141519,0.141519,True,True,0.141519,0.062453,iODE-ICNN,mse,2,mlp,mlp
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1015,0.276219,vital.sbp.r2.imputation,2024-12-01 16:12:34.061540,NaT,step0,rectlinear,FINISHED,0,0.276219,0.276219,True,True,0.276219,0.276219,RectiLinear,mse,64,,
1016,0.217735,vital.sbp_ni.r2.imputation,2024-12-01 16:12:34.061540,NaT,step0,rectlinear,FINISHED,0,0.217735,0.217735,True,True,0.217735,0.217735,RectiLinear,mse,64,,
1017,0.125346,vital.spo2.r2.imputation,2024-12-01 16:12:34.061540,NaT,step0,rectlinear,FINISHED,0,0.125346,0.125346,True,True,0.125346,0.125346,RectiLinear,mse,64,,
1018,0.190967,vital.temperature.r2.imputation,2024-12-01 16:12:34.061540,NaT,step0,rectlinear,FINISHED,0,0.190967,0.190967,True,True,0.190967,0.190967,RectiLinear,mse,64,,


In [36]:
best_experiment_snapshot

{'AutoKoopmanICNN_B2': 2990,
 'AutoKoopmanICNN_lr2_optlamb_B16': 1380,
 'AutoODEICNN_B2': 1740,
 'AutoODEICNN_ademamix_mlp_B8': 240,
 'AutoODEICNN_lamb_mlpdyn_B2': 5990,
 'AutoODEICNN_lr2_optlamb_B16': 2490,
 'AutoODEICNN_novo_mlpdyn_B2': 9490,
 'iter': 0,
 'knn': 0,
 'mean': 0,
 'rectlinear': 0,
 'zero': 0}

In [37]:
experiments = pd.DataFrame(results.experiment.unique(), columns=['label'])
experiments['model'] = experiments.label.map(model_name)
experiments['loss'] = experiments.label.map(loss_name)
experiments['batch_size'] = experiments.label.map(batch_size)
experiments['icenode_dyn'] =  experiments.label.map(icenode_dyn)
experiments['optimiser'] = experiments.label.map(optimiser)
experiments = experiments.set_index(['model', 'icenode_dyn', 'loss',  'batch_size', 'optimiser']).sort_index()
print(experiments.index.is_unique)
experiments

True


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,label
model,icenode_dyn,loss,batch_size,optimiser,Unnamed: 5_level_1
RectiLinear,,mse,64,adam,rectlinear
iKoopman-ICNN,,mse,2,adam,AutoKoopmanICNN_B2
iKoopman-ICNN,,mse,64,lamb,AutoKoopmanICNN_lr2_optlamb_B16
iODE-ICNN,mlp,mse,2,adam,AutoODEICNN_B2
iODE-ICNN,mlp,mse,2,lamb,AutoODEICNN_lamb_mlpdyn_B2
iODE-ICNN,mlp,mse,2,novograd,AutoODEICNN_novo_mlpdyn_B2
iODE-ICNN,mlp,mse,64,adam,AutoODEICNN_ademamix_mlp_B8
iODE-ICNN,mlp,mse,64,lamb,AutoODEICNN_lr2_optlamb_B16
iter_imputer,,mse,64,adam,iter
knn_imputer,,mse,64,adam,knn


In [None]:
# experiments.model.unique()

In [38]:
# results.metric.unique()
results.groupby('experiment')['step'].unique()

experiment
AutoKoopmanICNN_B2                 [240, 490, 740, 990, 1240, 1490, 1590, 1670, 1...
AutoKoopmanICNN_lr2_optlamb_B16    [240, 490, 740, 870, 990, 1240, 1380, 1490, 1990]
AutoODEICNN_B2                     [240, 490, 740, 990, 1240, 1390, 1490, 1590, 1...
AutoODEICNN_ademamix_mlp_B8        [240, 490, 740, 860, 990, 1240, 1490, 1640, 17...
AutoODEICNN_lamb_mlpdyn_B2         [240, 490, 740, 990, 1240, 1490, 1740, 1890, 1...
AutoODEICNN_lr2_optlamb_B16        [190, 240, 290, 360, 490, 740, 990, 1050, 1240...
AutoODEICNN_novo_mlpdyn_B2         [240, 490, 740, 990, 1190, 1240, 1290, 1300, 1...
iter                                                                             [0]
knn                                                                              [0]
mean                                                                             [0]
rectlinear                                                                       [0]
zero                                                  

In [41]:
results.set_index(['model', 'icenode_dyn', 'loss', 'batch_size', 'optimiser']).sort_index().index.is_unique

False

In [42]:


def make_selection(results_dataframe, selectors):
    y = results_dataframe
    for k, vs in selectors.items():
        y = y[y[k].isin(vs)]
    return y


## MSE / MACRO-AVERAGE table

In [85]:
accuracy_results_df = make_selection(results, selectors = {
    'metric': [
    'MSE.imputation', 'r2.imputation',
    'MICRO-AVG(r2).imputation', 
    'MACRO-AVG(r2)*.imputation'
    ],
    # 'model': [
    #     'iKoopman-ICNN',
    #     'RectiLinear', 
    #     'iODE-ICNN', 
    #     'iter_imputer',
    #     'knn_imputer',
    #     'mean_imputer',
    #     'zero_imputer'
    # ],
    # 'predictor': [
    #     'mlp',
    #     'monotonic'
    # ],
    # 'icenode_dyn': [
    #     'mlp', 
    #     'gru', 
    #     'NA'
    # ],
    # 'loss': [
    #     'mse', 
    #     'bce'
    # ],
    # 'batch_size':
    # [
    #     64,
    #     2
    # ],
    'experiment': 
    [
        'AutoKoopmanICNN_lr2_optlamb_B16',
        'rectlinear', 
        'AutoODEICNN_novo_mlpdyn_B2', 
        'iter',
        'knn',
        'mean',
        'zero'
    ]
})

accuracy_results_df['best_snapshot'] = accuracy_results_df.experiment.map(best_experiment_snapshot)
accuracy_results_df = accuracy_results_df[accuracy_results_df.step == accuracy_results_df.best_snapshot]
accuracy_results_df = accuracy_results_df[['experiment', 'metric', 'value']]
# auc_top_results_df = auc_results_df.groupby(['experiment', 'metric'])[['value']].max().reset_index()
accuracy_results_df.experiment = accuracy_results_df.experiment.map(model_name)

accuracy_results_df = accuracy_results_df.rename(columns={"metric": "Metric", "experiment": "Model"})
accuracy_results_df

Unnamed: 0,Model,Metric,value
73852,iODE-ICNN,MSE.imputation,0.062453
73853,iODE-ICNN,r2.imputation,0.48551
73854,iODE-ICNN,MICRO-AVG(r2).imputation,0.401679
73855,iODE-ICNN,MACRO-AVG(r2)*.imputation,-2.232939
48964,iKoopman-ICNN,MSE.imputation,0.10931
48965,iKoopman-ICNN,r2.imputation,0.099496
48966,iKoopman-ICNN,MICRO-AVG(r2).imputation,-0.047231
48967,iKoopman-ICNN,MACRO-AVG(r2)*.imputation,-6.327054
0,iter_imputer,MSE.imputation,0.067374
1,iter_imputer,r2.imputation,0.444972


In [86]:
accuracy_results_tb = accuracy_results_df.copy()
accuracy_results_tb.Metric = accuracy_results_tb.Metric.map(lambda m: m.split('.')[0].replace('r2', r'$R^2$'))
accuracy_results_tb = accuracy_results_tb.pivot_table(index="Model", values='value',  columns='Metric')
accuracy_results_tb = accuracy_results_tb.loc[:,  ['MSE', r'MICRO-AVG($R^2$)', r'MACRO-AVG($R^2$)*'] ]
accuracy_results_tb

Metric,MSE,MICRO-AVG($R^2$),MACRO-AVG($R^2$)*
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RectiLinear,0.026943,0.74188,0.008907
iKoopman-ICNN,0.10931,-0.047231,-6.327054
iODE-ICNN,0.062453,0.401679,-2.232939
iter_imputer,0.067374,0.354536,-0.239049
knn_imputer,0.158953,-0.522822,0.105525
mean_imputer,0.10532,-0.009002,-0.003999
zero_imputer,0.192525,-0.844452,-4.511775


In [87]:
accuracy_results_tb = accuracy_results_tb.sort_values('MSE', ascending=False, axis=0)
accuracy_results_tb.index =  list(map(lambda c: f'\\texttt{{{c}}}', accuracy_results_tb.index))
accuracy_results_tb.index = list(map(lambda c: c.replace("_", "\\_"), accuracy_results_tb.index))
accuracy_results_tb.columns.name = 'Model'
accuracy_results_tb

Model,MSE,MICRO-AVG($R^2$),MACRO-AVG($R^2$)*
\texttt{zero\_imputer},0.192525,-0.844452,-4.511775
\texttt{knn\_imputer},0.158953,-0.522822,0.105525
\texttt{iKoopman-ICNN},0.10931,-0.047231,-6.327054
\texttt{mean\_imputer},0.10532,-0.009002,-0.003999
\texttt{iter\_imputer},0.067374,0.354536,-0.239049
\texttt{iODE-ICNN},0.062453,0.401679,-2.232939
\texttt{RectiLinear},0.026943,0.74188,0.008907


In [88]:
accuracy_results_stb = (accuracy_results_tb.style
              # .background_gradient(cmap='RdYlGn', axis=1, low=-0, high=0.5,  vmin=-0.6, vmax=0.3, subset= pd.IndexSlice[[r'$R^2$'], :])
              .background_gradient(cmap='RdYlGn_r', axis=0,  low=0.03, high=0.200,  vmin=0.03, vmax=0.200, subset= pd.IndexSlice[:, ['MSE']])
              # .apply_index(lambda x: ["background-color: #E5E4E2;"] * len(x))
              .format(precision=3))
accuracy_results_stb

Model,MSE,MICRO-AVG($R^2$),MACRO-AVG($R^2$)*
\texttt{zero\_imputer},0.193,-0.844,-4.512
\texttt{knn\_imputer},0.159,-0.523,0.106
\texttt{iKoopman-ICNN},0.109,-0.047,-6.327
\texttt{mean\_imputer},0.105,-0.009,-0.004
\texttt{iter\_imputer},0.067,0.355,-0.239
\texttt{iODE-ICNN},0.062,0.402,-2.233
\texttt{RectiLinear},0.027,0.742,0.009


In [91]:
accuracy_results_ltx = (accuracy_results_stb.to_latex(caption=r"Comparison between imputation methods. The estimations are made on the validation split. "
                                                              r"For the $\text{MACRO-AVG}(R^2)*$, features with less than $N<300$ in "
                                                              r"the test split are excluded, "
                                                              r"of which some have $-\infty$ values.",
                       position_float="centering",
                       convert_css=True,
                       hrules=True,
                                                     label="table:ch_energy:temporal_performance_comparison")
              .replace('\\toprule', '\\hline').replace('\\midrule', '\\hline').replace('\\bottomrule','\\hline'))
print(accuracy_results_ltx)

\begin{table}
\centering
\caption{Comparison between imputation methods. The estimations are made on the validation split. For the $\text{MACRO-AVG}(R^2)*$, features with less than $N<300$ in the test split are excluded, of which some have $-\infty$ values.}
\label{table:ch_energy:temporal_performance_comparison}
\begin{tabular}{lrrr}
\hline
Model & MSE & MICRO-AVG($R^2$) & MACRO-AVG($R^2$)* \\
\hline
\texttt{zero\_imputer} & {\cellcolor[HTML]{F36B42}} \color[HTML]{F1F1F1} 0.193 & -0.844 & -4.512 \\
\texttt{knn\_imputer} & {\cellcolor[HTML]{FECA79}} \color[HTML]{000000} 0.159 & -0.523 & 0.106 \\
\texttt{iKoopman-ICNN} & {\cellcolor[HTML]{DAF08D}} \color[HTML]{000000} 0.109 & -0.047 & -6.327 \\
\texttt{mean\_imputer} & {\cellcolor[HTML]{D1EC86}} \color[HTML]{000000} 0.105 & -0.009 & -0.004 \\
\texttt{iter\_imputer} & {\cellcolor[HTML]{69BE63}} \color[HTML]{F1F1F1} 0.067 & 0.355 & -0.239 \\
\texttt{iODE-ICNN} & {\cellcolor[HTML]{54B45F}} \color[HTML]{F1F1F1} 0.062 & 0.402 & -2.233 \\
\te

## Cost Tables: $L_f$: Forecasting MSE, $L_\text{aki}$: Early Prediction Cost, $L_f + L_\text{aki}$ 

In [34]:
cost_results_df = make_selection(results, selectors = {
    'metric': [
        'LeadPredictionLossMetric.mse',
        'ObsPredictionLossMetric.mse'
    ],
    # 'model': [
        # 'eICE-NODE', 
        # 'GRU-ODE-Bayes', 
    #     'GRU'
    # ],
    # 'predictor': [
    #     'mlp',
    #     'monotonic'
    # ],
    # 'icenode_dyn': [
    #     'mlp', 
    #     'gru', 
    #     'NA'
    # ],
    # 'loss': [
    #     'mse', 
    #     'bce'
    # ],
    # 'batch_size':
    # [
    #     64,
    #     2
    # ],
    'experiment': 
    [
        "mlp_mse_inicenodelite_g0", 
        "monotonic_bce_gruodebayes_g0", 
        "mlp_mse_ingru_g0",
        "innaivesequentialgru_mlp_g0",
        # "innaivesequentialgru_mono_g0",
        "rectilinear_mlp_g0",
        # "rectilinear_mono_g0"
    ]
})

cost_results_df['best_snapshot'] = cost_results_df.experiment.map(best_experiment_snapshot)
cost_top_results_df = cost_results_df[cost_results_df.step == cost_results_df.best_snapshot]
cost_top_results_df = cost_top_results_df[['metric', 'value', 'model']]

window_map = {
    'LeadPredictionLossMetric.mse': r"$L_q$",
    'ObsPredictionLossMetric.mse': r"$L_z$",
}
cost_top_results_df.metric = cost_top_results_df.metric.map(window_map)

cost_top_results_df = cost_top_results_df.pivot(index="model", columns="metric", values="value")
cost_top_results_df[r"$L_z + L_q$"] = cost_top_results_df[r"$L_q$"] + cost_top_results_df[r"$L_z$"]
cost_top_results_df.columns.name = ''
cost_top_results_df.index.name = 'Model'
cost_top_results_df

Unnamed: 0_level_0,$L_q$,$L_z$,$L_z + L_q$
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
GRU-ODE-Bayes,0.218311,0.054221,0.272532
Intervals-GRU,0.147424,0.035038,0.182462
RectiLinear,0.202965,,
Sequential-GRU,0.217091,,
eICE-NODE,0.195042,0.021599,0.216641


In [35]:
with pd.option_context(#'styler.format.escape', 'latex',
                        #'styler.latex.multicol_align', 'c',
                        'styler.sparse.index', True,
                       'styler.format.precision', 3):
    cost_top_results_df_ = cost_top_results_df.copy()
    cost_top_results_df_.index = list(map(lambda m: rf'\texttt{{{m}}}', cost_top_results_df_.index))

    print(cost_top_results_df_.to_latex(buf=None,  header=True, 
             index=True, na_rep='NaN', escape=False, 
             multicolumn=True, 
                                        multicolumn_format='|c|', 
             multirow=True))


\begin{tabular}{lrrr}
\toprule
 & $L_q$ & $L_z$ & $L_z + L_q$ \\
\midrule
\texttt{GRU-ODE-Bayes} & 0.218 & 0.054 & 0.273 \\
\texttt{Intervals-GRU} & 0.147 & 0.035 & 0.182 \\
\texttt{RectiLinear} & 0.203 & NaN & NaN \\
\texttt{Sequential-GRU} & 0.217 & NaN & NaN \\
\texttt{eICE-NODE} & 0.195 & 0.022 & 0.217 \\
\bottomrule
\end{tabular}



## Table: Inference Time, #Dyn_Parameters, #Timestamps (binning vs free).

In [36]:
tvx_phantom = TVxEHR.load("/home/asem/GP/ehr-data/mimic4aki-cohort/tvx_aki_phantom.h5")

In [37]:
configs = {exp: Config.from_dict(load_config(os.path.join(CONFIGS_DIR, f'{exp}.json')))
           for exp in cost_results_df.experiment.unique()}
models = {exp: Experiment(config=c).load_model(tvx_phantom, 0)
          for exp, c in configs.items()}
models_dyn = {exp: m.f_dyn for exp, m in models.items()}
model_dyn_nparams = {exp: sum(a.size for a in jtu.tree_leaves(eqx.filter(m, eqx.is_inexact_array)))
                     for exp, m in models_dyn.items()}

TypeError: OptimizerConfig.__init__() got an unexpected keyword argument 'decay_rate'

In [301]:
def experiment_snapshot_total_time(dataframe):
    def total_time(snapshot_df):
        return  pd.Series({
            'total_time': snapshot_df.iloc[0].updated_at - snapshot_df.iloc[0].created_at})
    return dataframe.groupby(['experiment', 'step']).apply(total_time)


def experiment_snapshot_evaluation_time(dataframe):
    eval_time_metrics = [m for m in dataframe.metric.unique() if m.endswith('.eval_time')]
    times = dataframe[dataframe.metric.isin(eval_time_metrics)].set_index(['experiment', 'step'])[['metric', 'value']]
    times.loc[:, 'value'] = times.loc[:, 'value'].map(lambda seconds: pd.Timedelta(seconds=seconds))
    times = times.pivot(columns='metric', values='value')
    times['total_time'] = times.sum(axis=1)
    return times

In [350]:
for_time_df = make_selection(results, selectors = {
    'predictor': [
        'mlp',
        'monotonic'
    ],
    'icenode_dyn': [
        'mlp', 
        'gru', 
        'NA'
    ],
    'loss': [
        'mse', 
        'bce'
    ],
    'batch_size':
    [
        64,
        2
    ],
    'experiment': 
    [
        # "mlp_mse_inicenodelite_g0", 
        "monotonic_bce_inicenodelite_gru_g0",
        "monotonic_bce_gruodebayes_g0", 
        "mlp_mse_ingru_g0"
    ]
})

exp_map = for_time_df.set_index("experiment")["model"].to_dict()
# Inference + Evaluation Time
experiments_total_time = experiment_snapshot_total_time(for_time_df)
# Time per evaluation module
experiments_evaluation_time = experiment_snapshot_evaluation_time(for_time_df)
# Inference times
experiments_total_time['evaluation_time'] = experiments_evaluation_time.total_time
experiments_total_time['inference_time'] = experiments_total_time.total_time - experiments_evaluation_time.total_time
experiments_total_time_median = experiments_total_time.groupby(level=0)[experiments_total_time.columns].median()

experiments_total_time_median = experiments_total_time_median.apply(lambda x: x.dt.total_seconds() / 3600)
experiments_total_time_median = experiments_total_time_median[['inference_time']]
experiments_total_time_median.columns = ['Inference Time (h)']
experiments_stats = experiments_total_time_median.copy()
experiments_stats['#Dynamics Parameters'] = experiments_stats.index.map(model_dyn_nparams)

experiments_stats.index = experiments_stats.index.map(exp_map)
experiments_stats.index.name = "Model"

with pd.option_context('styler.format.escape', 'latex',
                        'styler.latex.multicol_align', 'c',
                        'styler.sparse.index', True):
    experiments_stats_ = experiments_stats.copy()
    experiments_stats_.index = list(map(lambda m: rf'\texttt{{{m}}}', experiments_stats_.index))
    print(experiments_stats_.style.format({"#Dynamics Parameters": "{:,.0f}", "Inference Time (h)": "{:.1f}"})
          .to_latex(buf=None))
    

\begin{tabular}{lrr}
 & Inference Time (h) & #Dynamics Parameters \\
\texttt{GRU} & 9.1 & 15,200 \\
\texttt{GRU-ODE-Bayes} & 32.9 & 11,700 \\
\texttt{eICE-NODE} & 26.1 & 11,700 \\
\end{tabular}



## ICE-NODE Ablation.

In [303]:
icenode_ablation_df = make_selection(results, selectors = {
    'metric': [
        'LeadPredictionLossMetric.mse',
        'ObsPredictionLossMetric.mse'
    ],
    'model': [
        'eICE-NODE'
    ],
    # 'experiment': 
    # [
    #     "monotonic_bce_inicenodelite_gru_g0", 
    #     "monotonic_bce_gruodebayes_g0", 
    #     "mlp_mse_ingru_g0"
    # ]
})

In [310]:
icenode_ablation_df['best_snapshot'] = icenode_ablation_df.experiment.map(best_experiment_snapshot)
top_icenode_ablation_df = icenode_ablation_df[icenode_ablation_df.step == icenode_ablation_df.best_snapshot]
top_icenode_ablation_df = top_icenode_ablation_df[['experiment', 
                                                   'icenode_dyn', 'predictor',
                                                   'loss', 'batch_size',
                                                   'metric', 'value']]


window_map = {
    'LeadPredictionLossMetric.mse': r"$L_q$",
    'ObsPredictionLossMetric.mse': r"$L_z$",
}
top_icenode_ablation_df.metric = top_icenode_ablation_df.metric.map(window_map)

top_icenode_ablation_df = top_icenode_ablation_df.pivot(index=['icenode_dyn', 'predictor', 'loss', 'batch_size'], 
                                                        columns="metric", values="value")
top_icenode_ablation_df[r"$L_z + L_q$"] = top_icenode_ablation_df[r"$L_q$"] + top_icenode_ablation_df[r"$L_z$"]
top_icenode_ablation_df.index.names = ["Dynamics", "Lead Predictor", "Lead Loss", "Batch Size"]
top_icenode_ablation_df.columns.name = ''

In [319]:
with pd.option_context('styler.format.escape', 'latex',
                        'styler.latex.multicol_align', 'c',
                        'styler.sparse.index', True):
    
    print(top_icenode_ablation_df.to_latex(buf=None, columns=None, header=True, 
             index=True, na_rep='NaN', escape=False, 
             multicolumn=True, multicolumn_format='|c|', 
             multirow=True))


\begin{tabular}{llllrrr}
\toprule
 &  &  &  & $L_q$ & $L_z$ & $L_z + L_q$ \\
Dynamics & Lead Predictor & Lead Loss & Batch Size &  &  &  \\
\midrule
gru & monotonic & bce & 64 & 0.239360 & 0.033283 & 0.272643 \\
\cline{1-7} \cline{2-7} \cline{3-7}
\multirow[t]{4}{*}{mlp} & \multirow[t]{2}{*}{mlp} & bce & 2 & 0.225797 & 7.864181 & 8.089978 \\
\cline{3-7}
 &  & mse & 64 & 0.222691 & 0.024944 & 0.247635 \\
\cline{2-7} \cline{3-7}
 & \multirow[t]{2}{*}{monotonic} & bce & 64 & 0.221699 & 6.738520 & 6.960219 \\
\cline{3-7}
 &  & mse & 64 & 0.224047 & 24.191252 & 24.415298 \\
\cline{1-7} \cline{2-7} \cline{3-7}
\bottomrule
\end{tabular}

