In [74]:
import pandas as pd
import sqlalchemy

# Directory that holds the evaluation databases read by this notebook.
RESULTS_DIR = "aki_evals"

# SQLite files to load; each one is read separately and the results are concatenated.
db_name = [
    f"{RESULTS_DIR}/seg_evals.sqlite",
    f"{RESULTS_DIR}/bin_evals.sqlite",
]




In [84]:

def model_name(exp):
    """Map an experiment name to the display name of its model family.

    The experiment name is searched for known substrings in a fixed order;
    the first one found decides the result.

    Args:
        exp: Experiment name (string).

    Returns:
        One of 'eICE-NODE', 'GRU-ODE-Bayes', 'GRU', 'Koopman' or 'ODE-ICNN'.

    Raises:
        AssertionError: If none of the known substrings occurs in ``exp``.
    """
    # (substring, display name) pairs, checked top to bottom.
    known_models = (
        ('inicenodelite_', 'eICE-NODE'),
        ('gruodebayes', 'GRU-ODE-Bayes'),
        ('ingru', 'GRU'),
        ('inkoopman', 'Koopman'),
        ('inicenodeliteicnn', 'ODE-ICNN'),
    )
    for tag, display_name in known_models:
        if tag in exp:
            return display_name
    # Raised explicitly rather than via `assert False`: assert statements are
    # removed under `python -O`, which would make this silently return None.
    raise AssertionError(f'Unrecognised model in experiment name: {exp!r}')
    
def loss_name(exp):
    """Return the loss tag ('mse' or 'bce') found in an experiment name.

    'mse' is checked first, so a name containing both tags yields 'mse'.

    Args:
        exp: Experiment name (string).

    Returns:
        'mse' or 'bce'.

    Raises:
        AssertionError: If neither tag occurs in ``exp``.
    """
    for tag in ('mse', 'bce'):
        if tag in exp:
            return tag
    # Raised explicitly rather than via `assert False`: assert statements are
    # removed under `python -O`, which would make this silently return None.
    raise AssertionError(f'Unrecognised loss in experiment name: {exp!r}')

def predictor_name(exp):
    """Return the predictor tag given by the prefix of an experiment name.

    Args:
        exp: Experiment name (string).

    Returns:
        'mlp' if the name starts with 'mlp', 'monotonic' if it starts with
        'monotonic'.

    Raises:
        AssertionError: If the name starts with neither prefix.
    """
    for prefix in ('mlp', 'monotonic'):
        if exp.startswith(prefix):
            return prefix
    # Raised explicitly rather than via `assert False`: assert statements are
    # removed under `python -O`, which would make this silently return None.
    raise AssertionError(f'Unrecognised predictor in experiment name: {exp!r}')

def batch_size(exp):
    """Return the batch size encoded in an experiment name.

    Args:
        exp: Experiment name (string).

    Returns:
        2 if the substring 'B2' occurs in ``exp``, otherwise 64.
    """
    # The former trailing `assert False` could never run: both branches return.
    return 2 if 'B2' in exp else 64

def icenode_dyn(exp):
    """Return the dynamics tag parsed from an experiment name.

    Args:
        exp: Experiment name (string).

    Returns:
        'NA' when 'inicenodelite' does not occur in ``exp``; otherwise 'gru'
        when 'inicenodelite_gru_' occurs in it, and 'mlp' in all other cases.
    """
    if 'inicenodelite' not in exp:
        return 'NA'
    return 'gru' if 'inicenodelite_gru_' in exp else 'mlp'
        


def sql2dataframe(db):
    """Load one evaluation SQLite database into a flat, annotated results table.

    Reads the tables 'evaluation_runs', 'evaluation_status', 'experiments',
    'metrics' and 'results', left-joins the lookup tables onto 'results', and
    adds per-(experiment, metric) running and overall extrema of 'value'.

    Args:
        db: Path of the SQLite file.

    Returns:
        A DataFrame sorted by experiment and step, with every column whose
        name contains 'id' removed and these columns added:

        - 'step': first run of digits in 'snapshot', as int.
        - 'last_max' / 'last_min': running max / min of 'value' within each
          (experiment, metric) group, in step order.
        - 'is_max' / 'is_min': whether 'value' equals the running max / min.
        - 'max' / 'min': overall max / min of 'value' within the group.
        - 'model', 'loss', 'predictor', 'batch_size', 'icenode_dyn': parsed
          from the experiment name.
    """
    engine = sqlalchemy.create_engine("sqlite:///%s" % db, execution_options={"sqlite_raw_colnames": True},
                                     connect_args={'timeout': 5})
    try:
        tables = {name: pd.read_sql_table(name, engine) for name in
                  ('evaluation_runs', 'evaluation_status', 'experiments', 'metrics', 'results')}
    finally:
        # Release the pooled connection once everything is in memory.
        engine.dispose()

    # Rename generic 'id' / 'name' columns so each join key and label is unambiguous.
    metrics = tables['metrics'].rename(columns={'name': 'metric', 'id': 'metric_id'})
    eval_runs = tables['evaluation_runs'].rename(columns={'id': 'evaluation_id'})
    experiments = tables['experiments'].rename(columns={'name': 'experiment', 'id': 'experiment_id'})
    eval_status = tables['evaluation_status'].rename(columns={'id': 'status_id', 'name': 'status'})

    res = tables['results']
    for lookup, key in ((metrics, 'metric_id'),
                        (eval_runs, 'evaluation_id'),
                        (experiments, 'experiment_id'),
                        (eval_status, 'status_id')):
        res = pd.merge(res, lookup, on=key, how='left')

    # Raw-string pattern (no invalid '\d' escape); expand=False yields a Series.
    res['step'] = res['snapshot'].str.extract(r'(\d+)', expand=False).astype(int)

    # Sorting by step first makes the cumulative extrema follow training order.
    res = res.sort_values(['experiment_id', 'step'])

    values = res.groupby(['experiment_id', 'metric'])['value']
    res['last_max'] = values.cummax()
    res['last_min'] = values.cummin()
    res['is_max'] = res['value'] == res['last_max']
    res['is_min'] = res['value'] == res['last_min']
    res['max'] = values.transform('max')
    res['min'] = values.transform('min')

    # Drop every column whose name contains 'id'; copy so that the columns
    # added below are written to an independent frame.
    res = res[[col for col in res.columns if 'id' not in col]].copy()

    res['model'] = res['experiment'].map(model_name)
    res['loss'] = res['experiment'].map(loss_name)
    res['predictor'] = res['experiment'].map(predictor_name)
    res['batch_size'] = res['experiment'].map(batch_size)
    res['icenode_dyn'] = res['experiment'].map(icenode_dyn)

    return res



In [100]:
# Load every configured database and stack the per-database tables row-wise.
results = pd.concat(list(map(sql2dataframe, db_name)))

In [101]:
# One row per distinct experiment name, indexed by the attributes parsed from that name.
experiments = (
    pd.DataFrame(results.experiment.unique(), columns=['label'])
    .assign(
        model=lambda frame: frame['label'].map(model_name),
        loss=lambda frame: frame['label'].map(loss_name),
        predictor=lambda frame: frame['label'].map(predictor_name),
        batch_size=lambda frame: frame['label'].map(batch_size),
        icenode_dyn=lambda frame: frame['label'].map(icenode_dyn),
    )
    .set_index(['model', 'icenode_dyn', 'loss', 'predictor', 'batch_size'])
    .sort_index()
)
# True when no two experiment names parse to the same attribute combination.
experiments.index.is_unique

In [102]:
# results.metric.unique()


In [103]:
# Whether the parsed attribute combination is unique across all rows of `results`.
(
    results
    .set_index(['model', 'icenode_dyn', 'loss', 'predictor', 'batch_size'])
    .sort_index()
    .index
    .is_unique
)

In [104]:
# Distinct batch sizes present in the loaded results.
results['batch_size'].unique()

In [107]:
# METRIC = 'LossMetric.lead_mse'
# Selections applied to `results`; a row is kept only if it matches every list below.
METRIC = [
    'LeadingAKIPredictionAccuracy.AUC_first_pre_emergence_6.0-48.0',
    'LeadingAKIPredictionAccuracy.AUC_pre_emergence_6.0-48.0',
    'LeadingAKIPredictionAccuracy.AUC_first_pre_emergence_36.0-48.0',
    'LeadingAKIPredictionAccuracy.AUC_pre_emergence_36.0-48.0'
]

MODEL = [
    'eICE-NODE', 
    'GRU-ODE-Bayes', 
    'GRU'
]
PREDICTOR = [
    'mlp', 
    'monotonic'
]
ICENODE_DYN = [
    'mlp', 
    'gru', 
    'NA'
]
LOSS = [
    'mse', 
    'bce'
]
BATCH_SIZE = [
    64,
    2
]
res_metric = results
res_metric = res_metric[res_metric['model'].isin(MODEL)]
res_metric = res_metric[res_metric['predictor'].isin(PREDICTOR)]
# ICENODE_DYN and BATCH_SIZE were previously defined but never applied, so
# editing them had no effect. Both lists currently contain every value their
# column can take, so applying them leaves the present selection unchanged.
res_metric = res_metric[res_metric['icenode_dyn'].isin(ICENODE_DYN)]
res_metric = res_metric[res_metric['loss'].isin(LOSS)]
res_metric = res_metric[res_metric['batch_size'].isin(BATCH_SIZE)]
res_metric = res_metric[res_metric['metric'].isin(METRIC)]


In [108]:
# Highest recorded value for each (experiment, metric) pair in the selection.
(
    res_metric
    .groupby(['experiment', 'metric'])[['value']]
    .max()
)

In [None]:
import numpy as np

from bokeh.plotting import figure, show, curdoc
from bokeh.io import output_notebook, export_svgs
# Configure Bokeh to render figures inline in the notebook's output cells.
output_notebook()

In [14]:
from bokeh.palettes import  mpl, small_palettes, viridis,inferno, cividis, YlOrRd4, Spectral

In [15]:
# Experiment names that survive the selection and will be plotted.
res_metric['experiment'].unique()

In [16]:
# The y-axis label is passed as a fixed string: METRIC is a list, which is not
# a valid axis label, and it was overwritten with this text further down anyway.
p = figure(y_axis_label='Prediction AUC 48-hours in-advance', x_axis_label="Training Step")

# Three more colours than experiments are requested and the first three are
# skipped (see `colors[i + 3]` below).
n_colors = res_metric.experiment.nunique() + 3
# Spectral is only defined for a limited set of palette sizes; fall back to
# viridis, which accepts any size, instead of failing with a KeyError.
colors = palette = Spectral[n_colors] if n_colors in Spectral else viridis(n_colors)

# NOTE(review): METRIC selects several metrics but curves are grouped by
# experiment only, so each curve mixes the rows of all selected metrics.
# Confirm whether one curve per (experiment, metric) is intended.
res_metric = res_metric.sort_values('step')
for i, (exp, exp_df) in enumerate(res_metric.groupby('experiment')):
    color = colors[i + 3]
    model_label = exp_df['model'].iloc[0]
    loss_label = exp_df['loss'].iloc[0]
    predictor_label = exp_df['predictor'].iloc[0]

    # NOTE(review): 'state_modularity' is not created anywhere in the visible
    # notebook. Use it when present; otherwise fall back to the dynamics tag
    # (omitted when it is 'NA') rather than raising a KeyError.
    name_parts = [model_label]
    if 'state_modularity' in exp_df.columns:
        name_parts.insert(0, exp_df['state_modularity'].iloc[0])
    elif exp_df['icenode_dyn'].iloc[0] != 'NA':
        name_parts.insert(0, exp_df['icenode_dyn'].iloc[0])

    label = f'{" ".join(name_parts)} ({loss_label}) ({predictor_label})'

    # Line: running best value over training steps.
    p.line(x='step', y='last_max', color=color,
           line_width=4, legend_label=label, source=exp_df)
    # Markers: the steps at which a new running best was reached.
    p.scatter(x='step', y='value', color=color,
           line_width=2, legend_label=label, source=exp_df[exp_df['is_max']])

p.legend.location = "bottom_right"
p.legend.label_text_font_size = '16pt'

curdoc().theme = 'caliber'
p.xaxis.axis_label_text_font_size = "20pt"
p.yaxis.axis_label_text_font_size = "20pt"
p.xaxis.major_label_text_font_size = '20px'
p.yaxis.major_label_text_font_size = '20px'

show(p)

In [17]:
# Switch the figure to the SVG backend first, then write it out as an SVG file.
p.output_backend = "svg"
export_svgs(p, filename="aki_prediction.svg")