# Code to Run Single Experiments for Traditional Classifiers

#### Do not change the cell below

In [1]:
import os
# Move the working directory three levels up so that the relative paths used
# by later cells ("src/...", "results/...") resolve (presumably to the
# repository root - confirm). The path is relative, so re-running this cell
# in the same kernel climbs a further three levels; run it once per kernel.
os.chdir("../../..")

## Load Libraries

In [14]:

# Import required packages
import json
import numpy as np

from src.models.ESI.model import ESI
from src.dataloading.TrainingDataLoader import TrainingDataLoader

from src.metrics.summarize import summary_binary_metrics, print_avg_metrics_paper
import src.logging.logger_utils as logger_utils

from datetime import datetime

## Configuration 

In [3]:

# Start the wall-clock timer for the whole run.
start_time = datetime.now()


#### LOAD CONFIGURATIONS
# The run configuration is a JSON document; it is parsed into `args`.
config_path = "src/models/ESI/run_config.json"
with open(config_path) as config_file:
    args = json.load(config_file)


## Load Data and Process

In [4]:

#### LOAD DATA
# Every loader option is taken from the run configuration under the same name,
# so the keyword arguments are built directly from the list of keys.
loader_keys = (
    "data_dir",
    "time_window",
    "feat_subset",
    "train_test_ratio",
    "train_val_ratio",
    "seed",
    "normalize",
    "num_folds",
)
data_loader = TrainingDataLoader(**{option: args[option] for option in loader_keys})

# NOTE(review): `_get_data_characteristics` is underscore-prefixed, i.e. private
# by convention - confirm there is no public accessor to use instead.
data_characteristics = data_loader._get_data_characteristics()

# Unpack
shape_keys = ("num_samples", "num_timestamps", "num_features")
input_shape = tuple(data_characteristics[dim] for dim in shape_keys)
output_dim = data_characteristics["num_outcomes"]
# Position of the "ESI" feature within the feature list.
ESI_idx = data_characteristics["features"].index("ESI")

8328it [00:40, 206.53it/s]


## Load Model

In [5]:

#### INSTANTIATE MODEL
# Only builds the model object; training happens in the next section.
model = ESI(
    input_shape=input_shape,
    output_dim=output_dim,
    ESI_idx=ESI_idx,
)


## Train Model

In [6]:

# This does nothing for ESI (per the original author); the call is kept so the
# notebook follows the same steps as the other models. X_train / y_train are
# also stored with the logged run objects further down.
X_train, y_train = data_loader.get_train_X_y(fold=0)
train_pair = (X_train, y_train)
model.train(train_data=train_pair)



Training ESI model...




## Evaluate on Test Set and Get Metrics

In [24]:

#### EVALUATE MODEL

# Performance on Test data
X_test, y_test = data_loader.get_test_X_y(fold=0, mode="test")

# Keep the raw model output under its own name, then rescale by the global
# maximum so that the largest score equals 1.
# NOTE(review): np.max(raw_scores) == 0 would produce NaN/inf here - confirm
# the model can never return all-zero scores.
raw_scores = model.predict(X_test, y=y_test)
y_pred = raw_scores / np.max(raw_scores)

In [25]:

# Prepare labels: index of the largest entry in each row of y_test
labels_test = np.argmax(y_test, axis=1)


# Compute one-vs-rest binary metrics for each outcome.
# NOTE(review): the same score vector `y_pred` is used for every outcome; only
# the binary target changes. The saved output below shows recall ~ 1.0 and zero
# true negatives for all outcomes - confirm this is the intended evaluation.
output_dic = {
    outcome: summary_binary_metrics(
        labels_true=(labels_test == outcome),
        scores_pred=y_pred,
    )
    for outcome in range(output_dim)
}

# Aggregate all metrics over the keys in output_dic: for each metric name,
# collect a list holding one value per outcome (in outcome order).
metric_keys = output_dic[0].keys()
metrics_dict = {
    metric: [output_dic[outcome][metric] for outcome in range(output_dim)]
    for metric in metric_keys
}

# Convert for compatibility: expose the same lists under two additional names
metrics_dict["macro_f1_score"] = metrics_dict["f1_score"]
metrics_dict["ovr_auroc"] = metrics_dict["auroc"]



## Log Results and Performance

In [26]:

# Log Model, Results and Visualizations
# Each run is written to its own timestamped directory.
cur_time_as_str = datetime.now().strftime("%Y%m%d-%H%M%S")
test_dir = f"results/ESI/{cur_time_as_str}/"

# Save outputs into data objects and run information
data_objects = dict(
    y_pred=y_pred,
    labels_test=labels_test,
    X=(X_train, X_test),
    y=(y_train, y_test),
)

run_info = dict(
    data_characteristics=data_characteristics,
    args=args,
    metrics=metrics_dict,
)

model.log_model(save_dir=test_dir, objects_to_log=data_objects, run_info=run_info)

# Wall-clock time since `start_time` was set in the configuration cell.
elapsed = datetime.now() - start_time
print("Time taken: ", elapsed)


Time taken:  0:13:51.382894


In [28]:
# Display the aggregated metrics: each key maps to a list with one entry per outcome.
metrics_dict

{'accuracy': [0.003001200480192077,
  0.3433373349339736,
  0.15876350540216086,
  0.49489795918367346],
 'f1_score': [0.005984440395116325,
  0.5111706843068743,
  0.2740222716560729,
  0.6621160365238007],
 'precision': [0.00300120048018307,
  0.34333733493294316,
  0.15876350540168438,
  0.4948979591821882],
 'recall': [0.9999999989999999,
  0.9999999999912589,
  0.9999999999810963,
  0.9999999999939357],
 'auroc': [0.39695966, 0.56600946, 0.3568213, 0.51819646],
 'auprc': [0.0026974764, 0.37969515, 0.14325953, 0.50437856],
 'confusion_matrix': [array([[  10, 3322],
         [   0,    0]]),
  array([[1144, 2188],
         [   0,    0]]),
  array([[ 529, 2803],
         [   0,    0]]),
  array([[1649, 1683],
         [   0,    0]])],
 'true_false_pos_neg': [(10, 0, 3322, 0),
  (1144, 0, 2188, 0),
  (529, 0, 2803, 0),
  (1649, 0, 1683, 0)],
 'lachiche': [{}, {}, {}, {}],
 'lachiche_threshold': [None, None, None, None],
 'macro_f1_score': [0.005984440395116325,
  0.5111706843068743,
  

## Log to CSV Tracker

In [27]:

# ===================== CSV LOGGING
csv_path = "results/ESI/tracker.csv"

# Header: every run-config key except "model_params", then the metric columns.
params_header = [param for param in args if param != "model_params"]
metrics_header = ["F1", "Precision", "Recall", "Auroc", "SIL", "DBI", "VRI"]
logger_utils.make_csv_if_not_exists(csv_path, params_header + metrics_header)

# Append Row
# NOTE(review): assumes print_avg_metrics_paper yields values in the same
# order as `metrics_header` - confirm against its implementation.
metrics_to_print = print_avg_metrics_paper(metrics_dict)
param_values = [args[param] for param in params_header]
row_append = (*param_values, *metrics_to_print)
logger_utils.write_csv_row(csv_path, row_append)

macro_f1_score: 0.363
precision: 0.250
recall: 1.000
ovr_auroc: 0.459
silhouette: N/A
davies_bouldin: N/A
calinski_harabasz: N/A


## Other Analysis

In [11]:
# Print the mean over outcomes for the headline metrics.
# NOTE(review): the saved output below carries a lower execution count than the
# metrics cell and disagrees with the tracker summary above - it looks stale;
# re-run after Restart & Run All.
for metric_name in ("macro_f1_score", "precision", "recall", "ovr_auroc"):
    mean_value = np.mean(metrics_dict[metric_name])
    print(f"{metric_name}: {mean_value:.3f}")

macro_f1_score: 0.001
precision: 0.001
recall: 0.250
ovr_auroc: 0.459


In [12]:
# Print the per-outcome confusion matrices collected in the metrics cell.
# NOTE(review): the saved output below is a single 4x4 array, unlike the list of
# 2x2 arrays shown in the metrics display above - it appears to come from an
# earlier run; re-run this cell.
print(metrics_dict["confusion_matrix"])

[[  10 1144  529 1649]
 [   0    0    0    0]
 [   0    0    0    0]
 [   0    0    0    0]]
