# Code to Run Single Experiments for Traditional Classifiers

#### Do not change the cell below

In [1]:
import os
# Move the working directory three levels up from where the kernel started.
# Presumably this lands on the repository root, so that the `src.*` imports and
# the relative "data/..." and "results/..." paths used later resolve -- confirm.
# NOTE(review): the path is relative, so this is not idempotent. Re-running the
# cell without restarting the kernel moves up three more levels.
os.chdir("../../..")

## Load Libraries

In [2]:

# Import required packages
from datetime import datetime
import json
import numpy as np

from src.models.TraditionalClassifiers.model import BaseClassifier, ParallelClassifier
from src.dataloading.TrainingDataLoader import TrainingDataLoader

from src.metrics.summarize import summary_all_metrics
from src.visualization.metrics_and_losses import plot_multiclass_metrics

## Configuration 

In [3]:
# Wall-clock reference used to report the total run time at the end of the notebook.
start_time = datetime.now()

# Hyper-parameters forwarded verbatim to the underlying classifier.
# `random_state` is set to the same value as the data-loading `seed` below;
# keep the two in sync when changing either.
model_params = dict(
    kernel="linear",
    C=20,
    random_state=1313,
)

# Single source of truth for the experiment: model choice, data selection,
# splitting and pre-processing options.
args = dict(
    model_name="svm",
    feature_parallelize=False,
    data_dir="data/MIMIC/processed",
    time_window=[0, 10],
    feat_subset="vitals-static",
    train_test_ratio=0.6,
    train_val_ratio=0.6,
    seed=1313,
    normalize=True,
    num_folds=1,
    model_params=model_params,
)

## Load and Process Data

In [4]:

# Build the data loader from the data-related entries of `args`; each entry is
# forwarded as the keyword argument of the same name.
loader_keys = (
    "data_dir",
    "time_window",
    "feat_subset",
    "train_test_ratio",
    "train_val_ratio",
    "seed",
    "normalize",
    "num_folds",
)
data_loader = TrainingDataLoader(**{key: args[key] for key in loader_keys})

# NOTE(review): this calls an underscore-prefixed method of the loader; switch to
# a public accessor if the loader exposes one.
data_characteristics = data_loader._get_data_characteristics()

# Dimensions needed to construct the model: (samples, timestamps, features) and
# the number of outcomes.
input_shape = tuple(
    data_characteristics[field]
    for field in ("num_samples", "num_timestamps", "num_features")
)
output_dim = data_characteristics["num_outcomes"]

8328it [00:49, 169.44it/s]


## Initialize Model

In [5]:

#### INITIALIZE MODEL
# Choose the classifier wrapper according to the `feature_parallelize` flag, then
# build it once with the shared constructor arguments.
classifier_cls = ParallelClassifier if args["feature_parallelize"] else BaseClassifier
model = classifier_cls(
    input_shape=input_shape,
    output_dim=output_dim,
    model_name=args["model_name"],
    **args["model_params"],
)


## Train Model

In [6]:
# Training split for fold 0.
X_train, y_train = data_loader.get_train_X_y(fold=0)

# The classifier is fit on integer class indices, taken as the arg-max over the
# last axis of the targets.
labels_train = np.argmax(y_train, axis=-1)
model.train(train_data=(X_train, labels_train))



Training svm model...


[LibSVM]

## Evaluate on Test Set and Get Metrics

In [None]:
# Fetch the "test" split for fold 0 from the data loader.
X_test, y_test = data_loader.get_test_X_y(fold=0, mode="test")
# Model outputs for the test inputs; these are passed on as `scores_pred` when
# the metrics are computed.
y_pred = model.predict(X_test)

In [None]:
# Convert the test targets to integer class labels. The arg-max is taken over the
# last axis, the same axis used to derive the training labels, so both splits are
# decoded identically (for 2-D targets this equals axis=1).
labels_test = np.argmax(y_test, axis=-1)

# Compute all evaluation metrics from the true labels and the model outputs.
metrics_dict = summary_all_metrics(labels_true=labels_test, scores_pred=y_pred)
# Optional visualisation of the metrics (currently disabled):
# ax, lachiche_ax = plot_multiclass_metrics(metrics_dict=metrics_dict, class_names=data_characteristics["outcomes"])


## Log Results and Performance

In [None]:

# Persist the model together with its data, predictions and run metadata.
cur_time_as_str = datetime.now().strftime("%Y%m%d-%H%M%S")

# Timestamped output directory; the suffix records which classifier wrapper ran.
test_dir = (
    f"results/{args['model_name']}_parallel/{cur_time_as_str}/"
    if args["feature_parallelize"]
    else f"results/{args['model_name']}_base/{cur_time_as_str}/"
)

# Arrays handed to the model's logger: both splits, the true test labels and
# the model outputs.
objects_to_log = dict(
    data=dict(
        X=(X_train, X_test),
        y=(y_train, y_test),
    ),
    labels_test=labels_test,
    y_pred=y_pred,
)

# Metadata describing the run: dataset summary, configuration and metrics.
run_info = dict(
    data_characteristics=data_characteristics,
    args=args,
    metrics=metrics_dict,
)

model.log_model(save_dir=test_dir, objects_to_log=objects_to_log, run_info=run_info)

# Elapsed time since the configuration cell was executed.
elapsed = datetime.now() - start_time
print("Time taken: ", elapsed)

In [None]:
# Headline metrics, each reduced to its mean and shown with three decimals.
summary_metric_names = ("macro_f1_score", "precision", "recall", "ovr_auroc")
for name in summary_metric_names:
    mean_value = np.mean(metrics_dict[name])
    print(f"{name}: {mean_value:.3f}")

In [None]:
# Show the confusion-matrix entry of the computed metrics.
print(metrics_dict["confusion_matrix"])