# Code to Run Single Experiments for Traditional Classifiers

#### Do not change the cell below

In [1]:
import os
os.chdir("../../..")

## Load Libraries

In [2]:

# Import required packages
import json
import numpy as np

from src.models.TSKM.model import TSKM
from src.dataloading.TrainingDataLoader import TrainingDataLoader

from src.metrics.summarize import summary_all_metrics
from src.visualization.metrics_and_losses import plot_multiclass_metrics

from datetime import datetime



## Configuration 

In [3]:
start_time = datetime.now()

args = {
    "model_name": "TSKM",
    "data_dir":"data/MIMIC/processed",
    "time_window": [0, 10],
    "feat_subset":"vitals-static",
    "train_test_ratio":0.6,
    "train_val_ratio":0.6,
    "seed": 3535,
    "normalize": True,
    "num_folds": 1,
    "model_params": {
        "K": 5,
        "metric": "euclidean",
        "random_state": 3535
    }
}

## Load Data and Process

In [4]:

start_time = datetime.now()

#### LOAD DATA
data_loader = TrainingDataLoader(
    data_dir=args["data_dir"],
    time_window=args["time_window"],
    feat_subset=args["feat_subset"],
    train_test_ratio=args["train_test_ratio"],
    train_val_ratio=args["train_val_ratio"],
    seed=args["seed"],
    normalize=args["normalize"],
    num_folds=args["num_folds"]
)
data_characteristics = data_loader._get_data_characteristics()


# Unpack
input_shape = data_characteristics["num_samples"], data_characteristics["num_timestamps"], data_characteristics["num_features"]
output_dim = data_characteristics["num_outcomes"]

8328it [01:51, 74.46it/s] 


## Load Model

In [5]:

#### TRAIN MODEL
# Initialize model
model = TSKM(input_shape=input_shape, 
            output_dim=output_dim, 
            model_name=args["model_name"], 
            K=args["model_params"]["K"], 
            random_state=args["model_params"]["random_state"],
            metric=args["model_params"]["metric"],
            verbose=1,
            max_iter=25
        )


## Train Model

In [6]:

# Get whole training data and validation data
X_train, y_train = data_loader.get_train_X_y(fold=0)
model.train(train_data=(X_train, y_train))



Training TSKM model...


13976.109 --> 11185.419 --> 10834.084 --> 10646.303 --> 10502.735 --> 10401.684 --> 10343.196 --> 10305.468 --> 10285.835 --> 10272.363 --> 10260.735 --> 10253.490 --> 10248.633 --> 10243.191 --> 10238.691 --> 10236.163 --> 10232.854 --> 10227.879 --> 10224.796 --> 10222.978 --> 10221.515 --> 10220.086 --> 10218.966 --> 10218.587 --> 10218.434 --> 


## Evaluate on Test Set and Get Metrics

In [7]:

#### EVALUATE MODEL
# Evaluate on test data
X_test, y_test = data_loader.get_test_X_y(fold=0, mode="test")
y_pred, clus_pred = model.predict(X_test)

# Convert to Labels
labels_test = np.argmax(y_test, axis=1)

# Compute Metrics and visualize
metrics_dict = summary_all_metrics(
    labels_true=labels_test, scores_pred=y_pred,
    X=X_test.reshape(X_test.shape[0], -1), clus_pred=clus_pred)
# ax, lachiche_ax = plot_multiclass_metrics(metrics_dict=metrics_dict, class_names=data_characteristics["outcomes"])


## Log Results and Performance

In [8]:

# Log Model, Results and Visualizations
cur_time_as_str = datetime.now().strftime("%Y%m%d-%H%M%S")
test_dir = f"results/{args['model_name']}/{cur_time_as_str}/"

means, cluster_probs = model.get_model_objects()
run_info = {
    "data_characteristics": data_characteristics,
    "args": args,
    "metrics": metrics_dict
}
output_dir = {
    "data": {
        "X": (X_train, X_test),
        "y": (y_train, y_test)
    },
    "model": {
        "means": means,
        "cluster_probs": cluster_probs
    },
    "labels_test": labels_test,
    "y_pred": y_pred,
    "clus_pred": clus_pred,
}

model.log_model(save_dir=test_dir, objects_to_log=output_dir, run_info=run_info)
print("Time taken: ", datetime.now() - start_time)

Time taken:  0:02:39.051246


In [9]:
for name in ["macro_f1_score", "precision", "recall", "ovr_auroc", 
            "silhouette", "davies_bouldin", "calinski_harabasz"]:
    print(f"{name}: {np.mean(metrics_dict[name]):.3f}")

macro_f1_score: 0.281
precision: 0.270
recall: 0.307
ovr_auroc: 0.641
silhouette: 0.185
davies_bouldin: 1.534
calinski_harabasz: 685.347


In [10]:
print(metrics_dict["confusion_matrix"])

[[   0    0    0    0]
 [   0  496   90  340]
 [   0    0    0    0]
 [  10  648  439 1309]]


In [11]:
run_info

{'data_characteristics': {'num_samples': 8328,
  'num_timestamps': 11,
  'num_features': 9,
  'num_outcomes': 4,
  'features': ['TEMP',
   'HR',
   'RR',
   'SPO2',
   'SBP',
   'DBP',
   'age',
   'gender',
   'ESI'],
  'outcomes': ['Death', 'Discharge', 'ICU', 'Ward']},
 'args': {'model_name': 'TSKM',
  'data_dir': 'data/MIMIC/processed',
  'time_window': [0, 10],
  'feat_subset': 'vitals-static',
  'train_test_ratio': 0.6,
  'train_val_ratio': 0.6,
  'seed': 3535,
  'normalize': True,
  'num_folds': 1,
  'model_params': {'K': 5, 'metric': 'euclidean', 'random_state': 3535}},
 'metrics': {'accuracy': array([0.9969988, 0.6764706, 0.8412365, 0.5687275], dtype=float32),
  'macro_f1_score': array([0.        , 0.47922704, 0.        , 0.6456227 ], dtype=float32),
  'micro_f1_score': 0.5417167,
  'precision': array([0.        , 0.53563714, 0.        , 0.54405653], dtype=float32),
  'recall': array([0.        , 0.43356642, 0.        , 0.7938144 ], dtype=float32),
  'ovr_auroc': array([0.7389

In [12]:
print("Means: ", np.reshape(means, (model.K, *model.input_shape[1:])))
print("Covariances: ", np.reshape(model.K, *model.input_shape[1:]))
print("Cluster Probs: ", cluster_probs)

Means:  [[[9.84951590e+01 1.06838460e+02 1.90844766e+01 9.69815291e+01
   1.27083494e+02 7.44007681e+01 6.32240086e+01 5.31618435e-01
   2.31832797e+00]
  [9.84954805e+01 1.06907592e+02 1.90716149e+01 9.69884959e+01
   1.26996141e+02 7.43498571e+01 6.32240086e+01 5.31618435e-01
   2.31832797e+00]
  [9.85043766e+01 1.06738960e+02 1.90819757e+01 9.69818864e+01
   1.27020257e+02 7.43816542e+01 6.32240086e+01 5.31618435e-01
   2.31832797e+00]
  [9.85109682e+01 1.06686442e+02 1.91557521e+01 9.69795641e+01
   1.26889854e+02 7.42099857e+01 6.32240086e+01 5.31618435e-01
   2.31832797e+00]
  [9.85074312e+01 1.06244677e+02 1.91562880e+01 9.68600572e+01
   1.26685852e+02 7.40679707e+01 6.32240086e+01 5.31618435e-01
   2.31832797e+00]
  [9.85437656e+01 1.06006735e+02 1.91611111e+01 9.68346910e+01
   1.25796606e+02 7.36655055e+01 6.32240086e+01 5.31618435e-01
   2.31832797e+00]
  [9.85906752e+01 1.05405002e+02 1.92181851e+01 9.70014469e+01
   1.25360468e+02 7.31386209e+01 6.32240086e+01 5.31618435e

TypeError: order must be str, not int