# Code to Run a Single Experiment for the DirVRNN Model

#### Do not change the cell below

In [1]:
# Move the working directory three levels up (presumably to the repository
# root - TODO confirm) so that the `src.` imports and the relative
# "data/..." and "results/..." paths used further down resolve.
# NOTE(review): the path is relative to the *current* working directory, so
# re-running this cell without restarting the kernel climbs another three
# levels. Run it exactly once per kernel session.
import os
os.chdir("../../..")

## Load Libraries

In [10]:

# Import required packages
import json
import numpy as np
import wandb

from src.models.DIRVRNN.model import DirVRNN
from src.dataloading.TrainingDataLoader import TrainingDataLoader

from src.metrics.summarize import summary_all_metrics
from src.visualization.metrics_and_losses import plot_multiclass_metrics

from datetime import datetime



## Configuration 

In [6]:
# Wall-clock reference used by the "Time taken" report in the logging cell.
start_time = datetime.now()

# Single source of truth for the experiment. The top-level keys (except
# "model_name", "model_params" and "training_params") are forwarded to
# TrainingDataLoader; "model_params" goes to the DirVRNN constructor and
# "training_params" to model.fit.
args = {
    # Names the results directory ("results/<model_name>/<timestamp>/") and is
    # stored in the logged run info. It was "TSKM" (a leftover from another
    # notebook) although this notebook trains DirVRNN, which would have filed
    # the results under the wrong model.
    "model_name": "DirVRNN",

    # --- Data loading options ---
    "data_dir": "data/MIMIC/processed",
    "time_window": [0, 10],
    "feat_subset": "vitals-static",
    "train_test_ratio": 0.6,
    "train_val_ratio": 0.6,
    "seed": 3535,          # shared by the data loader and the model
    "normalize": True,
    "num_folds": 1,

    # --- DirVRNN constructor options ---
    "model_params": {
        "window_num_obvs": 4,
        "K": 6,
        "latent_dim": 32,
        "n_fwd_blocks": 1,
        "dropout": 0.5,
    },

    # --- Optimisation options passed to model.fit ---
    "training_params": {
        "batch_size": 32,
        "num_epochs": 100,
        "lr": 0.001,
    },
}

## Load Data and Process

In [4]:

# Reset the wall-clock reference immediately before the data is loaded.
start_time = datetime.now()

#### LOAD DATA
# Each loader option is read from `args` under the key of the same name.
loader_option_names = (
    "data_dir",
    "time_window",
    "feat_subset",
    "train_test_ratio",
    "train_val_ratio",
    "seed",
    "normalize",
    "num_folds",
)
data_loader = TrainingDataLoader(
    **{option: args[option] for option in loader_option_names}
)
data_characteristics = data_loader._get_data_characteristics()


# Unpack
# (num_samples, num_timestamps, num_features) as reported by the loader.
input_shape = tuple(
    data_characteristics[field]
    for field in ("num_samples", "num_timestamps", "num_features")
)
output_dim = data_characteristics["num_outcomes"]

8328it [01:46, 78.49it/s] 


## Load Model

In [11]:

# Prepare wandb
wandb.init(project="DirVRNN", config=args)

# Build the model: the input width is the last entry of `input_shape`
# (the feature count), the number of classes is the number of outcomes, and
# the remaining hyper-parameters are copied from the configuration.
model_params = args["model_params"]
model = DirVRNN(
    input_dims=input_shape[-1],
    num_classes=output_dim,
    seed=args["seed"],
    **{
        name: model_params[name]
        for name in ("window_num_obvs", "K", "latent_dim", "n_fwd_blocks", "dropout")
    },
)

## ACCESS Train Data
# NOTE(review): train/validation data are requested with fold=1 while the
# evaluation cell requests fold=0 and the configuration sets num_folds=1 -
# confirm which fold index the loader expects.
X_train, y_train = data_loader.get_train_X_y(fold=1)
X_val, y_val = data_loader.get_test_X_y(fold=1, mode="val")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhenrique-aguiar[0m ([33mhrna-ox[0m). Use [1m`wandb login --relogin`[0m to force relogin


## Train Model

In [12]:

# Train the model on the training split, monitoring the validation split.
#
# The recorded run of this cell stopped with
#     RuntimeError: mat1 and mat2 must have the same dtype
# which PyTorch raises when a matrix product mixes floating-point precisions.
# NumPy arrays default to float64 whereas PyTorch parameters default to
# float32, so the features are cast to float32 before being handed to the
# model.
# NOTE(review): assumes the mismatch comes from the input features and that
# X_train / X_val are array-like - confirm against DirVRNN.fit.
training_params = args["training_params"]
X_train_f32 = np.asarray(X_train, dtype=np.float32)
X_val_f32 = np.asarray(X_val, dtype=np.float32)

model.fit(
    train_data=(X_train_f32, y_train),
    val_data=(X_val_f32, y_val),
    batch_size=training_params["batch_size"],
    lr=training_params["lr"],
    num_epochs=training_params["num_epochs"]
)


Printing Losses loss, Log Lik, KL, Outl


RuntimeError: mat1 and mat2 must have the same dtype

## Evaluate on Test Set and Get Metrics

In [None]:

#### EVALUATE MODEL
# Evaluate on test data
# NOTE(review): the test split is requested with fold=0 whereas the
# train/validation splits use fold=1 - confirm the intended fold index.
X_test, y_test = data_loader.get_test_X_y(fold=0, mode="test")

# Feed float32 features to the model: the training cell's recorded run failed
# with "mat1 and mat2 must have the same dtype", and prediction goes through
# the same model, so the same precision is used here.
# NOTE(review): assumes X_test is array-like - confirm. X_test itself is left
# untouched for the metric computation and logging below.
output = model.predict(X=np.asarray(X_test, dtype=np.float32), y=y_test)
future_outputs = output["outputs_future"]
y_pred = future_outputs["y_pred"].to_numpy()

# Hard cluster assignment: argmax over the last axis of `pis`, taken at the
# final index of its second axis (presumably the last time step - confirm).
clus_pred = np.argmax(future_outputs["pis"].to_numpy()[:, -1, :], axis=-1)

# Convert to Labels
labels_test = np.argmax(y_test, axis=1)

# Compute Metrics and visualize
# The clustering metrics receive each sample flattened to a single row.
metrics_dict = summary_all_metrics(
    labels_true=labels_test, scores_pred=y_pred,
    X=X_test.reshape(X_test.shape[0], -1), clus_pred=clus_pred
)
ax, lachiche_ax = plot_multiclass_metrics(metrics_dict=metrics_dict, class_names=data_characteristics["outcomes"])


## Log Results and Performance

In [None]:

# Log Model, Results and Visualizations
# Each run is saved in its own timestamped folder under results/<model_name>/.
cur_time_as_str = datetime.now().strftime("%Y%m%d-%H%M%S")
test_dir = f"results/{args['model_name']}/{cur_time_as_str}/"

# Run description: data summary, full configuration and evaluation metrics.
run_info = dict(
    data_characteristics=data_characteristics,
    args=args,
    metrics=metrics_dict,
)

# Raw arrays stored next to the model: (train, test) pairs plus predictions.
logged_data = dict(X=(X_train, X_test), y=(y_train, y_test))
objects_to_log = dict(
    data=logged_data,
    labels_test=labels_test,
    y_pred=y_pred,
)
model.log_model(save_dir=test_dir, objects_to_log=objects_to_log, run_info=run_info)

# Elapsed wall-clock time since `start_time` was last set.
elapsed = datetime.now() - start_time
print("Time taken: ", elapsed)
print("Analysis Complete.")


In [None]:
# Report the mean of each headline metric, formatted to three decimals.
summary_metric_names = (
    "macro_f1_score",
    "precision",
    "recall",
    "ovr_auroc",
    "silhouette",
    "davies_bouldin",
    "calinski_harabasz",
)
for name in summary_metric_names:
    mean_value = np.mean(metrics_dict[name])
    print(f"{name}: {mean_value:.3f}")

In [None]:
# Show the confusion matrix stored with the evaluation metrics.
conf_mat = metrics_dict["confusion_matrix"]
print(conf_mat)

In [None]:
# Display the run summary (data characteristics, configuration and metrics)
# that was assembled in the logging cell.
run_info

In [None]:
# Prints per-cluster parameters.
# NOTE(review): `means` and `cluster_probs` are not defined in any cell visible
# in this notebook, so this cell raises NameError on a fresh kernel unless they
# come from elsewhere - it looks like a leftover from another model's notebook.
# NOTE(review): the "Covariances" line passes the scalar `model.K` as the array
# to reshape and unpacks the shape entries as separate positional arguments; it
# presumably should reshape a covariance array instead - confirm the source.
# NOTE(review): `model.input_shape` is not set anywhere in the visible cells -
# verify that DirVRNN exposes it.
print("Means: ", np.reshape(means, (model.K, *model.input_shape[1:])))
print("Covariances: ", np.reshape(model.K, *model.input_shape[1:]))
print("Cluster Probs: ", cluster_probs)