### Get explainability scores for model predictions

In [None]:
import sys
import itertools
import os
import numpy as np
import pandas as pd
from cyclops.feature_handler import FeatureHandler

sys.path.append('../..')

from drift_detector.explainer import Explainer
from baseline_models.temporal.pytorch.utils import load_ckp, get_device, get_temporal_model

## Get data

In [None]:
# Root directory of the experiment artifacts and the name of the data split
# whose arrays are analysed below.
DIR = "/mnt/nfs/project/delirium/drift_exp/JULY-04-2022"
split_type = "simulated_deployment"

# Inputs for the train / validation / test partitions, loaded in that order.
X_train, X_val, X_test = (
    np.load(os.path.join(DIR, split_type, fname))
    for fname in ("X_train.npy", "X_val.npy", "X_test.npy")
)

# Matching labels for the same three partitions.
y_train, y_val, y_test = (
    np.load(os.path.join(DIR, split_type, fname))
    for fname in ("y_train.npy", "y_val.npy", "y_test.npy")
)

# Restore the saved feature handler to recover the feature (column) names.
feature_handler = FeatureHandler()
feature_handler.load(DIR, "features")

# Temporal feature names first, followed by the static ones.
features = list(
    itertools.chain(
        feature_handler.features["temporal"].columns.tolist(),
        feature_handler.features["static"].columns.tolist(),
    )
)

## Get model

In [None]:
# Architecture identifier shared by the checkpoint file name and the model
# factory. It was previously undefined, so the get_temporal_model call below
# raised NameError. "lstm" is taken from the "_lstm.pt" checkpoint suffix.
# NOTE(review): assumes get_temporal_model accepts the key "lstm" - confirm.
model_name = "lstm"
MODEL_PATH = os.path.join(DIR, "saved_models", f"{split_type}_{model_name}.pt")

# Hyperparameters used to build the network that the checkpoint is loaded into.
device = get_device()
input_dim = 108
hidden_dim = 64
layer_dim = 2
output_dim = 1
dropout = 0.2
last_timestep_only = False

model_params = {
    "device": device,
    "input_dim": input_dim,
    "hidden_dim": hidden_dim,
    "layer_dim": layer_dim,
    "output_dim": output_dim,
    "dropout_prob": dropout,
    "last_timestep_only": last_timestep_only,
}

# Build the model and move it to the selected device.
model = get_temporal_model(model_name, model_params).to(device)

# Load the saved checkpoint; load_ckp also returns the optimizer and the
# epoch count alongside the model.
model, opt, n_epochs = load_ckp(MODEL_PATH, model)

## Explain difference in model predictions

In [None]:
# Prefixes that label each of the six time steps in the flattened feature matrix.
timesteps = ['T1_', 'T2_', 'T3_', 'T4_', 'T5_', 'T6_']

# One column name per (time step, feature) pair, time-step major. The previous
# `ts + features` added a str to a list and raised TypeError.
flattened_feats = [ts + feat for ts in timesteps for feat in features]
# The same columns without the time-step prefix, used as plot labels. Carrying
# these along avoids stripping "T<n>_" with str.replace, which would also
# remove that pattern from the middle of a feature name.
plain_feats = [feat for _ in timesteps for feat in features]

explainer = Explainer(model)
explainer.get_explainer()

# X_val_final / X_t_final were referenced but never defined. Flatten every
# sample to a single row so it lines up with `flattened_feats`.
# NOTE(review): assumes X_val / X_test are (samples, timesteps, features), so a
# C-order reshape yields time-step-major columns - TODO confirm.
X_val_final = X_val.reshape(len(X_val), -1)
X_t_final = X_test.reshape(len(X_test), -1)

X_val_df = pd.DataFrame(X_val_final, columns=flattened_feats)
val_shap_values = explainer.get_shap_values(X_val_df)
X_test_df = pd.DataFrame(X_t_final, columns=flattened_feats)
test_shap_values = explainer.get_shap_values(X_test_df)

# Per-column change in mean absolute SHAP value from validation to test.
diff = np.mean(np.abs(test_shap_values.values), axis=0) - np.mean(
    np.abs(val_shap_values.values), axis=0
)

# Rank columns by difference (largest first) and keep only those whose
# magnitude exceeds 0.01. Building lists instead of unpacking zip(*...) keeps
# this working when nothing passes the threshold.
ranked = sorted(zip(diff, flattened_feats, plain_feats), reverse=True)
ranked = [(delta, label) for delta, _, label in ranked if abs(delta) > 0.01]

# NOTE(review): the original cell unconditionally deleted the entry at index 5
# (sixth-ranked) without explanation. Kept for parity, but guarded so a short
# list no longer raises IndexError - confirm whether this drop is still wanted.
if len(ranked) > 5:
    del ranked[5]

diff_sorted = [delta for delta, _ in ranked]
feats_sorted = [label for _, label in ranked]

# NOTE(review): `plt` (matplotlib.pyplot) is not imported in the visible cells;
# make sure it is imported before this cell runs.
fig, ax = plt.subplots(figsize=(12, 12))
y_pos = np.arange(len(diff_sorted))
ax.barh(y_pos, diff_sorted, align="center")
ax.set_yticks(y_pos, labels=feats_sorted)
ax.invert_yaxis()  # labels read top-to-bottom
ax.set_xlabel("Mean Difference in Shap Value")
ax.set_title("Features")
plt.show()