### Synthetic Drift Detection ###

## Imports ## 

In [None]:
import matplotlib.pyplot as plt
from drift_detector.detector import Detector
from drift_detector.experimenter import Experimenter
from drift_detector.plotter import plot_drift_samples_pval
from drift_detector.reductor import Reductor
from drift_detector.synthetic_applicator import (
    SyntheticShiftApplicator,
    apply_predefined_shift,
)
from drift_detector.tester import DCTester, TSTester
from gemini.query import get_gemini_data
from gemini.utils import get_label, import_dataset_hospital, normalize, process, scale

## Parameters ##

In [None]:
# Root directory of the drift experiment; passed to get_gemini_data().
PATH = "/mnt/nfs/project/delirium/drift_exp/JULY-04-2022/"
# Saved model file handed to the Reductor. Built from PATH so the two
# locations cannot drift apart when the experiment directory changes.
MODEL_PATH = PATH + "saved_models/random_lstm.pt"
# TIMESTEPS and AGGREGATION_TYPE are passed to normalize() and process().
TIMESTEPS = 6
AGGREGATION_TYPE = "time_flatten"
# Defaults that the Tester cell may overwrite with interactive input.
CONTEXT_TYPE = "lstm"
REPRESENTATION = "rf"
# Hospital codes passed to import_dataset_hospital().
HOSPITALS = ["SMH", "MSH", "THPC", "THPM", "UHNTG", "UHNTW", "PMH", "SBK"]
# Outcome name passed to import_dataset_hospital() and get_label().
OUTCOME = "mortality"

## Query Data

In [None]:
# Query the data stored under PATH. The call returns three objects that are
# unpacked here (presumably admin metadata, features and labels — confirm
# against get_gemini_data).
admin_data, x, y = get_gemini_data(PATH)

## Preprocess Data

In [None]:
# Get subset: split the queried data into train / validation / test sets for
# the chosen OUTCOME and HOSPITALS. Note that admin_data is rebound to the
# value returned here.
# NOTE(review): "random" presumably names the split strategy — confirm against
# import_dataset_hospital.
(X_tr, y_tr), (X_val, y_val), (X_t, y_t), feats, admin_data = import_dataset_hospital(
    admin_data, x, y, "random", OUTCOME, HOSPITALS
)

# Normalize data: each split is normalized by its own call.
X_tr_normalized = normalize(admin_data, X_tr, AGGREGATION_TYPE, TIMESTEPS)
X_val_normalized = normalize(admin_data, X_val, AGGREGATION_TYPE, TIMESTEPS)
X_t_normalized = normalize(admin_data, X_t, AGGREGATION_TYPE, TIMESTEPS)

# For every aggregation type except "time", the labels returned by
# import_dataset_hospital are replaced by labels looked up via get_label.
if AGGREGATION_TYPE != "time":
    # Get labels
    y_tr = get_label(admin_data, X_tr, OUTCOME)
    y_val = get_label(admin_data, X_val, OUTCOME)
    y_t = get_label(admin_data, X_t, OUTCOME)

# Scale data: applied to the normalized splits, one call per split.
X_tr_scaled = scale(X_tr_normalized)
X_val_scaled = scale(X_val_normalized)
X_t_scaled = scale(X_t_normalized)

# Process data: final step producing the X_*_final arrays used by the
# detector and the shift experiments below.
X_tr_final = process(X_tr_scaled, AGGREGATION_TYPE, TIMESTEPS)
X_val_final = process(X_val_scaled, AGGREGATION_TYPE, TIMESTEPS)
X_t_final = process(X_t_scaled, AGGREGATION_TYPE, TIMESTEPS)

## Reductor

In [None]:
# Ask interactively which dimensionality-reduction method the Reductor uses.
DR_TECHNIQUE = input("Select dimensionality reduction technique: ")

# The Reductor also receives the saved model file at MODEL_PATH.
# NOTE(review): var_ret=0.8 is presumably the fraction of variance to retain
# for variance-based methods — confirm against Reductor.
reductor = Reductor(
    dr_method=DR_TECHNIQUE,
    model_path=MODEL_PATH,
    var_ret=0.8,
)

## Tester

In [None]:
# Ask interactively which test method to use for drift detection.
TESTER_METHOD = input("Select test method: ")
# Methods routed to TSTester and to DCTester, respectively.
tstesters = ["lk", "lsdd", "mmd", "tabular", "ctx_mmd", "chi2", "fet", "ks"]
dctesters = ["spot_the_diff", "classifier", "classifier_uncertainty"]

if TESTER_METHOD in tstesters:
    tester = TSTester(
        tester_method=TESTER_METHOD,
    )
elif TESTER_METHOD in dctesters:
    # DCTester additionally needs a model method, chosen interactively.
    MODEL_METHOD = input("Select model method: ")
    tester = DCTester(
        tester_method=TESTER_METHOD,
        model_method=MODEL_METHOD,
    )

    # NOTE(review): "ctx_mmd" and "lk" appear in `tstesters` above as tester
    # methods, yet they are compared against MODEL_METHOD here — confirm that
    # this is intended.
    if MODEL_METHOD == "ctx_mmd":
        CONTEXT_TYPE = input("Select context type: ")

    if MODEL_METHOD == "lk":
        REPRESENTATION = input("Select learned kernel representation: ")
else:
    # Fail here instead of hitting a NameError (or silently reusing a `tester`
    # left over from an earlier run) when the Detector cell executes.
    raise ValueError(
        f"Unknown test method {TESTER_METHOD!r}; "
        f"expected one of {tstesters + dctesters}."
    )

## Detector 

In [None]:
# Combine the chosen reductor and tester into a single detector.
# NOTE(review): p_val_threshold=0.05 is presumably the significance level
# below which drift is flagged — confirm against Detector.
detector = Detector(
    reductor=reductor,
    tester=tester,
    p_val_threshold=0.05,
)
# Fit the detector on the processed training split.
detector.fit(X_tr_final)

## SyntheticShiftApplicator

In [None]:
# Shift applicator used by the custom experiment below.
# NOTE(review): "gn_shift" presumably stands for a Gaussian-noise shift —
# confirm against SyntheticShiftApplicator.
shiftapplicator = SyntheticShiftApplicator(
    shift_type="gn_shift",
)

## Experimenter

In [None]:
# Experimenter for the custom shift: wires together the fitted detector, the
# shift applicator and the admin data returned by import_dataset_hospital.
experimenter_custom = Experimenter(
    detector=detector, shiftapplicator=shiftapplicator, admin_data=admin_data
)

## Run custom shift experiment

In [None]:
# Apply the synthetic "gn_shift" to the processed test split.
# NOTE(review): delta / noise_amt / clip are passed through unchanged; their
# exact meaning is defined by the shift implementation — confirm there.
X_t_final_shifted = experimenter_custom.apply_synthetic_shift(
    X_t_final, shift_type="gn_shift", delta=0.01, noise_amt=0.01, clip=False
)

# Run drift detection on the shifted test data.
results = experimenter_custom.detect_shift_samples(X_t_final_shifted)

# Bare expression so the notebook displays the results as the cell output.
results

## Run predefined shift experiments ##

In [None]:
# Maps each selectable shift experiment to the predefined shift names that are
# run for it (the names are later passed to apply_predefined_shift).
PREDEFINED_SHIFTS = {
    "ko_shift": ["ko_shift_0.1", "ko_shift_0.5", "ko_shift_1.0"],
    "small_gn_shift": [
        "small_gn_shift_0.1",
        "small_gn_shift_0.5",
        "small_gn_shift_1.0",
    ],
    "medium_gn_shift": [
        "medium_gn_shift_0.1",
        "medium_gn_shift_0.5",
        "medium_gn_shift_1.0",
    ],
    "large_gn_shift": [
        "large_gn_shift_0.1",
        "large_gn_shift_0.5",
        "large_gn_shift_1.0",
    ],
    "mfa_shift": ["mfa_shift_0.25", "mfa_shift_0.5", "mfa_shift_0.75"],
    "cp_shift": ["cp_shift_0.25", "cp_shift_0.75"],
    "small_bn_shift": [
        "small_bn_shift_0.1",
        "small_bn_shift_0.5",
        "small_bn_shift_1.0",
    ],
    "medium_bn_shift": [
        "medium_bn_shift_0.1",
        "medium_bn_shift_0.5",
        "medium_bn_shift_1.0",
    ],
    "large_bn_shift": [
        "large_bn_shift_0.1",
        "large_bn_shift_0.5",
        "large_bn_shift_1.0",
    ],
}

# Ask interactively which shift experiment to run.
SHIFT = input("Select shift experiment: ")

# Reject unknown names here; otherwise `shifts` would stay undefined (or keep a
# value from an earlier run) and the failure would surface in a later cell.
if SHIFT not in PREDEFINED_SHIFTS:
    raise ValueError(
        f"Unknown shift experiment {SHIFT!r}; "
        f"expected one of {sorted(PREDEFINED_SHIFTS)}."
    )

# Copy so that later changes to `shifts` never touch the lookup table.
shifts = list(PREDEFINED_SHIFTS[SHIFT])

In [None]:
# Experimenter for the predefined shifts. Unlike experimenter_custom, no
# shiftapplicator is passed: the shift is applied separately through
# apply_predefined_shift in the loop below.
experimenter_predefined = Experimenter(detector=detector, admin_data=admin_data)

In [None]:
# Run drift detection once per predefined shift and collect the results,
# keyed by shift name, for plotting at the end of the notebook.
shift_results = {}
for shift in shifts:
    # Shift a copy so the unshifted test split X_t_final stays available.
    X_t_final_shifted, _ = apply_predefined_shift(
        shift, X=X_t_final.copy(), y=y_t
    )
    results = experimenter_predefined.detect_shift_samples(X_t_final_shifted)
    shift_results[shift] = results

In [None]:
# Re-create the shifted test set for the last predefined shift so that it can
# be plotted in the next cell. The shift is taken from `shifts` explicitly
# instead of relying on a loop variable left behind by another cell.
last_shift = shifts[-1]
X_t_final_shifted, _ = apply_predefined_shift(
    last_shift, X=X_t_final.copy(), y=y_t
)

In [None]:
fig, ax = plt.subplots(figsize=(11, 6))
# Overlay density-normalized histograms of column 0 for the validation split,
# the test split and the shifted test split.
for values, label in (
    (X_val_final, "val"),
    (X_t_final, "test"),
    (X_t_final_shifted, "test+noise"),
):
    plt.hist(values[:, 0], bins=50, alpha=0.5, label=label, density=True)
fig.legend(loc="upper right")
plt.show()

In [None]:
# Another Experimenter built from the same detector and admin data (the same
# arguments as experimenter_predefined), used for the single-sample run below.
experimenter2 = Experimenter(detector=detector, admin_data=admin_data)

In [None]:
# Run detection through detect_shift_sample (singular) with sample=100; the
# earlier cells call detect_shift_samples instead.
# NOTE(review): `sample` is presumably the number of test samples drawn —
# confirm against Experimenter.detect_shift_sample.
experimenter2.detect_shift_sample(X_t_final_shifted, sample=100)

## Plot shift experiments

In [None]:
# Plot the results collected per predefined shift. The second argument, 0.05,
# is the same value as the p_val_threshold given to the Detector.
plot_drift_samples_pval(shift_results, 0.05)