# Tabular modelling

# Imports

In [None]:
from drift_detection.baseline_models.static.gbt import fit_gbt
from sklearn.metrics import roc_auc_score

from cyclops.process.constants import FEATURES
from cyclops.utils.file import load_pickle
from use_cases.common.util import get_use_case_params

# Choose dataset and use-case

In [None]:
# Dataset / use-case pair used to look up the use-case parameters.
DATASET, USE_CASE = "mimiciv", "mortality_decompensation"

use_case_params = get_use_case_params(DATASET, USE_CASE)
# input() blocks until Enter is pressed, so execution pauses here and the
# user can check which constants were picked up before going any further.
input(f"WARNING: LOADING CONSTANTS FROM {use_case_params}")

# Setup

In [None]:
# Target outcome, read from the OUTCOME_DEATH constant of the selected
# use-case parameters; used further down to pick the target out of the
# y vectors when they hold more than one outcome.
OUTCOME = use_case_params.OUTCOME_DEATH

## Data

In [None]:
# Name of the use-case attribute that holds the location of the saved vectors.
VEC_DIR = "TAB_UNALIGNED"
USE_CASE_PARAM_VEC_DIR = getattr(use_case_params, VEC_DIR)

# Load the feature (X) and target (y) pickles of the train, validation and
# test splits, in that order. Every file name is appended directly to the
# location prefix, so the prefix is assumed to already end with a path
# separator -- TODO confirm against the use-case constants.
(
    X_train_vec,
    y_train_vec,
    X_val_vec,
    y_val_vec,
    X_test_vec,
    y_test_vec,
) = (
    load_pickle(USE_CASE_PARAM_VEC_DIR + stem)
    for stem in (
        "tab_train_X",
        "tab_train_y",
        "tab_val_X",
        "tab_val_y",
        "tab_test_X",
        "tab_test_y",
    )
)

In [None]:
# Look up the FEATURES index of the training inputs; as the last expression
# of a notebook cell its value is shown as the cell output.
X_train_vec.get_index(FEATURES)

# Feature/target selection

In [None]:
# The target vectors may carry more than one outcome along the FEATURES axis.
# When they do, split OUTCOME out of every split and keep only the second
# element of each returned pair (the first one is discarded into `_`).
if len(y_train_vec.get_index(FEATURES)) > 1:
    (_, y_train_vec), (_, y_val_vec), (_, y_test_vec) = (
        target_vec.split_out(FEATURES, [OUTCOME])
        for target_vec in (y_train_vec, y_val_vec, y_test_vec)
    )

In [None]:
# Pull the underlying `.data` payload out of each vector wrapper, one
# (features, targets) pair per split.
X_train, y_train = X_train_vec.data, y_train_vec.data
X_val, y_val = X_val_vec.data, y_val_vec.data
X_test, y_test = X_test_vec.data, y_test_vec.data

# Model

In [None]:
# Show the shapes of the training and validation inputs/targets before
# fitting (the tuple is displayed as the cell output in a notebook).
X_train.shape, y_train.shape, X_val.shape, y_val.shape

# Training and Validation

In [None]:
# Fit the model with the project's fit_gbt helper, passing both the training
# and the validation split.
# NOTE(review): presumably fit_gbt uses the validation split for model
# selection and returns the best estimator -- confirm in
# drift_detection.baseline_models.static.gbt.
best_model = fit_gbt(X_train, y_train, X_val, y_val)

# Testing

In [None]:
# Take column 1 of the predicted probabilities for the held-out test split.
# NOTE(review): presumably column 1 is the positive class (scikit-learn
# predict_proba convention) -- confirm for the model returned by fit_gbt.
predictions = best_model.predict_proba(X_test)[:, 1]
# Area under the ROC curve of those scores against the test targets.
score = roc_auc_score(y_test, predictions)
print(score)