# Tabular modelling

# Imports

In [None]:
from sklearn.metrics import roc_auc_score

from cyclops.processors.column_names import ENCOUNTER_ID
from cyclops.processors.constants import FEATURES
from cyclops.utils.file import load_pickle
from drift_detection.baseline_models.static.gbt import fit_gbt
from use_cases.common.util import get_use_case_params

# Choose dataset and use-case

In [None]:
# Dataset and use-case identifiers; together they determine which set of
# use-case parameters is looked up below.
DATASET = "gemini"
USE_CASE = "mortality_decompensation"

# Resolve the parameters object for the chosen dataset/use-case pair.
use_case_params = get_use_case_params(DATASET, USE_CASE)
# input() blocks until the user presses Enter, so the run pauses on this
# warning and the user has to acknowledge which constants are in use.
# NOTE(review): non-interactive execution (no stdin) will stall or fail here.
input(f"WARNING: LOADING CONSTANTS FROM {use_case_params}")

# Setup

In [None]:
# Modelling constants

## Data

In [None]:
# Load the pickled train/val/test feature (X) and target (y) vectors from
# the TAB_VEC_COMB location, in the order: train X/y, val X/y, test X/y.
(
    X_train_vec,
    y_train_vec,
    X_val_vec,
    y_val_vec,
    X_test_vec,
    y_test_vec,
) = (
    load_pickle(use_case_params.TAB_VEC_COMB + stem)
    for stem in (
        "tab_train_X",
        "tab_train_y",
        "tab_val_X",
        "tab_val_y",
        "tab_test_X",
        "tab_test_y",
    )
)

In [None]:
# Display the index of the training vector along its FEATURES axis
# (presumably the feature names - confirm against the vectorized data API).
X_train_vec.get_index(FEATURES)

In [None]:
# Display how many entries the ENCOUNTER_ID index of the training vector has
# (presumably the number of training encounters - confirm).
len(X_train_vec.get_index(ENCOUNTER_ID))

In [None]:
# Feature selection - e.g.,
# kept_vec, unwanted_vec = X_train_vec.split_out(FEATURES, "featureA")

In [None]:
# Pull the `.data` payload out of each loaded vector so the model code works
# with the underlying data rather than the vector wrappers.
X_train, y_train = X_train_vec.data, y_train_vec.data
X_val, y_val = X_val_vec.data, y_val_vec.data
X_test, y_test = X_test_vec.data, y_test_vec.data

# Model

In [None]:
# Choose model (placeholder - no model is selected in this cell).
# Display the shapes of the training and validation inputs/targets as a quick
# sanity check before fitting.
X_train.shape, y_train.shape, X_val.shape, y_val.shape

# Training and Validation

In [None]:
# Fit the model with fit_gbt, handing it both the training and validation
# splits; the returned object is what gets scored on the test split.
# NOTE(review): presumably a gradient-boosted tree selected using the
# validation data - confirm in drift_detection.baseline_models.static.gbt.
best_model = fit_gbt(X_train, y_train, X_val, y_val)

# Testing

In [None]:
# Score the fitted model on the test split and print the ROC AUC.
test_probabilities = best_model.predict_proba(X_test)
# Keep column 1 only - presumably the positive-class probability, following
# the scikit-learn predict_proba convention; confirm for fit_gbt's model.
predictions = test_probabilities[:, 1]
score = roc_auc_score(y_test, predictions)
print(score)