# Tabular modelling

# Imports

In [1]:
from sklearn.metrics import roc_auc_score

from cyclops.processors.column_names import ENCOUNTER_ID
from cyclops.processors.constants import FEATURES, TARGETS
from cyclops.utils.file import load_pickle
from drift_detection.baseline_models.static.gbt import fit_gbt
from use_cases.common.util import get_use_case_params

# Choose dataset and use-case

In [2]:
DATASET = "gemini"
USE_CASE = "mortality_decompensation"

use_case_params = get_use_case_params(DATASET, USE_CASE)
input(f"WARNING: LOADING CONSTANTS FROM {use_case_params}")



''

# Setup

In [3]:
OUTCOME = use_case_params.OUTCOME_DEATH

## Data

In [4]:
X_train_vec = load_pickle(use_case_params.TAB_UNALIGNED + "tab_train_X")
y_train_vec = load_pickle(use_case_params.TAB_UNALIGNED + "tab_train_y")
X_val_vec = load_pickle(use_case_params.TAB_UNALIGNED + "tab_val_X")
y_val_vec = load_pickle(use_case_params.TAB_UNALIGNED + "tab_val_y")
X_test_vec = load_pickle(use_case_params.TAB_UNALIGNED + "tab_test_X")
y_test_vec = load_pickle(use_case_params.TAB_UNALIGNED + "tab_test_y")

2022-10-24 14:25:52,082 [1;37mINFO[0m cyclops.utils.file - Loading pickled data from /mnt/nfs/project/delirium/cyclops/use_cases/gemini/mortality_decompensation/./data/4-final/unaligned_tab_train_X.pkl
2022-10-24 14:25:52,236 [1;37mINFO[0m cyclops.utils.file - Loading pickled data from /mnt/nfs/project/delirium/cyclops/use_cases/gemini/mortality_decompensation/./data/4-final/unaligned_tab_train_y.pkl
2022-10-24 14:25:52,309 [1;37mINFO[0m cyclops.utils.file - Loading pickled data from /mnt/nfs/project/delirium/cyclops/use_cases/gemini/mortality_decompensation/./data/4-final/unaligned_tab_val_X.pkl
2022-10-24 14:25:52,324 [1;37mINFO[0m cyclops.utils.file - Loading pickled data from /mnt/nfs/project/delirium/cyclops/use_cases/gemini/mortality_decompensation/./data/4-final/unaligned_tab_val_y.pkl
2022-10-24 14:25:52,333 [1;37mINFO[0m cyclops.utils.file - Loading pickled data from /mnt/nfs/project/delirium/cyclops/use_cases/gemini/mortality_decompensation/./data/4-final/unaligned_

In [5]:
X_train_vec.get_index(FEATURES)

array(['admit_via_ambulance_', 'admit_via_ambulance_air',
       'admit_via_ambulance_ground', 'admit_via_ambulance_no_ambulance',
       'admit_via_ambulance_no_info', 'age',
       'diagnosis_trajectory_A00_B99', 'diagnosis_trajectory_C00_D49',
       'diagnosis_trajectory_D50_D89', 'diagnosis_trajectory_E00_E89',
       'diagnosis_trajectory_F01_F99', 'diagnosis_trajectory_G00_G99',
       'diagnosis_trajectory_H00_H59', 'diagnosis_trajectory_H60_H95',
       'diagnosis_trajectory_I00_I99', 'diagnosis_trajectory_J00_J99',
       'diagnosis_trajectory_K00_K95', 'diagnosis_trajectory_L00_L99',
       'diagnosis_trajectory_M00_M99', 'diagnosis_trajectory_N00_N99',
       'diagnosis_trajectory_O00_O99', 'diagnosis_trajectory_Q00_Q99',
       'diagnosis_trajectory_R00_R99', 'diagnosis_trajectory_S00_T88',
       'diagnosis_trajectory_V00_Y99', 'diagnosis_trajectory_Z00_Z99',
       'from_acute_care_institution_mapped', 'from_nursing_home_mapped',
       'hospital_id_MSH', 'hospital_id_SB

In [6]:
len(X_train_vec.get_index(ENCOUNTER_ID))

110394

In [10]:
y_train_vec.g

array(['outcome_death'], dtype='<U13')

# Feature/target selection

In [7]:

_, y_train_vec = y_train_vec.split_out(FEATURES, [OUTCOME])
_, y_val_vec = y_val_vec.split_out(FEATURES, [OUTCOME])
_, y_test_vec = y_test_vec.split_out(FEATURES, [OUTCOME])

IndexError: `indices` must be an integer array

In [None]:
X_train = X_train_vec.data
y_train = y_train_vec.data
X_val = X_val_vec.data
y_val = y_val_vec.data
X_test = X_test_vec.data
y_test = y_test_vec.data

# Model

In [None]:
# Choose model
X_train.shape, y_train.shape, X_val.shape, y_val.shape

# Training and Validation

In [None]:
best_model = fit_gbt(X_train, y_train, X_val, y_val)

# Testing

In [None]:
predictions = best_model.predict_proba(X_test)[:, 1]
score = roc_auc_score(y_test, predictions)
print(score)