# Tabular modelling

# Imports

In [None]:
import importlib

import xgboost as xgb
from sklearn.metrics import accuracy_score

from cyclops.processors.column_names import ENCOUNTER_ID
from cyclops.processors.constants import FEATURES
from cyclops.utils.file import load_pickle

from use_cases.common.util import get_use_case_params

# Choose dataset and use case

In [None]:
# Dataset and use case whose parameters drive the rest of this notebook.
DATASET = "gemini"
USE_CASE = "mortality_decompensation"

# Look up the parameter object for the chosen dataset / use case pair.
use_case_params = get_use_case_params(DATASET, USE_CASE)
# input() blocks until the user presses Enter, so the warning naming the
# loaded constants cannot scroll past unnoticed.
input(f"WARNING: LOADING CONSTANTS FROM {use_case_params}")

# Setup

In [None]:
# Modelling constants

## Data

In [None]:
# Load the pickled train / validation / test feature (X) and target (y)
# vectors. Each path is the TAB_VEC_COMB prefix from the use-case parameters
# followed by a fixed "unaligned_tab_<split>_<X|y>" name.
# The parameter object is bound as `use_case_params` in the dataset-selection
# cell; the earlier spelling `usecase_params` was never defined and raised
# NameError on the first load.
X_train_vec = load_pickle(use_case_params.TAB_VEC_COMB + "unaligned_tab_train_X")
y_train_vec = load_pickle(use_case_params.TAB_VEC_COMB + "unaligned_tab_train_y")
X_val_vec = load_pickle(use_case_params.TAB_VEC_COMB + "unaligned_tab_val_X")
y_val_vec = load_pickle(use_case_params.TAB_VEC_COMB + "unaligned_tab_val_y")
X_test_vec = load_pickle(use_case_params.TAB_VEC_COMB + "unaligned_tab_test_X")
y_test_vec = load_pickle(use_case_params.TAB_VEC_COMB + "unaligned_tab_test_y")

In [None]:
# Display what the training vectors report for the FEATURES axis
# (presumably the feature names -- confirm against get_index).
X_train_vec.get_index(FEATURES)

In [None]:
# Display the length of the ENCOUNTER_ID axis index of the training vectors
# (presumably the number of training encounters -- confirm).
len(X_train_vec.get_index(ENCOUNTER_ID))

In [None]:
# Feature selection - e.g.,
# kept_vec, unwanted_vec = X_train_vec.split_out(FEATURES, "featureA")

In [None]:
# Pull the underlying `.data` payload out of each vector wrapper, one
# (features, targets) pair per split.
X_train, y_train = X_train_vec.data, y_train_vec.data
X_val, y_val = X_val_vec.data, y_val_vec.data
X_test, y_test = X_test_vec.data, y_test_vec.data

## Model

In [None]:
# Init classifier
# The constructor takes hyperparameters only; training data is supplied to
# fit() in the Training section. Passing X_train / y_train here was an error,
# so the classifier is created with the library's default hyperparameters.
xgb_clf = xgb.XGBClassifier()

# Training

In [None]:
# Fit the classifier on the training features and targets.
xgb_clf.fit(X_train, y_train)

# Validation

In [None]:
# Predict on the validation split. This section is validation, so it should
# use X_val / y_val (loaded above but previously unused) and leave the
# held-out test split untouched until a final evaluation.
preds = xgb_clf.predict(X_val)

# Score: accuracy of the predictions against the validation targets
accuracy_score(y_val, preds)