Conceding in next 10 actions model - Model Tuning and Building - GBM

In [None]:
import sys
# Presumably added so the project-local `vaep` package can be imported further down.
# NOTE(review): this is a machine-specific absolute path; the notebook will not run
# from any other checkout location without editing it.
sys.path.append("/Users/ciaran/Documents/Projects/AFL/git-repositories/afl-player-ratings/")

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import joblib
from sklearn.model_selection import train_test_split

import warnings
# NOTE(review): with no category/module given this silences every warning for the
# rest of the session, including deprecation and convergence warnings.
warnings.filterwarnings('ignore')

from vaep.vaep_config import vaep_modelling_v2_file_path
from vaep.domain.preprocessing.preprocessing import *
# from vaep.domain.contracts.mappings import Mappings
from vaep.domain.contracts.modelling_data_contract import ModellingDataContract
from vaep.domain.modelling.hyperparameter_tuning import XGBHyperparameterTuner
from vaep.domain.modelling.supermodel import SuperXGBClassifier
from vaep.domain.modelling.optuna_xgb_param_grid import OptunaXGBParamGrid

# Widen pandas' display limits so wide/long previews are not truncated.
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 999)

# IPython autoreload in mode 2: re-import edited modules before each cell runs,
# so changes to the project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

Model Version

In [None]:
# Versioned identifiers for the artefacts written by this notebook.
model_name = 'vaep_conceding'
model_version = 2
model_file_name = f"{model_name}_v{model_version}"

# Output directories for the exported model and the prediction CSVs.
# NOTE(review): both are machine-specific absolute paths.
model_output_path = "/Users/ciaran/Documents/Projects/AFL/git-repositories/afl-player-ratings/vaep/model_outputs/models"
prediction_output_path = "/Users/ciaran/Documents/Projects/AFL/git-repositories/afl-player-ratings/vaep/model_outputs/predictions"

# Preprocessor artefact name; the output path is left empty here.
preprocessor_file_name = f"preprocessor_v{model_version}"
preprocessor_output_path = ""

In [None]:
# Name of the response column, taken from the shared modelling data contract.
# It is also the prefix of the "<RESPONSE>TrainingSet/TestSet/ValidationSet" flag columns.
RESPONSE = ModellingDataContract.RESPONSE_CONCEDES

In [None]:
# Columns selected from each split as model inputs (see X[FEATURES] further down).
# NOTE(review): the attribute is named `feature_list_scores` while the response is
# RESPONSE_CONCEDES — presumably both models share one feature list; confirm.
FEATURES = ModellingDataContract.feature_list_scores

In [None]:
# Monotone constraints for the model; an empty dict here, i.e. none are specified.
MONOTONE_CONSTRAINTS = {}

Load Data

In [None]:
# Read the modelling dataset from the configured CSV path and preview its last rows.
df_modelling = pd.read_csv(filepath_or_buffer=vaep_modelling_v2_file_path)
df_modelling.tail(n=5)

Train Test Split

In [None]:
# Each split is selected by its own boolean flag column, named "<RESPONSE><suffix>".
# Note that the calibration frame is taken from the "ValidationSet" flag.
training_data, test_data, cal_data = (
    df_modelling[df_modelling[RESPONSE + flag_suffix]]
    for flag_suffix in ("TrainingSet", "TestSet", "ValidationSet")
)

In [None]:
# Separate the response column from the remaining columns for every split.
X_train = training_data.drop(columns=[RESPONSE])
y_train = training_data[RESPONSE]

X_test = test_data.drop(columns=[RESPONSE])
y_test = test_data[RESPONSE]

X_cal = cal_data.drop(columns=[RESPONSE])
y_cal = cal_data[RESPONSE]

In [None]:
# Mean of the response in the train / test / calibration splits, shown side by side
# so the base rates can be compared.
y_train.mean(), y_test.mean(), y_cal.mean()

In [None]:
# Preview the test split with the response column removed.
X_test.head()

Preprocess Data

In [None]:
# NOTE(review): preprocessing is currently disabled; the next cell selects FEATURES
# directly from the raw splits. If this is re-enabled, X_cal would presumably need
# transforming too — only the train and test splits are handled below.
# preprocessor = DataPreprocessor()
# preprocessor.fit(X_train)

# X_train = preprocessor.transform(X_train)
# X_test = preprocessor.transform(X_test)

In [None]:
# Restrict every split to the model's feature columns (no other transformation is applied).
X_train_preproc, X_test_preproc, X_cal_preproc = (
    split[FEATURES] for split in (X_train, X_test, X_cal)
)

In [None]:
# (rows, columns) of the train / calibration / test feature matrices — note the order.
X_train_preproc.shape, X_cal_preproc.shape, X_test_preproc.shape

In [None]:
# Preview the training feature matrix.
X_train_preproc.head()

In [None]:
# Preview the test feature matrix.
X_test_preproc.head()

Optuna Hyperparameter Tuning Class - HyperParameterTuner & XGBHyperparameterTuner

In [None]:
# Build the tuner on the training split only. The constraints come from the
# notebook-level MONOTONE_CONSTRAINTS constant rather than a duplicated literal, so
# editing that constant changes tuning and final training together.
xgb_tuner = XGBHyperparameterTuner(
    X_train_preproc,
    y_train,
    monotonicity_constraints=MONOTONE_CONSTRAINTS,
)

In [None]:
# Run the hyperparameter search (implemented in vaep.domain.modelling.hyperparameter_tuning).
# NOTE(review): no random seed is set in this notebook; whether the search is
# reproducible depends on the tuner's own defaults — confirm.
xgb_tuner.tune_hyperparameters()

In [None]:
# Fetch the tuner's best parameter set and display it; later cells add fixed
# training settings to this same dict.
params = xgb_tuner.get_best_params()
params

Training Model - SuperXGBClassifier class for training and predictions

In [None]:
# Fixed training settings layered on top of the tuned hyperparameters.
# Keys are added in the same order as before.
params.update({
    # NOTE(review): the objective is read from an attribute named `error` — confirm
    # it holds the intended XGBoost objective string.
    'objective': OptunaXGBParamGrid.error,
    'num_rounds': 1000,
    'early_stopping_rounds': 50,
    'verbosity': 1,
    # Use the notebook-level constant instead of a second hard-coded {} so that
    # tuning and training cannot drift apart; copied so that later changes to
    # `params` never mutate the constant.
    'monotone_constraints': dict(MONOTONE_CONSTRAINTS),
})

In [None]:
# Hand all three splits and the final parameter dict to the project's XGBoost wrapper.
# NOTE(review): the test split is passed in at construction; if fit() uses it for
# early stopping, the test-set figures reported later are optimistic — confirm.
super_xgb = SuperXGBClassifier(
    X_train=X_train_preproc,
    y_train=y_train,
    X_test=X_test_preproc,
    y_test=y_test,
    X_cal=X_cal_preproc,
    y_cal=y_cal,
    params=params,
)

In [None]:
# Train the model using the data and params supplied to the constructor.
super_xgb.fit()

In [None]:
# Display the underlying model object held by the wrapper (after fit()).
super_xgb.xgb_model

In [None]:
# Feature names recorded on the booster — expected to match FEATURES.
super_xgb.xgb_model.get_booster().feature_names

In [None]:
# Predictions from predict() for the train and test feature matrices (same order as before).
train_preds, test_preds = map(
    super_xgb.predict, (X_train_preproc, X_test_preproc)
)

In [None]:
# Uncalibrated probabilities: keep column 1 of predict_proba's output for each split
# (presumably the positive class — confirm against the wrapper).
train_probas, test_probas, cal_probas = (
    super_xgb.predict_proba(features)[:, 1]
    for features in (X_train_preproc, X_test_preproc, X_cal_preproc)
)

In [None]:
# Fit the wrapper's calibration step — presumably on the X_cal / y_cal split passed
# to the constructor; confirm in SuperXGBClassifier.
super_xgb.calibrate()

In [None]:
# Calibrated probabilities for each split.
# NOTE(review): unlike the uncalibrated cell, the result is not sliced with [:, 1] —
# presumably calibrate=True already returns a 1-D array; confirm, otherwise the
# means and the exported column below would be wrong.
train_cal_probas, test_cal_probas, cal_cal_probas = (
    super_xgb.predict_proba(features, calibrate=True)
    for features in (X_train_preproc, X_test_preproc, X_cal_preproc)
)

Check Average Predictions

In [None]:
# Train split: mean uncalibrated probability, observed response rate, mean calibrated probability.
train_probas.mean(), y_train.mean(), train_cal_probas.mean()

In [None]:
# Test split: mean uncalibrated probability, observed response rate, mean calibrated probability.
test_probas.mean(), y_test.mean(), test_cal_probas.mean()

In [None]:
# Calibration split: mean uncalibrated probability, observed response rate, mean calibrated probability.
cal_probas.mean(), y_cal.mean(), cal_cal_probas.mean()

Export model

In [None]:
# Write the model to "<model_output_path>/<model_file_name>.joblib".
super_xgb.export_model(f"{model_output_path}/{model_file_name}.joblib")

Export data and predictions

In [None]:
# Training rows (all non-response columns plus the response) with the model outputs
# appended as three extra columns, written to CSV without the index and previewed.
train_export = pd.concat([X_train, y_train], axis=1).assign(
    **{
        f"xgb_preds_{RESPONSE}": train_preds,
        f"xgb_probas_{RESPONSE}": train_probas,
        f"xgb_probas_cal_{RESPONSE}": train_cal_probas,
    }
)
train_export.to_csv(
    f"{prediction_output_path}/train_predictions_{model_file_name}.csv",
    index=False,
)
train_export.head()

In [None]:
# Test rows (all non-response columns plus the response) with the model outputs
# appended as three extra columns, written to CSV without the index and previewed.
test_export = pd.concat([X_test, y_test], axis=1).assign(
    **{
        f"xgb_preds_{RESPONSE}": test_preds,
        f"xgb_probas_{RESPONSE}": test_probas,
        f"xgb_probas_cal_{RESPONSE}": test_cal_probas,
    }
)
test_export.to_csv(
    f"{prediction_output_path}/test_predictions_{model_file_name}.csv",
    index=False,
)
test_export.head()

Save preprocessor

In [None]:
# NOTE(review): disabled because no `preprocessor` object is created in this notebook.
# If re-enabled, note that preprocessor_output_path is "" and is joined to the file
# name without a path separator.
# joblib.dump(preprocessor, preprocessor_output_path + preprocessor_file_name + ".joblib")