Total Game Score Model - Model Evaluation - GBM

In [None]:
import pandas as pd
import numpy as np
import joblib
import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.append('..')
from total_points_model.domain.preprocessing.data_preprocessor import DataPreprocessor
from total_points_model.domain.modelling.hyperparameter_tuning import XGBHyperparameterTuner
from total_points_model.domain.modelling.supermodel import SuperXGBRegressor
from total_points_model.domain.modelling.model_evaluation import XGBRegressorEvaluator

# Raise pandas' display limits so wide feature frames are not truncated in cell output.
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 999)

# IPython autoreload, mode 2: modules are re-imported before each cell runs, so
# edits to the local total_points_model package take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

Load Data

In [None]:
# Merged match-level dataset; the path is relative to the notebook's working directory.
MATCH_DATA_CSV = "data/merged-data/afl_match_data.csv"
afl_data = pd.read_csv(MATCH_DATA_CSV)

In [None]:
# One pass over the raw frame:
#   1. flag rows dated before 2019 as modelling (training) rows,
#   2. keep seasons from 2010 onwards,
#   3. give the target column an underscore name.
# NOTE(review): the date cut-off is a plain comparison against the string
# "2019-01-01", so it assumes 'Date' holds ISO-formatted (YYYY-MM-DD...) values — confirm.
afl_data = (
    afl_data
    .assign(ModellingFilter=lambda frame: frame['Date'] < "2019-01-01")
    .loc[lambda frame: frame['Year'] >= 2010]
    .rename(columns={'Total Game Score': 'Total_Game_Score'})
)

In [None]:
# Time-based split: flagged (pre-2019) rows train the model, the rest are held out.
is_modelling_row = afl_data['ModellingFilter']
training_data = afl_data.loc[is_modelling_row]
test_data = afl_data.loc[~is_modelling_row]

In [None]:
# Name of the target column, as produced by the rename of 'Total Game Score' earlier in the notebook.
response = "Total_Game_Score"

In [None]:
# Separate the target column from the features for each split.
X_train = training_data.drop(columns=[response])
y_train = training_data[response]

X_test = test_data.drop(columns=[response])
y_test = test_data[response]

In [None]:
# Sanity check: shapes of the train/test features and targets, in that order.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

Preprocess Data

In [None]:
# Mapping from the raw, space-separated column names to underscore names.
afl_rename_dict = {
    raw_name: raw_name.replace(' ', '_')
    for raw_name in ('Total Game Score', 'Home Win')
}

In [None]:
# Build the project preprocessor, handing it the column-rename mapping defined above.
# NOTE(review): how DataPreprocessor applies rename_dict is not visible in this notebook.
preprocessor = DataPreprocessor(rename_dict=afl_rename_dict)

In [None]:
# Fit on the training features only; the held-out test split is never passed to fit().
preprocessor.fit(X_train)

In [None]:
# Run both splits through the preprocessor that was fitted on the training features.
X_train_preproc, X_test_preproc = (
    preprocessor.transform(split) for split in (X_train, X_test)
)

In [None]:
# Preview the first rows of the preprocessed training features.
X_train_preproc.head()

In [None]:
# Sanity check: shapes of the preprocessed train and test feature frames.
tuple(frame.shape for frame in (X_train_preproc, X_test_preproc))

Load Model

In [None]:
# Load the previously saved model artefact.
# NOTE(review): this path is absolute (rooted at "/"), while the match data earlier
# in the notebook is read from a relative path — confirm "/models/..." is intended.
# joblib.load unpickles the file, which can execute arbitrary code: only load
# artefacts from a trusted source.
xgb_model = joblib.load("/models/xgb_regression_test_v1.joblib")

Get Predictions

In [None]:
# Score both preprocessed splits with the loaded model (train first, then test).
train_preds, test_preds = map(
    xgb_model.predict, (X_train_preproc, X_test_preproc)
)

Evaluate Model

In [None]:
# Assemble the frame the evaluator consumes for the held-out split:
# preprocessed features + model prediction + a flat baseline + the actual target.

# The actuals are attached by position, so the row counts must agree. Fail loudly
# here rather than evaluate against silently misaligned targets if the
# preprocessor ever adds or drops rows.
assert len(X_test_preproc) == len(y_test), (
    f"Preprocessed test features have {len(X_test_preproc)} rows "
    f"but there are {len(y_test)} test targets"
)

test_evaluation_data = X_test_preproc.copy()
# Reuse the predictions already computed for this exact frame instead of scoring it again.
test_evaluation_data['xgb_preds'] = test_preds
# Constant benchmark prediction used as the comparison column.
# NOTE(review): the provenance of 170 is not shown in this notebook — confirm.
test_evaluation_data['old_preds'] = 170
# Positional assignment: assigning a Series would align on index labels and
# produce NaN / shifted actuals whenever the preprocessed frame's index is not
# a fresh 0..n-1 range.
test_evaluation_data[response] = y_test.to_numpy()
test_evaluation_data.head()

In [None]:
# Wrap the test-set frame in the project evaluator, naming the columns that hold
# the actuals, the model predictions and the baseline to compare against.
test_xgb_evals = XGBRegressorEvaluator(
    model=xgb_model,
    data=test_evaluation_data,
    actual_name=response,
    expected_name="xgb_preds",
    compare_name="old_preds",
)

In [None]:
# Feature-importance chart, capped at 10 features and requesting the "total_gain" importance type.
# NOTE(review): plotting logic lives in XGBRegressorEvaluator, which is not visible here.
test_xgb_evals.plot_feature_importance(max_num_features=10, importance_type="total_gain")

In [None]:
# SHAP summary plot from the test-set evaluator (implementation not shown in this notebook).
test_xgb_evals.plot_shap_summary_plot()

In [None]:
# Features passed to the PDP and ICE plotting cells that follow.
feature_list = ['Home_Total_Q4_Score_avg2']

In [None]:
# Plot for every feature in feature_list.
# NOTE(review): presumably partial dependence, going by the method name — confirm against XGBRegressorEvaluator.
test_xgb_evals.plot_pdp(feature_list)

In [None]:
# Same feature list as the PDP cell.
# NOTE(review): presumably individual conditional expectation curves, going by the method name — confirm.
test_xgb_evals.plot_ice(feature_list)

In [None]:
# Overall plot for the test split.
# NOTE(review): "ave" presumably means actual-vs-expected over the columns given to the evaluator — confirm.
test_xgb_evals.plot_ave()

In [None]:
# Per-feature plot for the 'Venue_Docklands' column on the test split.
# NOTE(review): assumes this column exists in the preprocessed frame (looks like an encoded venue indicator — confirm).
test_xgb_evals.plot_feature_ave('Venue_Docklands')

In [None]:
# Per-feature plot for the same feature that is listed in feature_list.
test_xgb_evals.plot_feature_ave('Home_Total_Q4_Score_avg2')

Model Metrics

In [None]:
# Test-split metric. NOTE(review): presumably mean absolute error of xgb_preds against the actuals;
# whether the old_preds baseline is reported alongside is not visible here.
test_xgb_evals.get_mae()

In [None]:
# Test-split metric. NOTE(review): presumably mean squared error — confirm against the evaluator.
test_xgb_evals.get_mse()

In [None]:
# Test-split metric. NOTE(review): presumably the R^2 score — confirm against the evaluator.
test_xgb_evals.get_r2_score()

Training Evaluation

In [None]:
# Assemble the frame the evaluator consumes for the training split:
# preprocessed features + model prediction + a flat baseline + the actual target.

# The actuals are attached by position, so the row counts must agree. Fail loudly
# here rather than evaluate against silently misaligned targets if the
# preprocessor ever adds or drops rows.
assert len(X_train_preproc) == len(y_train), (
    f"Preprocessed training features have {len(X_train_preproc)} rows "
    f"but there are {len(y_train)} training targets"
)

train_evaluation_data = X_train_preproc.copy()
# Reuse the predictions already computed for this exact frame instead of scoring it again.
train_evaluation_data['xgb_preds'] = train_preds
# Constant benchmark prediction used as the comparison column.
# NOTE(review): the provenance of 170 is not shown in this notebook — confirm.
train_evaluation_data['old_preds'] = 170
# Positional assignment: assigning a Series would align on index labels and
# produce NaN / shifted actuals whenever the preprocessed frame's index is not
# a fresh 0..n-1 range.
train_evaluation_data[response] = y_train.to_numpy()
train_evaluation_data.head()

In [None]:
# Training-split counterpart of the test evaluator: same model and column roles,
# pointed at the training evaluation frame.
train_xgb_evals = XGBRegressorEvaluator(
    model=xgb_model,
    data=train_evaluation_data,
    actual_name=response,
    expected_name="xgb_preds",
    compare_name="old_preds",
)

In [None]:
# Training-split metric, for comparison with the test-split value.
# NOTE(review): presumably mean absolute error — confirm against the evaluator.
train_xgb_evals.get_mae()

In [None]:
# Training-split metric. NOTE(review): presumably mean squared error — confirm against the evaluator.
train_xgb_evals.get_mse()

In [None]:
# Training-split metric. NOTE(review): presumably the R^2 score — confirm against the evaluator.
train_xgb_evals.get_r2_score()

In [None]:
# SHAP summary plot from the training-split evaluator (implementation not shown in this notebook).
train_xgb_evals.plot_shap_summary_plot()

In [None]:
# Overall plot for the training split.
# NOTE(review): "ave" presumably means actual-vs-expected — confirm.
train_xgb_evals.plot_ave()

In [None]:
# Per-feature plot for the 'Venue_Docklands' column on the training split.
train_xgb_evals.plot_feature_ave('Venue_Docklands')

In [None]:
# Per-feature plot for 'Home_Total_Q4_Score_avg2' on the training split.
train_xgb_evals.plot_feature_ave('Home_Total_Q4_Score_avg2')

Double Lift

In [None]:
# Double lift chart on the test split.
# NOTE(review): presumably contrasts xgb_preds with the old_preds baseline given to the evaluator — confirm.
test_xgb_evals.plot_double_lift_chart()

In [None]:
# Double lift chart on the training split, for comparison with the test-split chart.
train_xgb_evals.plot_double_lift_chart()