Total Game Score Model - Hyperparameter Tuning - GBM

In [36]:
import pandas as pd
import numpy as np
import warnings
# Suppress every Python warning for the remainder of the kernel session.
# NOTE(review): a blanket 'ignore' also hides deprecation and numerical
# warnings raised by pandas / the modelling code — consider a narrower filter.
warnings.filterwarnings('ignore')

import sys
sys.path.append('..')
from total_points_model.config import raw_data_file_path
from total_points_model.domain.contracts.modelling_data_contract import ModellingDataContract
from total_points_model.domain.contracts.mappings import Mappings
from total_points_model.domain.preprocessing.data_preprocessor import DataPreprocessor
from total_points_model.domain.modelling.hyperparameter_tuning import XGBHyperparameterTuner, XGBYearHyperparameterTuner

pd.options.display.max_rows = 100
pd.options.display.max_columns = 999

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Load Data

In [2]:
# Read the raw match-level data from the path configured in
# total_points_model.config, then preview the first two rows.
afl_data = pd.read_csv(filepath_or_buffer=raw_data_file_path)
afl_data.head(n=2)

Unnamed: 0,Home_Team,Venue,Round_ID,Match_ID,Year,Away_Team,Q1_Score,Q2_Score,Q3_Score,Q4_Score,Margin,Total_Game_Score,Home_Win,City,Date,Attendance,Home_Coach_ID,Away_Coach_ID,Q5_Score,Temperature,Weather_Type,Match_Status,Weather_Description,Ground_Width,Ground_Length,Home_Ground,ModellingFilter,DateTime
0,Brisbane Lions,Gabba,200501,200501_BrisbaneLions_StKilda,2005.0,St Kilda,4.1.25 - 2.4.16,10.5.65 - 6.6.42,14.5.89 - 12.11.83,18.8.116 - 13.15.93,23.0,209.0,1.0,Brisbane,2005-03-24,33369.0,Leigh_Matthews,Grant_Thomas,,18.0,MOSTLY_SUNNY,CONCLUDED,Mostly Sunny,138,156,Primary Home,True,2005-03-24 20:10:00
1,North Melbourne,Docklands,200501,200501_NorthMelbourne_Carlton,2005.0,Carlton,3.4.22 - 3.5.23,6.6.42 - 5.8.38,10.7.67 - 7.10.52,16.9.105 - 12.13.85,20.0,190.0,1.0,Melbourne,2005-03-26,40345.0,Dean_Laidley,Denis_Pagan,,18.0,MOSTLY_SUNNY,CONCLUDED,Mostly Sunny,129,160,Primary Home,True,2005-03-26 13:45:00


In [3]:
# Keep only the rows whose boolean ModellingFilter flag is set.
modelling_mask = afl_data['ModellingFilter']
training_data = afl_data.loc[modelling_mask]

In [4]:
# Column label of the modelling target, as declared on the project's
# ModellingDataContract; used below to split the features from the target.
response = ModellingDataContract.response

In [5]:
# Split the training frame: X holds every column except the response,
# y holds the response column on its own.
X = training_data.drop(columns=[response])
y = training_data[response]

Preprocess Data

In [6]:
# Build the project's preprocessor, passing it the mappings defined on Mappings.
# NOTE(review): what the mappings encode is not visible in this notebook —
# see total_points_model.domain.contracts.mappings.
preprocessor = DataPreprocessor(mapping=Mappings.mappings)

In [7]:
# Fit the preprocessor on the full feature frame.
# NOTE(review): if fit() learns any statistics from the data, fitting on all
# years before the year-wise cross-validation could leak information from the
# validation years — confirm against DataPreprocessor.
preprocessor.fit(X)

In [8]:
# Apply the fitted preprocessor to the same feature frame; the result is the
# model-ready feature set passed to the tuner later in the notebook.
X_preproc = preprocessor.transform(X)

In [9]:
# Preview the first rows of the preprocessed features as a sanity check.
X_preproc.head()

Unnamed: 0,Round,Year,Temperature,random5,Home_Total_Q4_Score_avg2,Home_Total_Q4_Goals_avg2,Home_Total_Q4_Behinds_avg2,Home_Total_Q4_Shots_avg2,Home_Total_Q4_Conversion_avg2,Home_Att_Q4_Score_avg2,Home_Att_Q4_Goals_avg2,Home_Att_Q4_Behinds_avg2,Home_Att_Q4_Shots_avg2,Home_Att_Q4_Conversion_avg2,Home_Def_Q4_Score_avg2,Home_Def_Q4_Goals_avg2,Home_Def_Q4_Behinds_avg2,Home_Def_Q4_Shots_avg2,Home_Def_Q4_Conversion_avg2,Away_Total_Q4_Score_avg2,Away_Total_Q4_Goals_avg2,Away_Total_Q4_Behinds_avg2,Away_Total_Q4_Shots_avg2,Away_Total_Q4_Conversion_avg2,Away_Att_Q4_Score_avg2,Away_Att_Q4_Goals_avg2,Away_Att_Q4_Behinds_avg2,Away_Att_Q4_Shots_avg2,Away_Att_Q4_Conversion_avg2,Away_Def_Q4_Score_avg2,Away_Def_Q4_Goals_avg2,Away_Def_Q4_Behinds_avg2,Away_Def_Q4_Shots_avg2,Away_Def_Q4_Conversion_avg2,Home_Team_Adelaide,Home_Team_Brisbane Lions,Home_Team_Carlton,Home_Team_Collingwood,Home_Team_Essendon,Home_Team_Fremantle,Home_Team_Geelong,Home_Team_Gold Coast,Home_Team_Greater Western Sydney,Home_Team_Hawthorn,Home_Team_Melbourne,Home_Team_North Melbourne,Home_Team_Port Adelaide,Home_Team_Richmond,Home_Team_St Kilda,Home_Team_Sydney,Home_Team_West Coast,Home_Team_Western Bulldogs,Away_Team_Adelaide,Away_Team_Brisbane Lions,Away_Team_Carlton,Away_Team_Collingwood,Away_Team_Essendon,Away_Team_Fremantle,Away_Team_Geelong,Away_Team_Gold Coast,Away_Team_Greater Western Sydney,Away_Team_Hawthorn,Away_Team_Melbourne,Away_Team_North Melbourne,Away_Team_Port Adelaide,Away_Team_Richmond,Away_Team_St Kilda,Away_Team_Sydney,Away_Team_West Coast,Away_Team_Western Bulldogs,Venue_Adelaide Oval,Venue_Bellerive Oval,Venue_Blacktown,Venue_Carrara,Venue_Cazalys Stadium,Venue_Docklands,Venue_Eureka Stadium,Venue_Football Park,Venue_Gabba,Venue_Jiangwan Stadium,Venue_Kardinia Park,Venue_M.C.G.,Venue_Manuka Oval,Venue_Marrara Oval,Venue_Perth Stadium,Venue_Princes Park,Venue_S.C.G.,Venue_Stadium Australia,Venue_Subiaco,Venue_Sydney Showground,Venue_Traeger Park,Venue_Wellington,Venue_York 
Park,City_Adelaide,City_Alice Springs,City_Ballarat,City_Brisbane,City_Cairns,City_Canberra,City_Darwin,City_Geelong,City_Gold Coast,City_Hobart,City_Launceston,City_Melbourne,City_Perth,City_Shanghai,City_Sydney,City_Wellington,Weather_Type_Bad,Weather_Type_Good
0,1,2005.0,18.0,4,181.188342,26.305829,23.35337,49.659199,0.528876,91.5,13.281603,11.810383,25.091985,0.526871,89.688342,13.024226,11.542987,24.567213,0.528248,181.774299,26.394976,23.404441,49.799418,0.52913,90.031853,13.076629,11.572079,24.648708,0.528787,91.742446,13.318347,11.832363,25.15071,0.52722,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,1,2005.0,18.0,1,181.188342,26.305829,23.35337,49.659199,0.528876,91.5,13.281603,11.810383,25.091985,0.526871,89.688342,13.024226,11.542987,24.567213,0.528248,181.774299,26.394976,23.404441,49.799418,0.52913,90.031853,13.076629,11.572079,24.648708,0.528787,91.742446,13.318347,11.832363,25.15071,0.52722,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
2,1,2005.0,18.0,4,181.188342,26.305829,23.35337,49.659199,0.528876,91.5,13.281603,11.810383,25.091985,0.526871,89.688342,13.024226,11.542987,24.567213,0.528248,181.774299,26.394976,23.404441,49.799418,0.52913,90.031853,13.076629,11.572079,24.648708,0.528787,91.742446,13.318347,11.832363,25.15071,0.52722,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
3,1,2005.0,18.0,1,181.188342,26.305829,23.35337,49.659199,0.528876,91.5,13.281603,11.810383,25.091985,0.526871,89.688342,13.024226,11.542987,24.567213,0.528248,181.774299,26.394976,23.404441,49.799418,0.52913,90.031853,13.076629,11.572079,24.648708,0.528787,91.742446,13.318347,11.832363,25.15071,0.52722,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1
4,1,2005.0,18.0,2,181.188342,26.305829,23.35337,49.659199,0.528876,91.5,13.281603,11.810383,25.091985,0.526871,89.688342,13.024226,11.542987,24.567213,0.528248,181.774299,26.394976,23.404441,49.799418,0.52913,90.031853,13.076629,11.572079,24.648708,0.528787,91.742446,13.318347,11.832363,25.15071,0.52722,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1


Optuna Hyperparameter Tuning Class - HyperParameterTuner & XGBHyperparameterTuner

TimeSeriesCrossValidation

- Custom folds: each year in turn is held out as the validation sample, and only the years before it are used for training.

In [37]:
# Instantiate the project's XGBYearHyperparameterTuner with the preprocessed
# features and the response series.
# NOTE(review): trial count, metric and any random seed are constructor
# defaults not visible here — see
# total_points_model.domain.modelling.hyperparameter_tuning.
xgb_tuner = XGBYearHyperparameterTuner(X_preproc, y)

In [43]:
# Run the Optuna search. In the recorded run this logged one line per trial,
# finished 1000 trials, printed the best trial, and returned the optuna Study
# object (shown as this cell's output).
# NOTE(review): no seed is set in this notebook, so the best parameters may
# differ between runs unless the tuner seeds its sampler internally — confirm.
xgb_tuner.tune_hyperparameters()

[32m[I 2023-05-16 15:33:40,874][0m A new study created in memory with name: no-name-606518bc-41c4-4675-bb8d-69eac97d3ef5[0m
[32m[I 2023-05-16 15:33:41,136][0m Trial 0 finished with value: 170.3831738971774 and parameters: {'max_depth': 16, 'min_child_weight': 17, 'eta': 0.007496408558560726, 'gamma': 20.0, 'lambda': 0.17511028137269405, 'alpha': 0.0005615269203062599, 'subsample': 0.2250184915983319, 'colsample_bytree': 0.33510382555525714}. Best is trial 0 with value: 170.3831738971774.[0m
[32m[I 2023-05-16 15:33:41,379][0m Trial 1 finished with value: 138.047995165278 and parameters: {'max_depth': 2, 'min_child_weight': 7, 'eta': 0.02856837812214769, 'gamma': 20.0, 'lambda': 0.00121439200667942, 'alpha': 0.010327622182784543, 'subsample': 0.4843465480187639, 'colsample_bytree': 0.2982701983203877}. Best is trial 1 with value: 138.047995165278.[0m
[32m[I 2023-05-16 15:33:41,628][0m Trial 2 finished with value: 174.91626063626668 and parameters: {'max_depth': 8, 'min_child_w

Number of finished trials:  1000
Best trial:
  Value: 26.968227522745664
  Params: 
    max_depth: 2
    min_child_weight: 12
    eta: 0.40232106247872074
    gamma: 20.0
    lambda: 0.0010839538861702367
    alpha: 0.00042705796835734066
    subsample: 0.7135251853956766
    colsample_bytree: 0.2221106252008631


<optuna.study.study.Study at 0x7fccc1432340>

In [44]:
# Display the best trial's hyperparameters as a plain dict.
xgb_tuner.get_best_params()

{'max_depth': 2,
 'min_child_weight': 12,
 'eta': 0.40232106247872074,
 'gamma': 20.0,
 'lambda': 0.0010839538861702367,
 'alpha': 0.00042705796835734066,
 'subsample': 0.7135251853956766,
 'colsample_bytree': 0.2221106252008631}

Visualisation of the Tuning Study

In [45]:
# Plot the objective value of every trial, using the Study stored on the tuner.
from optuna.visualization import plot_optimization_history
plot_optimization_history(xgb_tuner.study)

In [46]:
# Plot the intermediate values reported by each trial.
# NOTE(review): in the recorded run Optuna warned that the pruning feature must
# be set up for this plot, so it carries no information here unless the tuner
# starts reporting intermediate values.
from optuna.visualization import plot_intermediate_values
plot_intermediate_values(xgb_tuner.study)

[33m[W 2023-05-16 15:38:11,054][0m You need to set up the pruning feature to utilize `plot_intermediate_values()`[0m


In [47]:
# Plot Optuna's estimate of how much each hyperparameter influenced the
# objective value across the study's trials.
from optuna.visualization import plot_param_importances
plot_param_importances(xgb_tuner.study)