Total Game Score Model - Model Tuning and Building - GBM

In [3]:
import pandas as pd
import numpy as np
import joblib
import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.append('..')
from total_points_model.config import raw_data_file_path
from total_points_model.domain.preprocessing.data_preprocessor import DataPreprocessor
from total_points_model.domain.contracts.mappings import Mappings
from total_points_model.domain.contracts.modelling_data_contract import ModellingDataContract
from total_points_model.domain.modelling.hyperparameter_tuning import XGBHyperparameterTuner, XGBYearHyperparameterTuner
from total_points_model.domain.modelling.supermodel import SuperXGBRegressor

# Widen pandas' display limits so the wide, one-hot-encoded feature frames
# shown further down are not truncated.
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 999)

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Model Version

In [4]:
# Version tag shared by every artifact this notebook writes (model,
# preprocessor and prediction files) so the outputs of one run can be matched.
model_version = 2
model_name = 'xgb_total_points'
model_file_name = f"{model_name}_v{model_version}"
preprocessor_file_name = f"preprocessor_v{model_version}"

# Artifact directories inside the `total_points_model` package, expressed
# relative to the notebook's working directory rather than one user's home
# directory. This relies on the same assumption as `sys.path.append('..')` in
# the imports cell: the notebook runs from a folder that sits next to the
# `total_points_model` package.
# The values stay plain strings with a trailing "/" because later cells build
# file names by string concatenation.
package_dir = "../total_points_model/"
model_output_path = package_dir + "models/"
prediction_output_path = package_dir + "predictions/"
preprocessor_output_path = package_dir + "preprocessors/"

Load Data

In [32]:
# Read the raw match data and keep only rows whose `Year` is below 2023.
afl_data = pd.read_csv(raw_data_file_path).loc[lambda matches: matches['Year'] < 2023]

In [33]:
# Rows where `ModellingFilter` is True form the training partition; all
# remaining rows are held out as the test partition.
in_training_mask = afl_data['ModellingFilter']
training_data = afl_data.loc[in_training_mask]
test_data = afl_data.loc[~in_training_mask]

In [34]:
# Name of the target column, read from the project's modelling data contract.
# The next cell drops this column from the features and uses it as `y`.
response = ModellingDataContract.response

In [35]:
# Separate each partition into a feature frame (every column except the
# response) and the response series itself.
X_train = training_data.drop(columns = [response])
y_train = training_data[response]

X_test = test_data.drop(columns = [response])
y_test = test_data[response]

In [36]:
# Preview the last rows of the held-out partition (raw, un-preprocessed columns).
test_data.tail()

Unnamed: 0,Home_Team,Venue,Round_ID,Match_ID,Year,Away_Team,Q1_Score,Q2_Score,Q3_Score,Q4_Score,Margin,Total_Game_Score,Home_Win,City,Date,Attendance,Home_Coach_ID,Away_Coach_ID,Q5_Score,Temperature,Weather_Type,Match_Status,Weather_Description,Ground_Width,Ground_Length,Home_Ground,ModellingFilter,DateTime
3533,Melbourne,M.C.G.,2022F2,2022F2_Melbourne_BrisbaneLions,2022.0,Brisbane Lions,3.6.24 - 1.3.9,6.8.44 - 3.4.22,8.11.59 - 9.5.59,11.13.79 - 14.8.92,-13.0,171.0,0.0,Melbourne,2022-09-09,62162.0,Simon_Goodwin,Chris_Fagan,,20.0,RAIN,CONCLUDED,Shower or two,141,160,Primary Home,False,2022-09-09 19:20:00
3534,Collingwood,M.C.G.,2022F2,2022F2_Collingwood_Fremantle,2022.0,Fremantle,4.0.24 - 0.1.1,6.6.42 - 2.2.14,8.12.60 - 4.4.28,11.13.79 - 9.5.59,20.0,138.0,1.0,Melbourne,2022-09-10,90612.0,Craig_McRae,Justin_Longmuir,,15.0,RAIN,CONCLUDED,Showers,141,160,Primary Home,False,2022-09-10 18:55:00
3535,Geelong,M.C.G.,2022F3,2022F3_Geelong_BrisbaneLions,2022.0,Brisbane Lions,4.2.26 - 2.0.12,7.7.49 - 3.1.19,14.11.95 - 5.2.32,18.12.120 - 7.7.49,71.0,169.0,1.0,Melbourne,2022-09-16,77677.0,Chris_Scott,Chris_Fagan,,17.0,RAIN,CONCLUDED,Rain at times,141,160,Neutral,False,2022-09-16 19:20:00
3536,Sydney,S.C.G.,2022F3,2022F3_Sydney_Collingwood,2022.0,Collingwood,6.3.39 - 3.0.18,11.7.73 - 7.1.43,13.10.88 - 10.5.65,14.11.95 - 14.10.94,1.0,189.0,1.0,Sydney,2022-09-17,45608.0,John_Longmire,Craig_McRae,,23.0,MOSTLY_SUNNY,CONCLUDED,Partly cloudy,136,155,Primary Home,False,2022-09-17 16:15:00
3537,Geelong,M.C.G.,2022F4,2022F4_Geelong_Sydney,2022.0,Sydney,6.5.41 - 1.0.6,9.8.62 - 4.2.26,15.11.101 - 4.3.27,20.13.133 - 8.4.52,81.0,185.0,1.0,Melbourne,2022-09-24,100024.0,Chris_Scott,John_Longmire,,15.0,RAIN,CONCLUDED,Morning shower or two,141,160,Neutral,False,2022-09-24 14:00:00


Preprocess Data

In [37]:
# Build the project's preprocessor, configured with the mappings from `Mappings`.
preprocessor = DataPreprocessor(mapping=Mappings.mappings)

In [38]:
# Fit on the training features only; the test features are never passed to
# `fit`, they are only transformed in the following cell.
preprocessor.fit(X_train)

In [39]:
# Apply the fitted preprocessor to both partitions, training first, then test.
X_train_preproc, X_test_preproc = [
    preprocessor.transform(features) for features in (X_train, X_test)
]

In [40]:
# Inspect the first rows of the preprocessed training features.
X_train_preproc.head()

Unnamed: 0,Round,Year,Temperature,random5,Home_Total_Q4_Score_avg2,Home_Total_Q4_Goals_avg2,Home_Total_Q4_Behinds_avg2,Home_Total_Q4_Shots_avg2,Home_Total_Q4_Conversion_avg2,Home_Att_Q4_Score_avg2,Home_Att_Q4_Goals_avg2,Home_Att_Q4_Behinds_avg2,Home_Att_Q4_Shots_avg2,Home_Att_Q4_Conversion_avg2,Home_Def_Q4_Score_avg2,Home_Def_Q4_Goals_avg2,Home_Def_Q4_Behinds_avg2,Home_Def_Q4_Shots_avg2,Home_Def_Q4_Conversion_avg2,Away_Total_Q4_Score_avg2,Away_Total_Q4_Goals_avg2,Away_Total_Q4_Behinds_avg2,Away_Total_Q4_Shots_avg2,Away_Total_Q4_Conversion_avg2,Away_Att_Q4_Score_avg2,Away_Att_Q4_Goals_avg2,Away_Att_Q4_Behinds_avg2,Away_Att_Q4_Shots_avg2,Away_Att_Q4_Conversion_avg2,Away_Def_Q4_Score_avg2,Away_Def_Q4_Goals_avg2,Away_Def_Q4_Behinds_avg2,Away_Def_Q4_Shots_avg2,Away_Def_Q4_Conversion_avg2,Home_Team_Adelaide,Home_Team_Brisbane Lions,Home_Team_Carlton,Home_Team_Collingwood,Home_Team_Essendon,Home_Team_Fremantle,Home_Team_Geelong,Home_Team_Gold Coast,Home_Team_Greater Western Sydney,Home_Team_Hawthorn,Home_Team_Melbourne,Home_Team_North Melbourne,Home_Team_Port Adelaide,Home_Team_Richmond,Home_Team_St Kilda,Home_Team_Sydney,Home_Team_West Coast,Home_Team_Western Bulldogs,Away_Team_Adelaide,Away_Team_Brisbane Lions,Away_Team_Carlton,Away_Team_Collingwood,Away_Team_Essendon,Away_Team_Fremantle,Away_Team_Geelong,Away_Team_Gold Coast,Away_Team_Greater Western Sydney,Away_Team_Hawthorn,Away_Team_Melbourne,Away_Team_North Melbourne,Away_Team_Port Adelaide,Away_Team_Richmond,Away_Team_St Kilda,Away_Team_Sydney,Away_Team_West Coast,Away_Team_Western Bulldogs,Venue_Adelaide Oval,Venue_Bellerive Oval,Venue_Blacktown,Venue_Carrara,Venue_Cazalys Stadium,Venue_Docklands,Venue_Eureka Stadium,Venue_Football Park,Venue_Gabba,Venue_Jiangwan Stadium,Venue_Kardinia Park,Venue_M.C.G.,Venue_Manuka Oval,Venue_Marrara Oval,Venue_Perth Stadium,Venue_Princes Park,Venue_S.C.G.,Venue_Stadium Australia,Venue_Subiaco,Venue_Sydney Showground,Venue_Traeger Park,Venue_Wellington,Venue_York 
Park,City_Adelaide,City_Alice Springs,City_Ballarat,City_Brisbane,City_Cairns,City_Canberra,City_Darwin,City_Geelong,City_Gold Coast,City_Hobart,City_Launceston,City_Melbourne,City_Perth,City_Shanghai,City_Sydney,City_Wellington,Weather_Type_Bad,Weather_Type_Good
0,1,2005.0,18.0,4,181.188342,26.305829,23.35337,49.659199,0.528876,91.5,13.281603,11.810383,25.091985,0.526871,89.688342,13.024226,11.542987,24.567213,0.528248,181.774299,26.394976,23.404441,49.799418,0.52913,90.031853,13.076629,11.572079,24.648708,0.528787,91.742446,13.318347,11.832363,25.15071,0.52722,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,1,2005.0,18.0,4,181.188342,26.305829,23.35337,49.659199,0.528876,91.5,13.281603,11.810383,25.091985,0.526871,89.688342,13.024226,11.542987,24.567213,0.528248,181.774299,26.394976,23.404441,49.799418,0.52913,90.031853,13.076629,11.572079,24.648708,0.528787,91.742446,13.318347,11.832363,25.15071,0.52722,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
2,1,2005.0,18.0,1,181.188342,26.305829,23.35337,49.659199,0.528876,91.5,13.281603,11.810383,25.091985,0.526871,89.688342,13.024226,11.542987,24.567213,0.528248,181.774299,26.394976,23.404441,49.799418,0.52913,90.031853,13.076629,11.572079,24.648708,0.528787,91.742446,13.318347,11.832363,25.15071,0.52722,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1
3,1,2005.0,18.0,5,181.188342,26.305829,23.35337,49.659199,0.528876,91.5,13.281603,11.810383,25.091985,0.526871,89.688342,13.024226,11.542987,24.567213,0.528248,181.774299,26.394976,23.404441,49.799418,0.52913,90.031853,13.076629,11.572079,24.648708,0.528787,91.742446,13.318347,11.832363,25.15071,0.52722,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1
4,1,2005.0,18.0,5,181.188342,26.305829,23.35337,49.659199,0.528876,91.5,13.281603,11.810383,25.091985,0.526871,89.688342,13.024226,11.542987,24.567213,0.528248,181.774299,26.394976,23.404441,49.799418,0.52913,90.031853,13.076629,11.572079,24.648708,0.528787,91.742446,13.318347,11.832363,25.15071,0.52722,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1


In [41]:
# Show (rows, columns) for both preprocessed partitions; the column counts
# should agree since both went through the same fitted preprocessor.
X_train_preproc.shape, X_test_preproc.shape

((2755, 111), (783, 111))

Save preprocessor

In [42]:
# Persist the fitted preprocessor under the versioned file name defined in
# the "Model Version" cell. `joblib.dump` returns the list of files written,
# which is displayed as the cell output.
preprocessor_artifact_path = preprocessor_output_path + preprocessor_file_name + ".joblib"
joblib.dump(preprocessor, preprocessor_artifact_path)

['/Users/ciaran/Documents/Projects/AFL/total-points-score-model/afl-total-points-model/total_points_model/preprocessors/preprocessor_v2.joblib']

Optuna Hyperparameter Tuning Class - HyperParameterTuner & XGBYearHyperparameterTuner

In [50]:
# Set up the tuner on the preprocessed training features and training target.
# No test data is passed to it.
xgb_tuner = XGBYearHyperparameterTuner(X_train_preproc, y_train)

In [53]:
# Run the Optuna study (per the traceback below, this calls
# `self.study.optimize(self.objective, n_trials=self.trials)`).
# NOTE(review): in the saved run this cell was stopped by a KeyboardInterrupt
# during trial 13, so the best parameters read in the next cell come from a
# partial study (best was trial 12). Re-run to completion, or record the
# intended trial budget, before treating the result as final.
xgb_tuner.tune_hyperparameters()

[32m[I 2023-05-16 16:02:06,788][0m A new study created in memory with name: no-name-6c2723ce-0d88-42a8-9674-97bf7d516088[0m


[0]	train-rmse:190.26098	valid-rmse:185.45936
[1]	train-rmse:188.17757	valid-rmse:183.39082
[2]	train-rmse:186.10594	valid-rmse:181.31186
[3]	train-rmse:184.05646	valid-rmse:179.30959
[4]	train-rmse:181.98250	valid-rmse:177.22802
[5]	train-rmse:179.90998	valid-rmse:175.17753
[6]	train-rmse:177.87786	valid-rmse:173.17632
[7]	train-rmse:175.87146	valid-rmse:171.19899
[8]	train-rmse:173.97030	valid-rmse:169.33927
[9]	train-rmse:172.19600	valid-rmse:167.60339
[10]	train-rmse:170.22278	valid-rmse:165.63150
[11]	train-rmse:168.32270	valid-rmse:163.72347
[12]	train-rmse:166.51562	valid-rmse:161.95613
[13]	train-rmse:164.63139	valid-rmse:160.09068
[14]	train-rmse:162.84001	valid-rmse:158.36713
[15]	train-rmse:161.10311	valid-rmse:156.70678
[16]	train-rmse:159.28414	valid-rmse:154.90591
[17]	train-rmse:157.50042	valid-rmse:153.17532
[18]	train-rmse:155.79340	valid-rmse:151.51970
[19]	train-rmse:154.14968	valid-rmse:149.92284
[20]	train-rmse:152.47147	valid-rmse:148.28015
[21]	train-rmse:150.782

[32m[I 2023-05-16 16:02:39,993][0m Trial 0 finished with value: 27.060526150760573 and parameters: {'max_depth': 11, 'min_child_weight': 14, 'eta': 0.011670412994964805, 'gamma': 0.0004875969621371861, 'lambda': 0.40422336617102045, 'alpha': 0.11953186797842831, 'subsample': 0.24504196025459563, 'colsample_bytree': 0.4034082980540764}. Best is trial 0 with value: 27.060526150760573.[0m


[0]	train-rmse:98.64745	valid-rmse:97.61502
[1]	train-rmse:53.63584	valid-rmse:55.82870
[2]	train-rmse:34.32784	valid-rmse:40.78275
[3]	train-rmse:25.60738	valid-rmse:37.93811
[4]	train-rmse:21.90845	valid-rmse:36.97763
[5]	train-rmse:20.18197	valid-rmse:37.77838
[6]	train-rmse:18.88529	valid-rmse:37.62718
[7]	train-rmse:17.43825	valid-rmse:39.42604
[8]	train-rmse:15.97929	valid-rmse:39.96395
[9]	train-rmse:15.36612	valid-rmse:41.05387
[10]	train-rmse:14.37987	valid-rmse:41.38023
[11]	train-rmse:13.06253	valid-rmse:41.93285
[12]	train-rmse:12.53866	valid-rmse:42.13453
[13]	train-rmse:11.91033	valid-rmse:42.66214
[14]	train-rmse:11.40984	valid-rmse:43.14509
[15]	train-rmse:10.62548	valid-rmse:43.30997
[16]	train-rmse:10.16511	valid-rmse:43.32170
[17]	train-rmse:10.03338	valid-rmse:43.10251
[18]	train-rmse:9.74950	valid-rmse:42.86411
[19]	train-rmse:9.19438	valid-rmse:42.99457
[20]	train-rmse:8.35628	valid-rmse:42.86281
[21]	train-rmse:8.41793	valid-rmse:42.87687
[22]	train-rmse:7.84656	

[32m[I 2023-05-16 16:02:48,204][0m Trial 1 finished with value: 37.80265303520867 and parameters: {'max_depth': 8, 'min_child_weight': 5, 'eta': 0.5035020993042132, 'gamma': 0.0005517056133503287, 'lambda': 0.024347653523161785, 'alpha': 0.00043063714518588495, 'subsample': 0.5340949818877885, 'colsample_bytree': 0.382896935817461}. Best is trial 0 with value: 27.060526150760573.[0m


[0]	train-rmse:112.22712	valid-rmse:109.74062
[1]	train-rmse:70.11681	valid-rmse:69.93007
[2]	train-rmse:47.13260	valid-rmse:46.58528
[3]	train-rmse:35.34714	valid-rmse:36.63314
[4]	train-rmse:30.09325	valid-rmse:31.97316
[5]	train-rmse:27.59796	valid-rmse:31.09741
[6]	train-rmse:26.31667	valid-rmse:30.56009
[7]	train-rmse:25.78766	valid-rmse:30.41251
[8]	train-rmse:25.10653	valid-rmse:30.52965
[9]	train-rmse:24.31351	valid-rmse:30.70824
[10]	train-rmse:23.59223	valid-rmse:30.92841
[11]	train-rmse:22.94208	valid-rmse:31.29579
[12]	train-rmse:22.66641	valid-rmse:31.38357
[13]	train-rmse:22.40485	valid-rmse:31.83068
[14]	train-rmse:21.80974	valid-rmse:32.00457
[15]	train-rmse:21.44650	valid-rmse:32.38250
[16]	train-rmse:21.25298	valid-rmse:32.29448
[17]	train-rmse:20.63599	valid-rmse:32.33675
[18]	train-rmse:20.00655	valid-rmse:32.42660
[19]	train-rmse:19.90445	valid-rmse:32.27903
[20]	train-rmse:19.37151	valid-rmse:32.53317
[21]	train-rmse:19.09096	valid-rmse:32.66371
[22]	train-rmse:18

[32m[I 2023-05-16 16:02:59,924][0m Trial 2 finished with value: 33.16016440613505 and parameters: {'max_depth': 18, 'min_child_weight': 17, 'eta': 0.42644782493810507, 'gamma': 0.0006398061535154295, 'lambda': 0.0021465961183542194, 'alpha': 0.0011959057993734545, 'subsample': 0.6570249019719254, 'colsample_bytree': 0.40060265254147365}. Best is trial 0 with value: 27.060526150760573.[0m


[0]	train-rmse:147.98238	valid-rmse:142.99788
[1]	train-rmse:114.48138	valid-rmse:109.26758
[2]	train-rmse:88.51216	valid-rmse:84.90019
[3]	train-rmse:70.20723	valid-rmse:67.43450
[4]	train-rmse:56.84488	valid-rmse:54.44124
[5]	train-rmse:47.79127	valid-rmse:45.41119
[6]	train-rmse:40.74754	valid-rmse:37.96867
[7]	train-rmse:36.06236	valid-rmse:33.39253
[8]	train-rmse:33.20605	valid-rmse:31.10657
[9]	train-rmse:32.26634	valid-rmse:30.64658
[10]	train-rmse:30.77522	valid-rmse:29.75067
[11]	train-rmse:30.07634	valid-rmse:29.71598
[12]	train-rmse:29.73376	valid-rmse:29.45873
[13]	train-rmse:28.99367	valid-rmse:28.93985
[14]	train-rmse:28.69247	valid-rmse:29.37414
[15]	train-rmse:28.57437	valid-rmse:29.66701
[16]	train-rmse:28.37023	valid-rmse:29.38995
[17]	train-rmse:28.18325	valid-rmse:29.20226
[18]	train-rmse:28.07886	valid-rmse:29.00652
[19]	train-rmse:28.10965	valid-rmse:29.38304
[20]	train-rmse:27.83105	valid-rmse:29.85436
[21]	train-rmse:27.68394	valid-rmse:29.98242
[22]	train-rmse:

[32m[I 2023-05-16 16:03:07,161][0m Trial 3 finished with value: 32.49604082569803 and parameters: {'max_depth': 19, 'min_child_weight': 16, 'eta': 0.247446573154624, 'gamma': 3.1951628431472376, 'lambda': 0.8602821095094741, 'alpha': 0.08113832275281103, 'subsample': 0.37411058928020663, 'colsample_bytree': 0.6836876369655787}. Best is trial 0 with value: 27.060526150760573.[0m


[0]	train-rmse:148.51379	valid-rmse:145.17486
[1]	train-rmse:116.20090	valid-rmse:115.24399
[2]	train-rmse:91.33019	valid-rmse:91.46911
[3]	train-rmse:71.54059	valid-rmse:74.00766
[4]	train-rmse:56.51489	valid-rmse:61.13821
[5]	train-rmse:45.01565	valid-rmse:51.74823
[6]	train-rmse:36.46726	valid-rmse:45.49282
[7]	train-rmse:29.75725	valid-rmse:41.12127
[8]	train-rmse:24.91373	valid-rmse:37.49078
[9]	train-rmse:20.97403	valid-rmse:35.75479
[10]	train-rmse:17.95699	valid-rmse:34.47135
[11]	train-rmse:15.79151	valid-rmse:34.27225
[12]	train-rmse:14.24010	valid-rmse:34.03378
[13]	train-rmse:12.94069	valid-rmse:33.57023
[14]	train-rmse:11.63740	valid-rmse:33.63884
[15]	train-rmse:10.52991	valid-rmse:33.81905
[16]	train-rmse:9.90800	valid-rmse:33.75222
[17]	train-rmse:9.20701	valid-rmse:33.83466
[18]	train-rmse:8.93260	valid-rmse:33.89034
[19]	train-rmse:8.51401	valid-rmse:34.18017
[20]	train-rmse:8.06002	valid-rmse:34.15704
[21]	train-rmse:7.75668	valid-rmse:34.22739
[22]	train-rmse:7.2247

[32m[I 2023-05-16 16:03:16,528][0m Trial 4 finished with value: 30.840695622166404 and parameters: {'max_depth': 11, 'min_child_weight': 4, 'eta': 0.23475528711582214, 'gamma': 7.96444255195129, 'lambda': 0.2608159465699878, 'alpha': 0.008934565166542337, 'subsample': 0.6843346221017296, 'colsample_bytree': 0.6743474074178675}. Best is trial 0 with value: 27.060526150760573.[0m


[0]	train-rmse:129.79376	valid-rmse:126.79276
[1]	train-rmse:88.74834	valid-rmse:87.82527
[2]	train-rmse:63.21849	valid-rmse:63.02966
[3]	train-rmse:47.71217	valid-rmse:50.13147
[4]	train-rmse:37.61012	valid-rmse:40.30502
[5]	train-rmse:32.34826	valid-rmse:34.94517
[6]	train-rmse:29.50679	valid-rmse:33.29346
[7]	train-rmse:27.78510	valid-rmse:31.48006
[8]	train-rmse:27.01759	valid-rmse:30.77603
[9]	train-rmse:27.06025	valid-rmse:31.41010
[10]	train-rmse:26.43092	valid-rmse:31.24123
[11]	train-rmse:26.08498	valid-rmse:30.72027
[12]	train-rmse:26.16064	valid-rmse:31.22415
[13]	train-rmse:25.48927	valid-rmse:31.62284
[14]	train-rmse:25.12821	valid-rmse:31.96840
[15]	train-rmse:24.97676	valid-rmse:32.04931
[16]	train-rmse:25.26484	valid-rmse:32.38768
[17]	train-rmse:24.90130	valid-rmse:32.37248
[18]	train-rmse:24.57640	valid-rmse:31.93071
[19]	train-rmse:24.37873	valid-rmse:32.21934
[20]	train-rmse:23.30833	valid-rmse:32.51755
[21]	train-rmse:22.97691	valid-rmse:32.46967
[22]	train-rmse:22

[32m[I 2023-05-16 16:03:24,292][0m Trial 5 finished with value: 35.84934310819632 and parameters: {'max_depth': 13, 'min_child_weight': 12, 'eta': 0.3343350923126728, 'gamma': 0.08053831241132783, 'lambda': 0.021983535819166327, 'alpha': 0.00015699860316653014, 'subsample': 0.412496293338835, 'colsample_bytree': 0.5437374685802374}. Best is trial 0 with value: 27.060526150760573.[0m


[0]	train-rmse:184.67073	valid-rmse:180.12226
[1]	train-rmse:177.43503	valid-rmse:173.03483
[2]	train-rmse:170.48562	valid-rmse:166.16740
[3]	train-rmse:163.65407	valid-rmse:159.59305
[4]	train-rmse:157.10243	valid-rmse:153.14527
[5]	train-rmse:150.85727	valid-rmse:147.05841
[6]	train-rmse:144.94587	valid-rmse:141.47923
[7]	train-rmse:139.08020	valid-rmse:135.87899
[8]	train-rmse:133.57005	valid-rmse:130.57385
[9]	train-rmse:128.48450	valid-rmse:125.57839
[10]	train-rmse:123.38445	valid-rmse:120.54166
[11]	train-rmse:118.56476	valid-rmse:115.99805
[12]	train-rmse:114.04938	valid-rmse:111.70246
[13]	train-rmse:109.53891	valid-rmse:107.33401
[14]	train-rmse:105.41432	valid-rmse:103.34038
[15]	train-rmse:101.32492	valid-rmse:99.63930
[16]	train-rmse:97.39020	valid-rmse:95.85389
[17]	train-rmse:93.63347	valid-rmse:92.33827
[18]	train-rmse:90.18692	valid-rmse:89.04565
[19]	train-rmse:86.93448	valid-rmse:85.93609
[20]	train-rmse:83.54756	valid-rmse:82.78135
[21]	train-rmse:80.35530	valid-rms

[32m[I 2023-05-16 16:03:38,550][0m Trial 6 finished with value: 28.270466029473514 and parameters: {'max_depth': 7, 'min_child_weight': 7, 'eta': 0.04130762735813898, 'gamma': 0.0641100626940534, 'lambda': 0.18404138999387434, 'alpha': 0.0026336917824068013, 'subsample': 0.6396719230982979, 'colsample_bytree': 0.4597143349540865}. Best is trial 0 with value: 27.060526150760573.[0m


[0]	train-rmse:188.44800	valid-rmse:183.77177
[1]	train-rmse:184.59939	valid-rmse:180.01779
[2]	train-rmse:180.86060	valid-rmse:176.37469
[3]	train-rmse:177.21055	valid-rmse:172.86226
[4]	train-rmse:173.57043	valid-rmse:169.31034
[5]	train-rmse:170.06356	valid-rmse:165.88910
[6]	train-rmse:166.60630	valid-rmse:162.59509
[7]	train-rmse:163.23893	valid-rmse:159.36570
[8]	train-rmse:159.92899	valid-rmse:156.11053
[9]	train-rmse:156.81375	valid-rmse:153.06984
[10]	train-rmse:153.61645	valid-rmse:149.90435
[11]	train-rmse:150.54816	valid-rmse:146.87653
[12]	train-rmse:147.52846	valid-rmse:143.96805
[13]	train-rmse:144.58180	valid-rmse:141.07661
[14]	train-rmse:141.72524	valid-rmse:138.27427
[15]	train-rmse:138.89797	valid-rmse:135.55287
[16]	train-rmse:136.13642	valid-rmse:132.93303
[17]	train-rmse:133.42389	valid-rmse:130.26840
[18]	train-rmse:130.77141	valid-rmse:127.66377
[19]	train-rmse:128.29635	valid-rmse:125.20706
[20]	train-rmse:125.70797	valid-rmse:122.70473
[21]	train-rmse:123.137

[32m[I 2023-05-16 16:04:03,945][0m Trial 7 finished with value: 27.73531147528984 and parameters: {'max_depth': 16, 'min_child_weight': 13, 'eta': 0.020855769641242822, 'gamma': 0.00012061749577358328, 'lambda': 0.00025256575358603304, 'alpha': 0.06220871366987318, 'subsample': 0.466422618716358, 'colsample_bytree': 0.42398580140011066}. Best is trial 0 with value: 27.060526150760573.[0m


[0]	train-rmse:133.73969	valid-rmse:131.16152
[1]	train-rmse:95.45834	valid-rmse:93.49585
[2]	train-rmse:69.73376	valid-rmse:69.08834
[3]	train-rmse:52.75769	valid-rmse:53.16064
[4]	train-rmse:41.69583	valid-rmse:42.88318
[5]	train-rmse:35.55966	valid-rmse:36.93788
[6]	train-rmse:31.99616	valid-rmse:33.38526
[7]	train-rmse:30.14842	valid-rmse:31.87614
[8]	train-rmse:29.05395	valid-rmse:31.54318
[9]	train-rmse:28.60615	valid-rmse:31.58089
[10]	train-rmse:27.78438	valid-rmse:31.96849
[11]	train-rmse:27.15165	valid-rmse:31.56318
[12]	train-rmse:26.77538	valid-rmse:31.64763
[13]	train-rmse:26.35248	valid-rmse:32.04095
[14]	train-rmse:25.96614	valid-rmse:32.10210
[15]	train-rmse:25.54391	valid-rmse:32.39089
[16]	train-rmse:25.39517	valid-rmse:32.64035
[17]	train-rmse:24.91360	valid-rmse:32.85941
[18]	train-rmse:24.76409	valid-rmse:32.67778
[19]	train-rmse:24.68287	valid-rmse:32.59134
[20]	train-rmse:24.22112	valid-rmse:32.41956
[21]	train-rmse:23.82792	valid-rmse:32.68004
[22]	train-rmse:23

[32m[I 2023-05-16 16:04:12,387][0m Trial 8 finished with value: 32.67128733079658 and parameters: {'max_depth': 16, 'min_child_weight': 19, 'eta': 0.3131352517495923, 'gamma': 0.06348801679755144, 'lambda': 0.006972386153936032, 'alpha': 0.50303509244356, 'subsample': 0.5542846840439746, 'colsample_bytree': 0.4141801051243916}. Best is trial 0 with value: 27.060526150760573.[0m


[0]	train-rmse:139.08240	valid-rmse:135.74759
[1]	train-rmse:102.18969	valid-rmse:99.36012
[2]	train-rmse:76.30483	valid-rmse:74.05284
[3]	train-rmse:58.15149	valid-rmse:57.46982
[4]	train-rmse:46.20059	valid-rmse:46.99558
[5]	train-rmse:38.27653	valid-rmse:39.20204
[6]	train-rmse:32.94774	valid-rmse:34.50449
[7]	train-rmse:30.18953	valid-rmse:32.24274
[8]	train-rmse:28.34566	valid-rmse:31.19388
[9]	train-rmse:27.31812	valid-rmse:31.10522
[10]	train-rmse:26.50857	valid-rmse:31.23087
[11]	train-rmse:25.87250	valid-rmse:31.10037
[12]	train-rmse:25.57746	valid-rmse:31.40612
[13]	train-rmse:25.13337	valid-rmse:31.20821
[14]	train-rmse:24.51158	valid-rmse:31.52682
[15]	train-rmse:24.06206	valid-rmse:31.51264
[16]	train-rmse:23.73931	valid-rmse:32.12688
[17]	train-rmse:23.45065	valid-rmse:32.39229
[18]	train-rmse:23.13914	valid-rmse:32.18452
[19]	train-rmse:23.08734	valid-rmse:32.48158
[20]	train-rmse:22.70506	valid-rmse:32.66893
[21]	train-rmse:22.51851	valid-rmse:32.74677
[22]	train-rmse:2

[32m[I 2023-05-16 16:04:21,068][0m Trial 9 finished with value: 33.10610022049389 and parameters: {'max_depth': 13, 'min_child_weight': 13, 'eta': 0.28225704495098175, 'gamma': 0.01320640532533756, 'lambda': 0.016420550897386144, 'alpha': 0.00018202784475389875, 'subsample': 0.5021505759650151, 'colsample_bytree': 0.5487281848953601}. Best is trial 0 with value: 27.060526150760573.[0m


[0]	train-rmse:190.38536	valid-rmse:185.58417
[1]	train-rmse:188.43101	valid-rmse:183.62299
[2]	train-rmse:186.42953	valid-rmse:181.61437
[3]	train-rmse:184.49979	valid-rmse:179.67762
[4]	train-rmse:182.54139	valid-rmse:177.71198
[5]	train-rmse:180.61591	valid-rmse:175.77923
[6]	train-rmse:178.71976	valid-rmse:173.87579
[7]	train-rmse:176.86975	valid-rmse:172.01854
[8]	train-rmse:175.04078	valid-rmse:170.18227
[9]	train-rmse:173.28221	valid-rmse:168.41657
[10]	train-rmse:171.41123	valid-rmse:166.53784
[11]	train-rmse:169.60767	valid-rmse:164.72669
[12]	train-rmse:167.85757	valid-rmse:162.96907
[13]	train-rmse:166.08501	valid-rmse:161.18875
[14]	train-rmse:164.33835	valid-rmse:159.44978
[15]	train-rmse:162.68933	valid-rmse:157.88861
[16]	train-rmse:160.98578	valid-rmse:156.17719
[17]	train-rmse:159.27622	valid-rmse:154.45958
[18]	train-rmse:157.63639	valid-rmse:152.87627
[19]	train-rmse:156.05543	valid-rmse:151.28764
[20]	train-rmse:154.46524	valid-rmse:149.68959
[21]	train-rmse:152.888

[32m[I 2023-05-16 16:04:52,345][0m Trial 10 finished with value: 26.72733114739912 and parameters: {'max_depth': 2, 'min_child_weight': 9, 'eta': 0.011173659100827472, 'gamma': 0.005523390408769662, 'lambda': 0.8770671044606577, 'alpha': 0.39383336401444147, 'subsample': 0.22051454301416107, 'colsample_bytree': 0.2105531204691815}. Best is trial 10 with value: 26.72733114739912.[0m


[0]	train-rmse:190.35833	valid-rmse:185.55705
[1]	train-rmse:188.36387	valid-rmse:183.55561
[2]	train-rmse:186.33715	valid-rmse:181.52165
[3]	train-rmse:184.38255	valid-rmse:179.55995
[4]	train-rmse:182.42594	valid-rmse:177.59609
[5]	train-rmse:180.47618	valid-rmse:175.63897
[6]	train-rmse:178.55655	valid-rmse:173.71195
[7]	train-rmse:176.67508	valid-rmse:171.82310
[8]	train-rmse:174.82416	valid-rmse:169.96478
[9]	train-rmse:173.04099	valid-rmse:168.17436
[10]	train-rmse:171.14689	valid-rmse:166.27240
[11]	train-rmse:169.32283	valid-rmse:164.44063
[12]	train-rmse:167.55418	valid-rmse:162.66437
[13]	train-rmse:165.79834	valid-rmse:160.90083
[14]	train-rmse:164.01473	valid-rmse:159.12568
[15]	train-rmse:162.34596	valid-rmse:157.54779
[16]	train-rmse:160.62417	valid-rmse:155.81802
[17]	train-rmse:158.89548	valid-rmse:154.08116
[18]	train-rmse:157.27002	valid-rmse:152.44788
[19]	train-rmse:155.67313	valid-rmse:150.84316
[20]	train-rmse:154.10079	valid-rmse:149.26299
[21]	train-rmse:152.500

[32m[I 2023-05-16 16:05:22,133][0m Trial 11 finished with value: 26.680476422906512 and parameters: {'max_depth': 2, 'min_child_weight': 9, 'eta': 0.011353783786159047, 'gamma': 0.0032884192176357505, 'lambda': 0.9847687751489038, 'alpha': 0.9360313661719437, 'subsample': 0.2129117543229359, 'colsample_bytree': 0.2168805495988433}. Best is trial 11 with value: 26.680476422906512.[0m


[0]	train-rmse:190.57599	valid-rmse:185.77546
[1]	train-rmse:188.81914	valid-rmse:184.01248
[2]	train-rmse:186.98270	valid-rmse:182.16953
[3]	train-rmse:185.23777	valid-rmse:180.41829
[4]	train-rmse:183.50833	valid-rmse:178.68251
[5]	train-rmse:181.77086	valid-rmse:176.93855
[6]	train-rmse:180.04631	valid-rmse:175.20745
[7]	train-rmse:178.36237	valid-rmse:173.51701
[8]	train-rmse:176.69457	valid-rmse:171.84266
[9]	train-rmse:175.09950	valid-rmse:170.24123
[10]	train-rmse:173.39554	valid-rmse:168.53036
[11]	train-rmse:171.73695	valid-rmse:166.86492
[12]	train-rmse:170.12628	valid-rmse:165.24749
[13]	train-rmse:168.53336	valid-rmse:163.64777
[14]	train-rmse:166.92839	valid-rmse:162.03584
[15]	train-rmse:165.40668	valid-rmse:160.59483
[16]	train-rmse:163.84040	valid-rmse:159.02147
[17]	train-rmse:162.27505	valid-rmse:157.44891
[18]	train-rmse:160.79626	valid-rmse:155.96320
[19]	train-rmse:159.34357	valid-rmse:154.50359
[20]	train-rmse:157.92159	valid-rmse:153.07474
[21]	train-rmse:156.465

[32m[I 2023-05-16 16:05:56,153][0m Trial 12 finished with value: 26.663947184952583 and parameters: {'max_depth': 2, 'min_child_weight': 9, 'eta': 0.010083354133966086, 'gamma': 0.007321736409845373, 'lambda': 0.8295516395258175, 'alpha': 0.9996519231030649, 'subsample': 0.20306121131194263, 'colsample_bytree': 0.203047739546398}. Best is trial 12 with value: 26.663947184952583.[0m


[0]	train-rmse:184.07214	valid-rmse:179.50245
[1]	train-rmse:176.37470	valid-rmse:171.96014
[2]	train-rmse:168.96858	valid-rmse:164.71187
[3]	train-rmse:162.12644	valid-rmse:158.07737
[4]	train-rmse:155.47375	valid-rmse:151.53784
[5]	train-rmse:149.12128	valid-rmse:145.40449
[6]	train-rmse:142.79576	valid-rmse:139.12679
[7]	train-rmse:136.76500	valid-rmse:133.17612
[8]	train-rmse:131.24778	valid-rmse:127.81349
[9]	train-rmse:126.18094	valid-rmse:122.66409
[10]	train-rmse:120.85576	valid-rmse:117.44516
[11]	train-rmse:116.12617	valid-rmse:112.67161
[12]	train-rmse:111.55877	valid-rmse:108.44411
[13]	train-rmse:107.15475	valid-rmse:103.98477
[14]	train-rmse:102.99906	valid-rmse:99.95137
[15]	train-rmse:98.99252	valid-rmse:96.21132
[16]	train-rmse:94.97110	valid-rmse:92.34423
[17]	train-rmse:91.08995	valid-rmse:88.53200
[18]	train-rmse:87.61153	valid-rmse:85.15184
[19]	train-rmse:84.48719	valid-rmse:82.05769
[20]	train-rmse:81.26247	valid-rmse:78.92764
[21]	train-rmse:78.21143	valid-rmse:

[33m[W 2023-05-16 16:06:03,851][0m Trial 13 failed with parameters: {'max_depth': 2, 'min_child_weight': 9, 'eta': 0.04293969275926738, 'gamma': 0.006880065749788552, 'lambda': 0.11099747222743224, 'alpha': 0.7903068967956585, 'subsample': 0.31176833900356216, 'colsample_bytree': 0.2364296422720985} because of the following error: KeyboardInterrupt().[0m
Traceback (most recent call last):
  File "/Users/ciaran/opt/anaconda3/envs/afl-total-model-env/lib/python3.9/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/Users/ciaran/Documents/Projects/AFL/total-points-score-model/afl-total-points-model/notebooks/../total_points_model/domain/modelling/hyperparameter_tuning.py", line 170, in objective
    bst = xgb.train(param,
  File "/Users/ciaran/opt/anaconda3/envs/afl-total-model-env/lib/python3.9/site-packages/xgboost/core.py", line 620, in inner_f
    return func(**kwargs)
  File "/Users/ciaran/opt/anaconda3/envs/afl-total-model-

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/ciaran/opt/anaconda3/envs/afl-total-model-env/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3505, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/fm/_t15h8ls56q08qggzncfm73w0000gn/T/ipykernel_92737/2238644649.py", line 1, in <module>
    xgb_tuner.tune_hyperparameters()
  File "/Users/ciaran/Documents/Projects/AFL/total-points-score-model/afl-total-points-model/notebooks/../total_points_model/domain/modelling/hyperparameter_tuning.py", line 190, in tune_hyperparameters
    self.study.optimize(self.objective, n_trials=self.trials)
  File "/Users/ciaran/opt/anaconda3/envs/afl-total-model-env/lib/python3.9/site-packages/optuna/study/study.py", line 425, in optimize
    _optimize(
  File "/Users/ciaran/opt/anaconda3/envs/afl-total-model-env/lib/python3.9/site-packages/optuna/study/_optimize.py", line 66, in _optimize
    _optimize_sequential(
  File "/Users/ciaran/opt/anaconda3/en

In [54]:
# Fetch the best hyperparameters found by the tuner and display them.
params = xgb_tuner.get_best_params()
params

{'max_depth': 2,
 'min_child_weight': 9,
 'eta': 0.010083354133966086,
 'gamma': 0.007321736409845373,
 'lambda': 0.8295516395258175,
 'alpha': 0.9996519231030649,
 'subsample': 0.20306121131194263,
 'colsample_bytree': 0.203047739546398}

Training Model - SuperXGBRegressor class for training and predictions

In [55]:
# Layer the fixed training settings on top of the tuned hyperparameters.
# The keys are added in place, in the same order as before.
params.update({
    'objective': 'reg:squarederror',
    'num_rounds': 1000,
    'early_stopping_rounds': 50,
    'verbosity': 1,
})

In [56]:
# Wrap the preprocessed train/test partitions and the parameter dict in the
# project's XGBoost regressor wrapper.
# NOTE(review): `params` sets early_stopping_rounds and the test partition is
# handed to the wrapper; if `fit()` uses X_test/y_test as the early-stopping
# evaluation set, test-set metrics will be optimistic. Confirm in
# SuperXGBRegressor.
super_xgb = SuperXGBRegressor(
    X_train=X_train_preproc,
    y_train=y_train,
    X_test=X_test_preproc,
    y_test=y_test,
    params=params,
)

In [57]:
# Train the model. The log below reports RMSE per boosting round for two
# evaluation sets (validation_0 and validation_1), presumably the train and
# test partitions respectively; confirm in SuperXGBRegressor.
super_xgb.fit()

[0]	validation_0-rmse:182.14162	validation_1-rmse:154.61528
[1]	validation_0-rmse:180.34751	validation_1-rmse:152.92164
[2]	validation_0-rmse:178.57900	validation_1-rmse:151.31210
[3]	validation_0-rmse:176.83252	validation_1-rmse:149.67281
[4]	validation_0-rmse:175.07310	validation_1-rmse:148.02836
[5]	validation_0-rmse:173.38689	validation_1-rmse:146.38961
[6]	validation_0-rmse:171.68628	validation_1-rmse:144.74291
[7]	validation_0-rmse:170.00732	validation_1-rmse:143.16925
[8]	validation_0-rmse:168.34991	validation_1-rmse:141.63236
[9]	validation_0-rmse:166.71366	validation_1-rmse:140.12933
[10]	validation_0-rmse:165.08847	validation_1-rmse:138.68202
[11]	validation_0-rmse:163.48334	validation_1-rmse:137.19531
[12]	validation_0-rmse:161.89088	validation_1-rmse:135.74921
[13]	validation_0-rmse:160.31800	validation_1-rmse:134.34178
[14]	validation_0-rmse:158.76697	validation_1-rmse:132.87894
[15]	validation_0-rmse:157.24421	validation_1-rmse:131.51508
[16]	validation_0-rmse:155.72367	v

In [58]:
# Display the wrapper's `xgb_model` attribute after fitting.
super_xgb.xgb_model

In [59]:
# Predict on the preprocessed test features.
# NOTE(review): `test_preds` is not referenced by the later cells shown in
# this notebook; the test export cell calls `predict` again instead.
test_preds = super_xgb.predict(X_test_preproc)

Export model

In [60]:
# Write the trained model to the versioned artifact path.
model_artifact_path = model_output_path + model_file_name + ".joblib"
super_xgb.export_model(model_artifact_path)

Export data and predictions

In [67]:
# Build the training export: identifier and outcome columns from the raw
# training rows, left-joined on the row index with the preprocessed features,
# plus a column of model predictions. The result is written to CSV without
# the index.
train_export = (
    training_data[['Match_ID', 'Home_Team', 'Away_Team', 'Round_ID', 'Total_Game_Score']]
    .merge(X_train_preproc, how='left', left_index=True, right_index=True)
    .assign(xgb_preds=super_xgb.predict(X_train_preproc))
)
train_export.to_csv(
    prediction_output_path + 'train_predictions_' + model_file_name + '.csv',
    index=False,
)

In [69]:
# Build the test export: identifier and outcome columns from the raw test
# rows, left-joined on the row index with the preprocessed features, plus a
# column of model predictions. The result is written to CSV without the index.
test_export = (
    test_data[['Match_ID', 'Home_Team', 'Away_Team', 'Round_ID', 'Total_Game_Score']]
    .merge(X_test_preproc, how='left', left_index=True, right_index=True)
    .assign(xgb_preds=super_xgb.predict(X_test_preproc))
)
test_export.to_csv(
    prediction_output_path + 'test_predictions_' + model_file_name + '.csv',
    index=False,
)