Expected Score Model

Combine the expected Goal, Behind and Miss models for set shots and open-play shots into a single expected score per shot.

In [21]:
import sys

# Make the local expected-score-model checkout importable.
# The membership guard keeps this cell idempotent: re-running it does not
# stack duplicate entries on sys.path.
project_root = "/Users/ciaran/Documents/Projects/AFL/git-repositories/expected-score-model/"
if project_root not in sys.path:
    sys.path.append(project_root)

In [22]:
import pandas as pd
import numpy as np
import joblib
import warnings
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): a blanket 'ignore' also hides deprecation and dtype warnings
# that may matter for the steps below — consider narrowing the filter.
warnings.filterwarnings('ignore')

from expected_score_model.config import set_shots_file_path, open_shots_file_path
from expected_score_model.domain.contracts.modelling_data_contract import ModellingDataContract
from expected_score_model.domain.modelling.supermodel import SuperXGBClassifier
from expected_score_model.domain.modelling.model_evaluation import XGBClassifierEvaluator

# Raise the display limits so wide/long frames render without truncation
# (up to 999 rows and 999 columns).
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 999)

# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
# Re-running this cell in a live kernel prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Expected Score Version

In [45]:
# Version tag appended to the exported CSV file name
# (shot_chains_expected_score_v<version>.csv) in the final export cell.
expected_score_version = 3

Models

In [23]:
# Directory the model .joblib files are loaded from. The trailing slash is
# required: file names are appended by plain string concatenation.
model_location = '/Users/ciaran/Documents/Projects/AFL/git-repositories/expected-score-model/model_outputs/models/'

In [24]:
# Version of each saved model to load, in (goal, behind, miss) order.

# Set-shot models.
(
    expected_goal_set_version,
    expected_behind_set_version,
    expected_miss_set_version,
) = (7, 4, 3)

# Open-play models.
(
    expected_goal_open_version,
    expected_behind_open_version,
    expected_miss_open_version,
) = (7, 5, 4)

In [25]:
def _load_versioned_model(file_prefix, version):
    """Load the model stored at ``model_location + file_prefix + str(version) + ".joblib"``."""
    return joblib.load(model_location + file_prefix + str(version) + ".joblib")


# NOTE: joblib.load unpickles the file, so only point model_location at trusted model files.

# Set-shot models.
expected_goal_set_model = _load_versioned_model("expected_goal_set_v", expected_goal_set_version)
expected_behind_set_model = _load_versioned_model("expected_behind_set_v", expected_behind_set_version)
expected_miss_set_model = _load_versioned_model("expected_miss_set_v", expected_miss_set_version)

# Open-play models.
expected_goal_open_model = _load_versioned_model("expected_goal_open_v", expected_goal_open_version)
expected_behind_open_model = _load_versioned_model("expected_behind_open_v", expected_behind_open_version)
expected_miss_open_model = _load_versioned_model("expected_miss_open_v", expected_miss_open_version)

Data

In [26]:
# Read the set-shot and open-play shot tables from the CSV paths defined in
# expected_score_model.config.
df_set_shots, df_open_shots = (
    pd.read_csv(shots_file_path)
    for shots_file_path in (set_shots_file_path, open_shots_file_path)
)

Processing

In [27]:
# Indicator columns created from Initial_State, one per chain start state.
INITIAL_STATE_COLUMNS = ['ballUp', 'centreBounce', 'kickIn', 'possGain', 'throwIn']


def add_initial_state_dummies(df_shot_type):
    """Add one 0/1 indicator column per known ``Initial_State`` value, in place.

    The dummies are matched to ``INITIAL_STATE_COLUMNS`` by name rather than by
    position, so a state that is absent from the data yields an all-zero column
    instead of a column-count mismatch, and a different category ordering can
    never mislabel a column.

    Parameters
    ----------
    df_shot_type : pandas.DataFrame
        Shot table with an ``Initial_State`` column; modified in place.

    Raises
    ------
    ValueError
        If ``Initial_State`` contains a value outside ``INITIAL_STATE_COLUMNS``
        (the positional assignment this replaces also failed in that case,
        with a less helpful length-mismatch error).
    """
    observed_states = set(df_shot_type['Initial_State'].dropna().unique())
    unexpected_states = observed_states - set(INITIAL_STATE_COLUMNS)
    if unexpected_states:
        raise ValueError(
            "Unexpected Initial_State values: "
            + ", ".join(sorted(map(str, unexpected_states)))
        )

    # dtype=int keeps the indicators as 0/1 integers on every pandas version
    # (newer releases default to bool).
    state_dummies = (
        pd.get_dummies(df_shot_type['Initial_State'], dtype=int)
        .reindex(columns=INITIAL_STATE_COLUMNS, fill_value=0)
    )
    df_shot_type[INITIAL_STATE_COLUMNS] = state_dummies


add_initial_state_dummies(df_set_shots)
add_initial_state_dummies(df_open_shots)

Predictions

In [28]:
# Feature columns passed to the set-shot and open-play models respectively.
# NOTE(review): these are the *goal* feature lists, yet the prediction cell
# also feeds them to the behind and miss models — confirm those models were
# trained on the same columns.
set_shot_features, open_shot_features = (
    ModellingDataContract.set_goal_modelling_feature_list,
    ModellingDataContract.open_goal_modelling_feature_list,
)

In [37]:
# For each shot type: the frame to score, its feature columns, and the model
# that fills each output column (insertion order = original evaluation order).
shot_type_predictions = (
    (
        df_set_shots,
        set_shot_features,
        {
            'xGoals': expected_goal_set_model,
            'xBehinds': expected_behind_set_model,
            'xMiss': expected_miss_set_model,
        },
    ),
    (
        df_open_shots,
        open_shot_features,
        {
            'xGoals': expected_goal_open_model,
            'xBehinds': expected_behind_open_model,
            'xMiss': expected_miss_open_model,
        },
    ),
)

for shots, feature_columns, model_by_output_column in shot_type_predictions:
    for output_column, outcome_model in model_by_output_column.items():
        # Features are re-selected for every call, exactly as in the
        # one-statement-per-model form.
        shots[output_column] = outcome_model.predict_proba(shots[feature_columns], calibrate=True)

Combine

In [38]:
# Stack set shots and open-play shots, then order them by match, chain and
# position within the chain.
# concat keeps each input frame's own row labels, so the combined frame would
# carry duplicate index values; reset_index gives every shot a unique label so
# later label-based selection and alignment are unambiguous.
df_shots = (
    pd.concat([df_set_shots, df_open_shots], axis=0)
    .sort_values(by=['Match_ID', "Chain_Number", "Order"])
    .reset_index(drop=True)
)
df_shots.head()

Unnamed: 0,Chain_Number,Initial_State,Final_State,Order,Quarter,Quarter_Duration_Chain_Start,Quarter_Duration,Team_Chain,Team,Player,AFL_API_Player_ID,Description,x,y,Disposal,Shot_At_Goal,Behind_Detail,Venue_Width,Venue_Length,Home_Team,Away_Team,Home_Team_Direction_Q1,Match_ID,Round_ID,Year,Goal,Behind,Miss,Score,Home_Score,Away_Score,Home_Current_Score,Away_Current_Score,Current_Margin,Event_Type0,Event_Type1,Event_Type2,Event_Type3,x0,x1,x2,x3,y0,y1,y2,y3,Quarter_Duration0,Quarter_Duration1,Quarter_Duration2,Quarter_Duration3,Chain_Duration,Time_Since_Last_Action,Distance_Since_Last_Action,Distance_to_Right_Goal_x,Distance_to_Middle_y,Distance_to_Middle_Goal,Angle_to_Middle_Goal,Angle_to_Middle_Goal_degrees,Visible_Goal_Angle,Visible_Goal_Angle_degrees,Visible_Behind_Angle,Visible_Behind_Angle_degrees,Squared_Distance_to_Middle_Goal,Log_Distance_to_Middle_Goal,Set_Shot,GoalTrainingSet,GoalTestSet,GoalValidationSet,BehindTrainingSet,BehindTestSet,BehindValidationSet,MissTrainingSet,MissTestSet,MissValidationSet,ballUp,centreBounce,kickIn,possGain,throwIn,xGoals,xBehinds,xMiss
0,1,centreBounce,goal,11.0,1,13,39.0,Brisbane Lions,Brisbane Lions,Zac Bailey,Zac_Bailey,Kick,26.0,-21.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021,1,0,0,6,6,0,0.0,0.0,0.0,Kick,Handball Received,Handball,Gather from Opposition,26.0,18.0,11.0,6.0,-21.0,-23.0,-26.0,-27.0,39.0,38.0,37.0,36.0,26.0,1.0,8.246211,52.0,21.0,56.0803,0.383818,21.99113,0.106164,6.082774,0.32704,18.737997,2704.0,3.951244,False,True,False,False,True,False,False,True,False,False,0,1,0,0,0,0.289081,0.493594,0.233659
1,6,possGain,goal,61.0,1,149,168.0,Brisbane Lions,Brisbane Lions,Zac Bailey,Zac_Bailey,Kick,35.0,19.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021,1,0,0,6,6,0,6.0,0.0,6.0,Kick,Handball Received,Handball,Loose Ball Get,35.0,28.0,25.0,22.0,19.0,19.0,26.0,29.0,168.0,167.0,165.0,165.0,19.0,1.0,7.0,43.0,19.0,47.010637,0.416065,23.83874,0.125105,7.167963,0.389831,22.33568,1849.0,3.7612,False,True,False,False,True,False,False,False,False,True,0,0,0,1,0,0.313268,0.375746,0.225728
2,10,possGain,goal,110.0,1,285,306.0,Brisbane Lions,Brisbane Lions,Joe Daniher,Joe_Daniher,Kick,50.0,24.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021,1,0,0,6,6,0,12.0,0.0,12.0,Kick,Gather,Kick Inside 50 Result,Kick Into F50,50.0,50.0,50.0,-8.0,24.0,28.0,28.0,44.0,306.0,305.0,304.0,303.0,21.0,1.0,4.0,28.0,24.0,36.878178,0.708626,40.601295,0.132764,7.606837,0.424028,24.29503,784.0,3.332205,False,True,False,False,False,False,True,False,False,True,0,0,0,1,0,0.495607,0.353212,0.266553
0,17,kickIn,behind,183.0,1,520,582.0,Sydney,Sydney,Isaac Heeney,Isaac_Heeney,Kick,46.0,-41.0,ineffective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021,0,1,0,1,0,1,18.0,0.0,18.0,Kick,Mark On Lead,Kick Inside 50 Result,Kick Into F50,46.0,49.0,49.0,19.0,-41.0,-35.0,-35.0,-49.0,582.0,550.0,548.0,548.0,62.0,32.0,6.708204,32.0,41.0,52.009614,0.908067,52.028396,0.075999,4.354442,0.235146,13.472898,1024.0,3.465736,True,False,False,True,True,False,False,False,False,True,0,0,1,0,0,0.325491,0.348718,0.127758
3,21,possGain,goal,222.0,1,653,666.0,Sydney,Sydney,Logan McDonald,Logan_McDonald,Kick,34.0,15.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021,1,0,0,6,0,6,18.0,1.0,17.0,Kick,Handball Received,Handball,Handball Received,34.0,19.0,10.0,7.0,15.0,17.0,20.0,20.0,666.0,664.0,662.0,662.0,13.0,2.0,15.132746,44.0,15.0,46.486557,0.328553,18.82471,0.13093,7.501763,0.408345,23.396432,1936.0,3.78419,False,True,False,False,True,False,False,True,False,False,0,0,0,1,0,0.329359,0.461428,0.197524


Checks

In [39]:
# Largest per-shot total of the three predicted outcome probabilities.
# A value above 1 means the raw model outputs do not form a distribution.
total_probability = df_shots['xGoals'].add(df_shots['xBehinds']).add(df_shots['xMiss'])
total_probability.max()

1.4987643726400424

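Normalise

The goal, behind and miss probabilities come from separate models, so they need not sum to 1 for a given shot (the check above peaks at about 1.50). Rescale them so each shot's three probabilities sum to 1.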

In [40]:
# Per-shot total of the three raw probabilities; dividing each outcome by it
# makes the normalised values sum to 1.
probability_total = df_shots['xGoals'] + df_shots['xBehinds'] + df_shots['xMiss']

for raw_column, normalised_column in (
    ('xGoals', 'xGoals_normalised'),
    ('xBehinds', 'xBehinds_normalised'),
    ('xMiss', 'xMiss_normalised'),
):
    df_shots[normalised_column] = df_shots[raw_column] / probability_total

Create Expected Score

In [41]:
# Expected score = 6 points per expected goal + 1 point per expected behind;
# a miss contributes nothing.
goal_points = 6
df_shots['xScore'] = (
    df_shots['xGoals_normalised']
    .mul(goal_points)
    .add(df_shots['xBehinds_normalised'])
)

Evaluate

In [42]:
# Total actual score vs total expected score across every shot.
df_shots.loc[:, ['Score', 'xScore']].sum()

Score     66630.000000
xScore    66369.862404
dtype: float64

In [43]:
# Distribution of actual vs expected score across every shot.
df_shots.loc[:, ['Score', 'xScore']].describe()

Unnamed: 0,Score,xScore
count,20273.0,20273.0
mean,3.286637,3.273806
std,2.679445,0.725378
min,0.0,1.619952
25%,1.0,2.767783
50%,1.0,3.21669
75%,6.0,3.766883
max,6.0,5.027549


In [47]:
# Total actual vs expected score, set shots only.
df_shots.loc[df_shots['Set_Shot'], ['Score', 'xScore']].sum()

Score     39604.00000
xScore    39063.38834
dtype: float64

In [49]:
# Distribution of actual vs expected score, set shots only.
df_shots.loc[df_shots['Set_Shot'], ['Score', 'xScore']].describe()

Unnamed: 0,Score,xScore
count,11057.0,11057.0
mean,3.581803,3.53291
std,2.644696,0.581823
min,0.0,1.649066
25%,1.0,3.140122
50%,6.0,3.421328
75%,6.0,3.898695
max,6.0,4.920164


In [48]:
# Total actual vs expected score, open-play shots only (Set_Shot is False).
df_shots.loc[~df_shots['Set_Shot'], ['Score', 'xScore']].sum()

Score     27026.000000
xScore    27306.474064
dtype: float64

In [50]:
# Distribution of actual vs expected score, open-play shots only (Set_Shot is False).
df_shots.loc[~df_shots['Set_Shot'], ['Score', 'xScore']].describe()

Unnamed: 0,Score,xScore
count,9216.0,9216.0
mean,2.932509,2.962942
std,2.6781,0.757743
min,0.0,1.619952
25%,1.0,2.389664
50%,1.0,2.78497
75%,6.0,3.371225
max,6.0,5.027549


Export Shots with xScore

In [44]:
# Preview the first five rows, including the normalised probabilities and
# xScore, before exporting.
df_shots.head(n=5)

Unnamed: 0,Chain_Number,Initial_State,Final_State,Order,Quarter,Quarter_Duration_Chain_Start,Quarter_Duration,Team_Chain,Team,Player,AFL_API_Player_ID,Description,x,y,Disposal,Shot_At_Goal,Behind_Detail,Venue_Width,Venue_Length,Home_Team,Away_Team,Home_Team_Direction_Q1,Match_ID,Round_ID,Year,Goal,Behind,Miss,Score,Home_Score,Away_Score,Home_Current_Score,Away_Current_Score,Current_Margin,Event_Type0,Event_Type1,Event_Type2,Event_Type3,x0,x1,x2,x3,y0,y1,y2,y3,Quarter_Duration0,Quarter_Duration1,Quarter_Duration2,Quarter_Duration3,Chain_Duration,Time_Since_Last_Action,Distance_Since_Last_Action,Distance_to_Right_Goal_x,Distance_to_Middle_y,Distance_to_Middle_Goal,Angle_to_Middle_Goal,Angle_to_Middle_Goal_degrees,Visible_Goal_Angle,Visible_Goal_Angle_degrees,Visible_Behind_Angle,Visible_Behind_Angle_degrees,Squared_Distance_to_Middle_Goal,Log_Distance_to_Middle_Goal,Set_Shot,GoalTrainingSet,GoalTestSet,GoalValidationSet,BehindTrainingSet,BehindTestSet,BehindValidationSet,MissTrainingSet,MissTestSet,MissValidationSet,ballUp,centreBounce,kickIn,possGain,throwIn,xGoals,xBehinds,xMiss,xGoals_normalised,xBehinds_normalised,xMiss_normalised,xScore
0,1,centreBounce,goal,11.0,1,13,39.0,Brisbane Lions,Brisbane Lions,Zac Bailey,Zac_Bailey,Kick,26.0,-21.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021,1,0,0,6,6,0,0.0,0.0,0.0,Kick,Handball Received,Handball,Gather from Opposition,26.0,18.0,11.0,6.0,-21.0,-23.0,-26.0,-27.0,39.0,38.0,37.0,36.0,26.0,1.0,8.246211,52.0,21.0,56.0803,0.383818,21.99113,0.106164,6.082774,0.32704,18.737997,2704.0,3.951244,False,True,False,False,True,False,False,True,False,False,0,1,0,0,0,0.289081,0.493594,0.233659,0.284435,0.485661,0.229904,2.19227
1,6,possGain,goal,61.0,1,149,168.0,Brisbane Lions,Brisbane Lions,Zac Bailey,Zac_Bailey,Kick,35.0,19.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021,1,0,0,6,6,0,6.0,0.0,6.0,Kick,Handball Received,Handball,Loose Ball Get,35.0,28.0,25.0,22.0,19.0,19.0,26.0,29.0,168.0,167.0,165.0,165.0,19.0,1.0,7.0,43.0,19.0,47.010637,0.416065,23.83874,0.125105,7.167963,0.389831,22.33568,1849.0,3.7612,False,True,False,False,True,False,False,False,False,True,0,0,0,1,0,0.313268,0.375746,0.225728,0.342466,0.410767,0.246767,2.465563
2,10,possGain,goal,110.0,1,285,306.0,Brisbane Lions,Brisbane Lions,Joe Daniher,Joe_Daniher,Kick,50.0,24.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021,1,0,0,6,6,0,12.0,0.0,12.0,Kick,Gather,Kick Inside 50 Result,Kick Into F50,50.0,50.0,50.0,-8.0,24.0,28.0,28.0,44.0,306.0,305.0,304.0,303.0,21.0,1.0,4.0,28.0,24.0,36.878178,0.708626,40.601295,0.132764,7.606837,0.424028,24.29503,784.0,3.332205,False,True,False,False,False,False,True,False,False,True,0,0,0,1,0,0.495607,0.353212,0.266553,0.444342,0.316676,0.238982,2.98273
0,17,kickIn,behind,183.0,1,520,582.0,Sydney,Sydney,Isaac Heeney,Isaac_Heeney,Kick,46.0,-41.0,ineffective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021,0,1,0,1,0,1,18.0,0.0,18.0,Kick,Mark On Lead,Kick Inside 50 Result,Kick Into F50,46.0,49.0,49.0,19.0,-41.0,-35.0,-35.0,-49.0,582.0,550.0,548.0,548.0,62.0,32.0,6.708204,32.0,41.0,52.009614,0.908067,52.028396,0.075999,4.354442,0.235146,13.472898,1024.0,3.465736,True,False,False,True,True,False,False,False,False,True,0,0,1,0,0,0.325491,0.348718,0.127758,0.405866,0.434828,0.159306,2.870024
3,21,possGain,goal,222.0,1,653,666.0,Sydney,Sydney,Logan McDonald,Logan_McDonald,Kick,34.0,15.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021,1,0,0,6,0,6,18.0,1.0,17.0,Kick,Handball Received,Handball,Handball Received,34.0,19.0,10.0,7.0,15.0,17.0,20.0,20.0,666.0,664.0,662.0,662.0,13.0,2.0,15.132746,44.0,15.0,46.486557,0.328553,18.82471,0.13093,7.501763,0.408345,23.396432,1936.0,3.78419,False,True,False,False,True,False,False,True,False,False,0,0,0,1,0,0.329359,0.461428,0.197524,0.333255,0.466886,0.19986,2.466413


In [46]:
# Write the scored shots to a versioned CSV; the index is not exported.
export_path = (
    "/Users/ciaran/Documents/Projects/AFL/git-repositories/expected-score-model/data/shot_chains_expected_score_v"
    + str(expected_score_version)
    + ".csv"
)
df_shots.to_csv(export_path, index=False)