Expected Score Model

Combine the expected goal, behind and miss models for set shots and open-play shots into a single expected score (xScore) for every shot.

In [1]:
import os
import sys

# Make the local `expected_score_model` package importable.
# The checkout location can be overridden with the EXPECTED_SCORE_MODEL_ROOT
# environment variable; the default is the original developer path.
EXPECTED_SCORE_MODEL_ROOT = os.environ.get(
    "EXPECTED_SCORE_MODEL_ROOT",
    "/Users/ciaran/Documents/Projects/AFL/git-repositories/expected-score-model/",
)

# Guard the append so re-running this cell does not add duplicate entries.
if EXPECTED_SCORE_MODEL_ROOT not in sys.path:
    sys.path.append(EXPECTED_SCORE_MODEL_ROOT)

In [2]:
import pandas as pd
import numpy as np
import joblib
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas and library deprecation warnings — consider narrowing the
# filter to the specific categories that are known to be noise.
warnings.filterwarnings('ignore')

from expected_score_model.config import *
from expected_score_model.domain.preprocessing.preprocessing import expected_score_response_processing
from expected_score_model.domain.contracts.modelling_data_contract import ModellingDataContract
from expected_score_model.domain.modelling.supermodel import SuperXGBClassifier
from expected_score_model.domain.modelling.model_evaluation import XGBClassifierEvaluator

# Let long and wide frames render in full when displayed (up to 999 rows/columns).
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", 999)

# Load IPython's autoreload extension and set it to mode 2, so edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2



Expected Score Version

In [3]:
# Version tag for this expected-score run; it is embedded in the file name of
# the CSV exported at the end of the notebook.
expected_score_version = 4

Models

In [4]:
# Load the persisted models, one per outcome (goal / behind / miss), first for
# set shots and then for open-play shots.
# The *_file_path names are not defined in this notebook; presumably they are
# supplied by the `expected_score_model.config` star import.
(
    expected_goal_set_model,
    expected_behind_set_model,
    expected_miss_set_model,
) = (
    joblib.load(model_path)
    for model_path in (
        exp_goal_set_model_file_path,
        exp_behind_set_model_file_path,
        exp_miss_set_model_file_path,
    )
)

(
    expected_goal_open_model,
    expected_behind_open_model,
    expected_miss_open_model,
) = (
    joblib.load(model_path)
    for model_path in (
        exp_goal_open_model_file_path,
        exp_behind_open_model_file_path,
        exp_miss_open_model_file_path,
    )
)

Data

In [5]:
# Read the raw chain events. low_memory=False makes pandas parse the file in a
# single pass instead of in chunks.
chain_data = pd.read_csv(filepath_or_buffer=chain_file_path, low_memory=False)
chain_data.head(n=5)

Unnamed: 0,Chain_Number,Initial_State,Final_State,Order,Quarter,Quarter_Duration_Chain_Start,Quarter_Duration,Team_Chain,Team,Player,AFL_API_Player_ID,Description,x,y,Disposal,Shot_At_Goal,Behind_Detail,Venue_Width,Venue_Length,Home_Team,Away_Team,Home_Team_Direction_Q1,Match_ID,Round_ID,Year,Season
0,1,centreBounce,goal,1.0,1,13,13.0,Brisbane Lions,,,,Centre Bounce,0.0,0.0,,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,
1,1,centreBounce,goal,2.0,1,13,24.0,Brisbane Lions,Brisbane Lions,Dayne Zorko,Dayne_Zorko,Hard Ball Get,8.0,-5.0,,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,
2,1,centreBounce,goal,3.0,1,13,24.0,Brisbane Lions,Brisbane Lions,Dayne Zorko,Dayne_Zorko,Handball,9.0,-6.0,ineffective,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,
3,1,centreBounce,goal,4.0,1,13,28.0,Brisbane Lions,Sydney,Oliver Florent,Oliver_Florent,Loose Ball Get,11.0,-7.0,,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,
4,1,centreBounce,goal,5.0,1,13,29.0,Brisbane Lions,Sydney,Oliver Florent,Oliver_Florent,Handball,12.0,-5.0,effective,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,


Processing

In [6]:
# Project preprocessing step; the displayed shots table shows Goal / Behind /
# Miss / Score columns after it, so it presumably derives those — confirm.
chain_data = expected_score_response_processing(chain_data)

# Description of the immediately preceding row.
# NOTE(review): the shift runs over the whole frame, so the first event of a
# chain or match sees the last event of the previous one — confirm intended.
chain_data['Event_Type1'] = chain_data['Description'].shift(1)

# `== True` keeps only rows explicitly flagged as shots; blank (NaN) flags
# compare as False and drop out.
# .copy() gives an independent frame, so the column added below is not written
# into a filtered selection of chain_data.
df_shots = chain_data[chain_data['Shot_At_Goal'] == True].copy()

# A shot counts as a set shot when the preceding event's description contains
# "Mark" or "Free". na=False treats a missing preceding event as "not a set
# shot" instead of raising TypeError, as `"Mark" in NaN` would.
df_shots['Set_Shot'] = df_shots['Event_Type1'].str.contains("Mark|Free", na=False)

# Independent copies: the prediction columns are added to these frames later.
df_set_shots = df_shots[df_shots['Set_Shot']].copy()
df_open_shots = df_shots[~df_shots['Set_Shot']].copy()

In [7]:
# Load the persisted preprocessors that pair with each of the six models
# (goal / behind / miss for set shots, then for open-play shots).
(
    goal_set_preproc,
    behind_set_preproc,
    miss_set_preproc,
) = (
    joblib.load(preprocessor_path)
    for preprocessor_path in (
        exp_goal_set_preprocessor_file_path,
        exp_behind_set_preprocessor_file_path,
        exp_miss_set_preprocessor_file_path,
    )
)

(
    goal_open_preproc,
    behind_open_preproc,
    miss_open_preproc,
) = (
    joblib.load(preprocessor_path)
    for preprocessor_path in (
        exp_goal_open_preprocessor_file_path,
        exp_behind_open_preprocessor_file_path,
        exp_miss_open_preprocessor_file_path,
    )
)

In [8]:
# Build the inputs for each of the six models with its matching preprocessor.
# NOTE(review): every transform receives the full chain_data rather than
# df_set_shots / df_open_shots. The prediction cell assigns the model outputs
# straight onto those frames, so each preprocessor presumably filters to its
# own shot subset in the same row order — confirm.
# NOTE(review): chain_data is displayed later with many feature columns that
# df_shots (sliced before this cell) does not have, which suggests transform()
# adds columns to its input in place — confirm before reordering these calls.
set_goal_features = goal_set_preproc.transform(chain_data)
set_behind_features = behind_set_preproc.transform(chain_data)
set_miss_features = miss_set_preproc.transform(chain_data)

open_goal_features = goal_open_preproc.transform(chain_data)
open_behind_features = behind_open_preproc.transform(chain_data)
open_miss_features = miss_open_preproc.transform(chain_data)

Predictions

In [9]:
# Score each shot with the three outcome models for its shot type.
# assign() returns a new frame, so the prediction columns are never written
# into a frame that was produced by boolean filtering (this avoids pandas'
# chained-assignment hazard).
# `calibrate=True` is passed through to the project's predict_proba;
# presumably it returns calibrated probabilities — confirm.
df_set_shots = df_set_shots.assign(
    xGoals=expected_goal_set_model.predict_proba(set_goal_features, calibrate=True),
    xBehinds=expected_behind_set_model.predict_proba(set_behind_features, calibrate=True),
    xMiss=expected_miss_set_model.predict_proba(set_miss_features, calibrate=True),
)

df_open_shots = df_open_shots.assign(
    xGoals=expected_goal_open_model.predict_proba(open_goal_features, calibrate=True),
    xBehinds=expected_behind_open_model.predict_proba(open_behind_features, calibrate=True),
    xMiss=expected_miss_open_model.predict_proba(open_miss_features, calibrate=True),
)

Combine

In [10]:
# Stack set shots and open-play shots back into one frame and restore match
# order (match, then chain, then position within the chain).
df_shots = (
    pd.concat([df_set_shots, df_open_shots])
    .sort_values(['Match_ID', "Chain_Number", "Order"])
)
df_shots.head(n=5)

Unnamed: 0,Chain_Number,Initial_State,Final_State,Order,Quarter,Quarter_Duration_Chain_Start,Quarter_Duration,Team_Chain,Team,Player,AFL_API_Player_ID,Description,x,y,Disposal,Shot_At_Goal,Behind_Detail,Venue_Width,Venue_Length,Home_Team,Away_Team,Home_Team_Direction_Q1,Match_ID,Round_ID,Year,Season,Goal,Behind,Miss,Score,Event_Type1,Set_Shot,xGoals,xBehinds,xMiss
10,1,centreBounce,goal,11.0,1,13,39.0,Brisbane Lions,Brisbane Lions,Zac Bailey,Zac_Bailey,Kick,26.0,-21.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,1,0,0,6,Handball Received,False,0.289081,0.47746,0.173369
60,6,possGain,goal,61.0,1,149,168.0,Brisbane Lions,Brisbane Lions,Zac Bailey,Zac_Bailey,Kick,35.0,19.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,1,0,0,6,Handball Received,False,0.313268,0.396646,0.258089
109,10,possGain,goal,110.0,1,285,306.0,Brisbane Lions,Brisbane Lions,Joe Daniher,Joe_Daniher,Kick,50.0,24.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,1,0,0,6,Gather,False,0.495607,0.430808,0.281173
182,17,kickIn,behind,183.0,1,520,582.0,Sydney,Sydney,Isaac Heeney,Isaac_Heeney,Kick,46.0,-41.0,ineffective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,1,0,1,Mark On Lead,True,0.325491,0.413704,0.393362
221,21,possGain,goal,222.0,1,653,666.0,Sydney,Sydney,Logan McDonald,Logan_McDonald,Kick,34.0,15.0,effective,True,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,1,0,0,6,Handball Received,False,0.329359,0.416202,0.186615


Checks

In [11]:
# Largest per-shot total of the three outcome probabilities. A value above 1
# means the separately predicted probabilities do not form a distribution.
df_shots['xGoals'].add(df_shots['xBehinds']).add(df_shots['xMiss']).max()

1.3725281398818756

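Normalise

The goal, behind and miss probabilities come from separate models and do not sum to 1 (see the check above), so rescale them to sum to 1 for each shot.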

In [12]:
# Per-shot total of the three raw probabilities, used as the common divisor so
# the normalised values sum to 1 for each shot.
outcome_probability_total = df_shots['xGoals'] + df_shots['xBehinds'] + df_shots['xMiss']

df_shots['xGoals_normalised'] = df_shots['xGoals'].div(outcome_probability_total)
df_shots['xBehinds_normalised'] = df_shots['xBehinds'].div(outcome_probability_total)
df_shots['xMiss_normalised'] = df_shots['xMiss'].div(outcome_probability_total)

Create Expected Score

In [13]:
# Expected score in points: a goal is worth 6 and a behind 1; a miss adds nothing.
df_shots['xScore'] = df_shots['xGoals_normalised'].mul(6).add(df_shots['xBehinds_normalised'])

Evaluate

In [14]:
# Total actual vs expected score across every shot.
df_shots.loc[:, ['Score', 'xScore']].sum()

Score     98324.000000
xScore    98987.103621
dtype: float64

In [15]:
# Distribution of actual vs expected score across every shot.
df_shots.loc[:, ['Score', 'xScore']].describe()

Unnamed: 0,Score,xScore
count,29906.0,29906.0
mean,3.287768,3.309941
std,2.682481,0.970739
min,0.0,1.489613
25%,1.0,2.553543
50%,1.0,3.106996
75%,6.0,3.918454
max,6.0,5.740831


In [16]:
# Total actual vs expected score for set shots only.
df_shots.loc[df_shots['Set_Shot'], ['Score', 'xScore']].sum()

Score     58570.000000
xScore    58707.781706
dtype: float64

In [17]:
# Distribution of actual vs expected score for set shots only.
df_shots.loc[df_shots['Set_Shot'], ['Score', 'xScore']].describe()

Unnamed: 0,Score,xScore
count,16310.0,16310.0
mean,3.591048,3.599496
std,2.646786,1.014446
min,0.0,1.71692
25%,1.0,2.843671
50%,6.0,3.421003
75%,6.0,4.222578
max,6.0,5.740831


In [18]:
# Total actual vs expected score for open-play shots only.
df_shots.loc[~df_shots['Set_Shot'], ['Score', 'xScore']].sum()

Score     39754.000000
xScore    40279.321915
dtype: float64

In [19]:
# Distribution of actual vs expected score for open-play shots only.
df_shots.loc[~df_shots['Set_Shot'], ['Score', 'xScore']].describe()

Unnamed: 0,Score,xScore
count,13596.0,13596.0
mean,2.923948,2.962586
std,2.679871,0.785535
min,0.0,1.489613
25%,1.0,2.384906
50%,1.0,2.786534
75%,6.0,3.349551
max,6.0,5.123928


Merge xScore to Chain

In [20]:
# Row/column count before the merge, for comparison with the shape shown after
# it (the row count should not change).
chain_data.shape

(1237133, 65)

In [21]:
# Key that identifies one event, and the expected-score columns attached to it.
shot_keys = ['Match_ID', "Chain_Number", "Order"]
xscore_columns = ['xGoals', 'xBehinds', 'xMiss', 'xGoals_normalised', 'xBehinds_normalised', 'xMiss_normalised', 'xScore']

chain_data = (
    chain_data
    # Drop expected-score columns left by a previous run of this cell, so a
    # re-run does not produce _x/_y suffixed duplicates.
    .drop(columns=xscore_columns, errors="ignore")
    # Left join keeps every chain event; non-shot rows get NaN.
    # validate="many_to_one" raises if a key occurs more than once in df_shots,
    # which would otherwise silently duplicate chain rows.
    .merge(
        df_shots[shot_keys + xscore_columns],
        how="left",
        on=shot_keys,
        validate="many_to_one",
    )
)
chain_data.shape

(1237133, 72)

In [22]:
# Preview the chain events with the expected-score columns attached.
chain_data.head(n=5)

Unnamed: 0,Chain_Number,Initial_State,Final_State,Order,Quarter,Quarter_Duration_Chain_Start,Quarter_Duration,Team_Chain,Team,Player,AFL_API_Player_ID,Description,x,y,Disposal,Shot_At_Goal,Behind_Detail,Venue_Width,Venue_Length,Home_Team,Away_Team,Home_Team_Direction_Q1,Match_ID,Round_ID,Year,Season,Goal,Behind,Miss,Score,Event_Type1,ballUp,centreBounce,kickIn,possGain,throwIn,Event_Type0,Event_Type2,Event_Type3,x0,x1,x2,x3,y0,y1,y2,y3,Quarter_Duration0,Quarter_Duration1,Quarter_Duration2,Quarter_Duration3,Time_Since_Last_Action,Distance_Since_Last_Action,Chain_Duration,Distance_to_Right_Goal_x,Distance_to_Middle_y,Distance_to_Middle_Goal,Angle_to_Middle_Goal,Angle_to_Middle_Goal_degrees,Visible_Goal_Angle,Visible_Goal_Angle_degrees,Visible_Behind_Angle,Visible_Behind_Angle_degrees,Squared_Distance_to_Middle_Goal,Log_Distance_to_Middle_Goal,xGoals,xBehinds,xMiss,xGoals_normalised,xBehinds_normalised,xMiss_normalised,xScore
0,1,centreBounce,goal,1.0,1,13,13.0,Brisbane Lions,,,,Centre Bounce,0.0,0.0,,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,0,0,0,,0,1,0,0,0,Centre Bounce,,,0.0,,,,0.0,,,,13.0,,,,,,0.0,78.0,0.0,78.0,0.0,0.0,0.08219,4.709118,0.24994,14.320502,6084.0,4.356709,,,,,,,
1,1,centreBounce,goal,2.0,1,13,24.0,Brisbane Lions,Brisbane Lions,Dayne Zorko,Dayne_Zorko,Hard Ball Get,8.0,-5.0,,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,0,0,0,Centre Bounce,0,1,0,0,0,Hard Ball Get,,,8.0,0.0,,,-5.0,0.0,,,24.0,13.0,,,11.0,9.433981,11.0,70.0,5.0,70.178344,0.071307,4.085617,0.091154,5.222739,0.278097,15.933804,4900.0,4.248495,,,,,,,
2,1,centreBounce,goal,3.0,1,13,24.0,Brisbane Lions,Brisbane Lions,Dayne Zorko,Dayne_Zorko,Handball,9.0,-6.0,ineffective,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,0,0,0,Hard Ball Get,0,1,0,0,0,Handball,Centre Bounce,,9.0,8.0,0.0,,-6.0,-5.0,0.0,,24.0,24.0,13.0,,0.0,1.414214,11.0,69.0,6.0,69.260378,0.086738,4.969741,0.092254,5.285792,0.281582,16.133481,4761.0,4.234107,,,,,,,
3,1,centreBounce,goal,4.0,1,13,28.0,Brisbane Lions,Sydney,Oliver Florent,Oliver_Florent,Loose Ball Get,11.0,-7.0,,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,0,0,0,Handball,0,1,0,0,0,Loose Ball Get,Hard Ball Get,Centre Bounce,11.0,9.0,8.0,0.0,-7.0,-6.0,-5.0,0.0,28.0,24.0,24.0,13.0,4.0,2.236068,15.0,67.0,7.0,67.364679,0.1041,5.964487,0.094705,5.426178,0.289349,16.578485,4489.0,4.204693,,,,,,,
4,1,centreBounce,goal,5.0,1,13,29.0,Brisbane Lions,Sydney,Oliver Florent,Oliver_Florent,Handball,12.0,-5.0,effective,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,0,0,0,Loose Ball Get,0,1,0,0,0,Handball,Handball,Hard Ball Get,12.0,11.0,9.0,8.0,-5.0,-7.0,-6.0,-5.0,29.0,28.0,24.0,24.0,1.0,2.236068,16.0,66.0,5.0,66.189123,0.075613,4.332314,0.096642,5.537192,0.295465,16.928869,4356.0,4.189655,,,,,,,


Export Shots with xScore

In [None]:
# Preview the last few shots before exporting.
df_shots.tail(n=5)

In [None]:
import os

# Root of the project checkout. Override with the EXPECTED_SCORE_MODEL_ROOT
# environment variable to export on another machine; the default reproduces
# the original output location.
export_root = os.environ.get(
    "EXPECTED_SCORE_MODEL_ROOT",
    "/Users/ciaran/Documents/Projects/AFL/git-repositories/expected-score-model/",
)
export_path = os.path.join(
    export_root,
    "data",
    f"shot_chains_expected_score_v{expected_score_version}_202319.csv",
)

# index=False: the row index is not written to the file.
df_shots.to_csv(export_path, index=False)