Compute VAEP Values & Calculate Player Ratings

In [1]:
import sys

# Add both local project checkouts to the module search path, in this order.
sys.path.extend(
    [
        "/Users/ciaran/Documents/Projects/AFL/git-repositories/afl-player-ratings/",
        "/Users/ciaran/Documents/Projects/AFL/git-repositories/expected-score-model/",
    ]
)

In [2]:
import pandas as pd
import numpy as np
import joblib
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

from config import chain_file_path
from exp_vaep.domain.contracts.modelling_data_contract import ModellingDataContract
from exp_vaep.domain.preprocessing.preprocessing import *
from exp_vaep.domain.preprocessing.formula import *

# Raise the display limits to 999 rows and 999 columns for rendered frames.
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", 999)

%load_ext autoreload
%autoreload 2

In [3]:
# Column list read from the project's modelling data contract; later used to select
# the feature frame that is passed to the exp-VAEP scoring and conceding models.
FEATURES = ModellingDataContract.feature_list_scores

Load chain data

In [4]:
# Read the chain event data from the CSV path exported by `config`.
chains = pd.read_csv(chain_file_path)
# Display the last rows as a quick look at what was loaded.
chains.tail()

Unnamed: 0,Chain_Number,Initial_State,Final_State,Order,Quarter,Quarter_Duration_Chain_Start,Quarter_Duration,Team_Chain,Team,Player,AFL_API_Player_ID,Description,x,y,Disposal,Shot_At_Goal,Behind_Detail,Venue_Width,Venue_Length,Home_Team,Away_Team,Home_Team_Direction_Q1,Match_ID,Round_ID,Year,Season
1169201,252,ballUp,turnover,1984.0,4,1937,1942.0,St Kilda,St Kilda,Dan Butler,Dan_Butler,Kick,-65.0,14.0,clanger,,,129,160,St Kilda,North Melbourne,left,202319_StKilda_NorthMelbourne,202319,,2023.0
1169202,252,ballUp,turnover,1985.0,4,1937,1945.0,St Kilda,St Kilda,Dan Butler,Dan_Butler,Out On Full After Kick,-69.0,34.0,,,,129,160,St Kilda,North Melbourne,left,202319_StKilda_NorthMelbourne,202319,,2023.0
1169203,253,possGain,behind,1986.0,4,1949,1949.0,North Melbourne,North Melbourne,Luke Davies-Uniacke,Luke_Davies-Uniacke,OOF Kick In,67.0,-34.0,,,,129,160,St Kilda,North Melbourne,left,202319_StKilda_NorthMelbourne,202319,,2023.0
1169204,253,possGain,behind,1987.0,4,1949,1976.0,North Melbourne,North Melbourne,Luke Davies-Uniacke,Luke_Davies-Uniacke,Kick,67.0,-34.0,ineffective,True,,129,160,St Kilda,North Melbourne,left,202319_StKilda_NorthMelbourne,202319,,2023.0
1169205,253,possGain,behind,1988.0,4,1949,1978.0,North Melbourne,North Melbourne,Luke Davies-Uniacke,Luke_Davies-Uniacke,Behind,67.0,-34.0,,,missLeft,129,160,St Kilda,North Melbourne,left,202319_StKilda_NorthMelbourne,202319,,2023.0


Load Expected Score Data

In [5]:
# Location and version numbers of the serialized expected-score models loaded below.
# NOTE(review): expected_score_version is not referenced anywhere else in the visible notebook.
expected_score_version = 4
model_location = '/Users/ciaran/Documents/Projects/AFL/git-repositories/expected-score-model/model_outputs/models/'
# Versions of the goal / behind / miss models applied to set shots.
expected_goal_set_version = 7
expected_behind_set_version = 5
expected_miss_set_version = 4

# Versions of the goal / behind / miss models applied to open shots.
expected_goal_open_version = 7
expected_behind_open_version = 6
expected_miss_open_version = 5

In [6]:
def _load_expected_model(outcome, shot_type, version):
    """Load one expected-score model from ``model_location``.

    The file name is ``expected_<outcome>_<shot_type>_v<version>.joblib``.
    NOTE: joblib files are pickle-based, so only load artefacts from a trusted source.
    """
    return joblib.load(f"{model_location}expected_{outcome}_{shot_type}_v{version}.joblib")


# Set-shot models.
expected_goal_set_model = _load_expected_model("goal", "set", expected_goal_set_version)
expected_behind_set_model = _load_expected_model("behind", "set", expected_behind_set_version)
expected_miss_set_model = _load_expected_model("miss", "set", expected_miss_set_version)

# Open-shot models.
expected_goal_open_model = _load_expected_model("goal", "open", expected_goal_open_version)
expected_behind_open_model = _load_expected_model("behind", "open", expected_behind_open_version)
expected_miss_open_model = _load_expected_model("miss", "open", expected_miss_open_version)

In [7]:
def _booster_feature_names(model):
    """Return the feature names stored on the model's underlying XGBoost booster."""
    return model.xgb_model.get_booster().feature_names


# Feature lists for the set-shot models.
goal_set_features = _booster_feature_names(expected_goal_set_model)
behind_set_features = _booster_feature_names(expected_behind_set_model)
miss_set_features = _booster_feature_names(expected_miss_set_model)

# Feature lists for the open-shot models.
goal_open_features = _booster_feature_names(expected_goal_open_model)
behind_open_features = _booster_feature_names(expected_behind_open_model)
miss_open_features = _booster_feature_names(expected_miss_open_model)

In [8]:
# One-hot encode the chain's initial state. The categories are pinned explicitly so each
# indicator column is matched to its state by name, instead of relying on the sorted order
# of whichever values happen to be present in this extract (a missing state used to raise a
# length mismatch; a different set of five states would have been silently mislabelled).
INITIAL_STATES = ['ballUp', 'centreBounce', 'kickIn', 'possGain', 'throwIn']

unexpected_states = set(chains['Initial_State'].dropna().unique()) - set(INITIAL_STATES)
if unexpected_states:
    # Fail loudly rather than encode an unknown state as an all-zero row.
    raise ValueError(f"Unexpected Initial_State values: {sorted(map(str, unexpected_states))}")

chains[INITIAL_STATES] = pd.get_dummies(
    chains['Initial_State'].astype(pd.CategoricalDtype(categories=INITIAL_STATES))
)

In [9]:
# Description of the immediately preceding event row, used to classify each shot.
chains['Event_Type1'] = chains['Description'].shift(1)

# Copy so the new column is written to an independent frame rather than to a slice of
# `chains` (the original chained assignment only passed because warnings are silenced).
shots = chains[chains['Shot_At_Goal'] == True].copy()

# A shot counts as a set shot when the preceding description contains "Mark" or "Free".
# `na=False` classifies a shot with a missing preceding description as an open shot; the
# previous `"Mark" in x` lambda raised TypeError on NaN.
shots['Set_Shot'] = shots['Event_Type1'].str.contains("Mark|Free", na=False)
set_shots = shots[shots['Set_Shot']].copy()
open_shots = shots[~shots['Set_Shot']].copy()

In [10]:
# Run the expected-score feature engineering on each shot type separately.
# NOTE(review): the helper presumably comes from one of the project star imports above — confirm.
set_shots = expected_score_feature_engineering(set_shots)
open_shots = expected_score_feature_engineering(open_shots)

In [11]:
# Score every set shot with the three set-shot models, requesting calibrated output.
for outcome_column, outcome_model, outcome_features in (
    ('xGoals', expected_goal_set_model, goal_set_features),
    ('xBehinds', expected_behind_set_model, behind_set_features),
    ('xMiss', expected_miss_set_model, miss_set_features),
):
    set_shots[outcome_column] = outcome_model.predict_proba(set_shots[outcome_features], calibrate=True)

# Same again for open shots, using the open-shot models and their feature lists.
for outcome_column, outcome_model, outcome_features in (
    ('xGoals', expected_goal_open_model, goal_open_features),
    ('xBehinds', expected_behind_open_model, behind_open_features),
    ('xMiss', expected_miss_open_model, miss_open_features),
):
    open_shots[outcome_column] = outcome_model.predict_proba(open_shots[outcome_features], calibrate=True)

In [12]:
EXPECTED_OUTCOME_COLUMNS = ['xGoals', 'xBehinds', 'xMiss']
SHOT_KEYS = ['Chain_Number', 'Order', 'Match_ID']

# Stack the scored set and open shots back together in match / chain / event order.
exp_shots = pd.concat([set_shots, open_shots], axis=0).sort_values(by=['Match_ID', "Chain_Number", "Order"])

# Drop any expected-outcome columns left over from an earlier run of this cell, so the merge
# never produces `_x` / `_y` suffixed duplicates and the cell can safely be re-run.
shots = shots.drop(columns=EXPECTED_OUTCOME_COLUMNS, errors='ignore').merge(
    exp_shots[SHOT_KEYS + EXPECTED_OUTCOME_COLUMNS],
    how="left",
    on=SHOT_KEYS,
)

In [13]:
# Rescale the three model outputs so that they sum to one for every shot.
outcome_total = shots['xGoals'] + shots['xBehinds'] + shots['xMiss']
shots['xGoals_normalised'] = shots['xGoals'].div(outcome_total)
shots['xBehinds_normalised'] = shots['xBehinds'].div(outcome_total)
shots['xMiss_normalised'] = shots['xMiss'].div(outcome_total)

# Expected score weights the goal share by 6 and the behind share by 1.
shots['xScore'] = 6 * shots['xGoals_normalised'] + shots['xBehinds_normalised']

In [14]:
SHOT_VALUE_COLUMNS = ['xScore', 'xGoals_normalised']
EVENT_KEYS = ['Match_ID', 'Chain_Number', 'Order']

# Attach the shot values to the full event frame. Stale copies of the value columns are
# dropped first so re-running this cell does not create `_x` / `_y` duplicates (which would
# make the fillna line below raise KeyError).
chains = chains.drop(columns=SHOT_VALUE_COLUMNS, errors='ignore').merge(
    shots[EVENT_KEYS + SHOT_VALUE_COLUMNS],
    how="left",
    on=EVENT_KEYS,
)
# Events with no matching shot row get a value of 0.
chains[SHOT_VALUE_COLUMNS] = chains[SHOT_VALUE_COLUMNS].fillna(0)
chains.head()

Unnamed: 0,Chain_Number,Initial_State,Final_State,Order,Quarter,Quarter_Duration_Chain_Start,Quarter_Duration,Team_Chain,Team,Player,AFL_API_Player_ID,Description,x,y,Disposal,Shot_At_Goal,Behind_Detail,Venue_Width,Venue_Length,Home_Team,Away_Team,Home_Team_Direction_Q1,Match_ID,Round_ID,Year,Season,ballUp,centreBounce,kickIn,possGain,throwIn,Event_Type1,xScore,xGoals_normalised
0,1,centreBounce,goal,1.0,1,13,13.0,Brisbane Lions,,,,Centre Bounce,0.0,0.0,,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,1,0,0,0,,0.0,0.0
1,1,centreBounce,goal,2.0,1,13,24.0,Brisbane Lions,Brisbane Lions,Dayne Zorko,Dayne_Zorko,Hard Ball Get,8.0,-5.0,,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,1,0,0,0,Centre Bounce,0.0,0.0
2,1,centreBounce,goal,3.0,1,13,24.0,Brisbane Lions,Brisbane Lions,Dayne Zorko,Dayne_Zorko,Handball,9.0,-6.0,ineffective,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,1,0,0,0,Hard Ball Get,0.0,0.0
3,1,centreBounce,goal,4.0,1,13,28.0,Brisbane Lions,Sydney,Oliver Florent,Oliver_Florent,Loose Ball Get,11.0,-7.0,,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,1,0,0,0,Handball,0.0,0.0
4,1,centreBounce,goal,5.0,1,13,29.0,Brisbane Lions,Sydney,Oliver Florent,Oliver_Florent,Handball,12.0,-5.0,effective,,,138,156,Brisbane Lions,Sydney,right,202101_BrisbaneLions_Sydney,202101,2021.0,,0,1,0,0,0,Loose Ball Get,0.0,0.0


Preprocess

In [17]:
# Convert the chain events into the schema frame, then derive the game-state features from it.
# NOTE(review): both helpers presumably come from the project star imports above — confirm.
schema_chains = convert_chains_to_schema(chains)
gamestate_features = create_gamestate_features(schema_chains)

In [18]:
# Preview the first rows of the converted schema frame.
schema_chains.head()

Unnamed: 0,match_id,chain_number,order,quarter,quarter_seconds,overall_seconds,team,player,start_x,start_y,end_x,end_y,action_type,outcome_type,xScore
1,202101_BrisbaneLions_Sydney,1,2.0,1,24.0,24.0,Brisbane Lions,Dayne Zorko,8.0,-5.0,9.0,-6.0,Hard Ball Get,effective,0.0
2,202101_BrisbaneLions_Sydney,1,3.0,1,24.0,24.0,Brisbane Lions,Dayne Zorko,9.0,-6.0,-11.0,7.0,Handball,ineffective,0.0
3,202101_BrisbaneLions_Sydney,1,4.0,1,28.0,28.0,Sydney,Oliver Florent,-11.0,7.0,-12.0,5.0,Loose Ball Get,effective,0.0
4,202101_BrisbaneLions_Sydney,1,5.0,1,29.0,29.0,Sydney,Oliver Florent,-12.0,5.0,-14.0,2.0,Handball,effective,0.0
5,202101_BrisbaneLions_Sydney,1,6.0,1,30.0,30.0,Sydney,George Hewett,-14.0,2.0,-22.0,2.0,Carry,effective,0.0


In [19]:
# Put the schema columns and the game-state features side by side (column-wise concat,
# so rows are matched on their index labels).
exp_vaep_modelling_data = pd.concat([schema_chains, gamestate_features], axis=1)

In [20]:
# Keep only the columns listed in FEATURES as model input.
exp_vaep_features = exp_vaep_modelling_data[FEATURES]

Load models

In [21]:
# Directory and file names of the serialized exp-VAEP scoring and conceding models.
model_file_path = "/Users/ciaran/Documents/Projects/AFL/git-repositories/afl-player-ratings/exp_vaep/model_outputs/models/"
scoring_model = "exp_vaep_scoring_v1.joblib"
conceding_model = "exp_vaep_conceding_v1.joblib"

In [22]:
# Load the two exp-VAEP models from `model_file_path`.
# NOTE: joblib files are pickle-based, so only load artefacts from a trusted source.
exp_score_model = joblib.load(f"{model_file_path}/{scoring_model}")
exp_concede_model = joblib.load(f"{model_file_path}/{conceding_model}")

Make predictions

In [23]:
# Predict with the scoring model and limit the result to the range [0, 6].
raw_score_predictions = exp_score_model.predict(exp_vaep_features)
schema_chains['scores'] = np.clip(raw_score_predictions, 0, 6)

# Same for the conceding model.
raw_concede_predictions = exp_concede_model.predict(exp_vaep_features)
schema_chains['concedes'] = np.clip(raw_concede_predictions, 0, 6)

In [24]:
# Summary statistics of the clipped scoring predictions.
schema_chains['scores'].describe()

count    947887.000000
mean          0.814553
std           0.848166
min           0.000000
25%           0.233094
50%           0.595690
75%           1.083073
max           6.000000
Name: scores, dtype: float64

Compute VAEP

In [25]:
# Value each match's actions separately. One groupby pass replaces re-filtering the whole
# frame with a boolean mask once per match; `sort=False` keeps matches in order of first
# appearance, which is the order `schema_chains['match_id'].unique()` gives.
match_vaep_list = [
    value(match_chains, match_chains['scores'], match_chains['concedes'])
    for _, match_chains in schema_chains.groupby('match_id', sort=False)
]

# Stack the per-match results back into one frame.
vaep_values = pd.concat(match_vaep_list, axis=0)

In [26]:
# Attach the value columns to the action frame column-wise (rows are matched on index labels).
chain_vaep_data = pd.concat([schema_chains, vaep_values], axis=1)
chain_vaep_data.head(12)

Unnamed: 0,match_id,chain_number,order,quarter,quarter_seconds,overall_seconds,team,player,start_x,start_y,end_x,end_y,action_type,outcome_type,xScore,scores,concedes,offensive_value,defensive_value,vaep_value
1,202101_BrisbaneLions_Sydney,1,2.0,1,24.0,24.0,Brisbane Lions,Dayne Zorko,8.0,-5.0,9.0,-6.0,Hard Ball Get,effective,0.0,0.246684,0.158121,0.0,-0.0,0.0
2,202101_BrisbaneLions_Sydney,1,3.0,1,24.0,24.0,Brisbane Lions,Dayne Zorko,9.0,-6.0,-11.0,7.0,Handball,ineffective,0.0,0.0,0.362654,-0.246684,-0.204533,-0.451217
3,202101_BrisbaneLions_Sydney,1,4.0,1,28.0,28.0,Sydney,Oliver Florent,-11.0,7.0,-12.0,5.0,Loose Ball Get,effective,0.0,0.668677,0.081827,0.306023,-0.081827,0.224196
4,202101_BrisbaneLions_Sydney,1,5.0,1,29.0,29.0,Sydney,Oliver Florent,-12.0,5.0,-14.0,2.0,Handball,effective,0.0,0.456306,0.099139,-0.212371,-0.017312,-0.229683
5,202101_BrisbaneLions_Sydney,1,6.0,1,30.0,30.0,Sydney,George Hewett,-14.0,2.0,-22.0,2.0,Carry,effective,0.0,1.176206,0.069984,0.7199,0.029155,0.749055
6,202101_BrisbaneLions_Sydney,1,7.0,1,31.0,31.0,Sydney,George Hewett,-22.0,2.0,6.0,-27.0,Kick,clanger,0.0,0.0,0.438838,-1.176206,-0.368854,-1.545059
7,202101_BrisbaneLions_Sydney,1,8.0,1,36.0,36.0,Brisbane Lions,Hugh McCluggage,6.0,-27.0,11.0,-26.0,Gather,effective,0.0,1.003286,0.063453,0.564449,-0.063453,0.500995
8,202101_BrisbaneLions_Sydney,1,9.0,1,37.0,37.0,Brisbane Lions,Hugh McCluggage,11.0,-26.0,18.0,-23.0,Handball,effective,0.0,0.4992,0.037909,-0.504086,0.025544,-0.478542
9,202101_BrisbaneLions_Sydney,1,10.0,1,38.0,38.0,Brisbane Lions,Zac Bailey,18.0,-23.0,26.0,-21.0,Carry,effective,0.0,1.97952,0.048713,1.480319,-0.010804,1.469515
10,202101_BrisbaneLions_Sydney,1,11.0,1,39.0,39.0,Brisbane Lions,Zac Bailey,26.0,-21.0,26.0,-21.0,Shot,effective,1.727293,2.211809,0.129209,0.23229,-0.080495,0.151794


Checks

In [27]:
# Sanity check: largest and smallest per-action VAEP value.
chain_vaep_data['vaep_value'].max(), chain_vaep_data['vaep_value'].min()

(5.758547306060791, -5.381438732147217)

Export

In [28]:
# Write the per-action frame to CSV; index=False leaves the DataFrame index out of the file.
chain_vaep_data.to_csv("/Users/ciaran/Documents/Projects/AFL/git-repositories/afl-player-ratings/data/schema_chains_exp_vaep_values.csv", index=False)