In [1]:
import pandas as pd
import re
import numpy as np
from scipy import stats
import scipy as sp
from sklearn import metrics
from bayesian_player_ratings import *

import os
import warnings
# Notebook-wide settings.
# NOTE(review): this silences *every* warning for the rest of the session,
# including pandas deprecation / chained-assignment warnings.
warnings.filterwarnings('ignore')

# Let wide and long frames render in full, with floats shown to 4 decimal places.
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)
pd.set_option('display.precision', 4)

import optuna


1. Load expected VAEP data and player data
2. Simulate the initial updating process
3. Put it into a function
4. Create the objective function
5. Run an Optuna optimisation to find the update parameters

Load in player and match stats

In [2]:
# --- Match-level results ---
match_summary = pd.read_csv("/Users/ciaran/Documents/Projects/AFL/data/match_summary.csv")
# Project helper applied to the "Q4_Score" column
# (presumably splits the final score string into numeric columns; confirm in bayesian_player_ratings).
match_summary = score_col_splitter(match_summary, "Q4_Score")
# The first four characters of Match_ID are the season year (e.g. "2023F2_...").
match_summary['Season'] = match_summary['Match_ID'].apply(lambda match_id: int(match_id[:4]))

# --- Player-level stats ---
player_stats = pd.read_csv("/Users/ciaran/Documents/Projects/AFL/data/scored_player_stats_v2.csv")
player_stats = create_additional_player_stats_variables(player_stats)
# Treat missing expected-value entries as zero contribution.
for value_col in ('exp_vaep_value', 'exp_offensive_value', 'exp_defensive_value'):
    player_stats[value_col] = player_stats[value_col].fillna(0)

# Roll player rows up to one row per match, then attach them to the match results.
match_stats = aggregate_player_to_match_stats(player_stats)

match_summary_stats = match_summary.merge(match_stats, on = "Match_ID", how = "left")


In [3]:
team_info = pd.read_csv("/Users/ciaran/Documents/Projects/AFL/data/team_info.csv")

# Build one lookup per side: team name -> its first listed home ground (Home_Ground_1).
team_grounds = team_info[['Team', 'Home_Ground_1']]
home_team_info = team_grounds.rename(columns = {'Team':'Home_Team', 'Home_Ground_1':'Home_Team_Ground'})
away_team_info = team_grounds.rename(columns = {'Team':'Away_Team', 'Home_Ground_1':'Away_Team_Ground'})

# Left joins keep every match even when a team is missing from team_info.
# NOTE(review): this cell reassigns match_summary_stats, so re-running it without
# re-running the load cell will add suffixed duplicate ground columns.
match_summary_stats = match_summary_stats.merge(home_team_info, how = 'left', on = 'Home_Team')
match_summary_stats = match_summary_stats.merge(away_team_info, how = 'left', on = 'Away_Team')

In [4]:
venue_info = pd.read_csv("/Users/ciaran/Documents/Projects/AFL/data/venue_info.csv")

# Look up the city of the away team's home ground, i.e. where the away team travels from.
away_venue_info = venue_info[['Venue', 'City']].rename(
    columns = {'Venue':'Away_Team_Ground', 'City':'Away_Team_City'}
)
# Left join: matches whose away ground is absent from venue_info get a NaN Away_Team_City.
match_summary_stats = match_summary_stats.merge(away_venue_info, how = 'left', on = 'Away_Team_Ground')

In [5]:
# Sanity check: show the last row of the player-level table to confirm the available columns.
player_stats.tail(1)

Unnamed: 0,Match_ID,Team,Player,Round_ID,AFL_API_Player_ID,Player_Type,playerId,Age,Height,Weight,Number,Kicking_Foot,State_Of_Origin,Draft_Year,Debut_Year,Recruited_From,Draft_Position,Draft_Type,Photo_URL,Date_Of_Birth,Percent_Played,Behinds,Bounces,Centre_Bounces_Attended,Centre_Clearances,Clangers,Defensive_Contest_Losses,Defensive_Contest_Loss_Percentage,Defensive_One_On_One_Contests,Contested_Marks,Contested_Possession_Rate,Contested_Possessions,Offensive_One_On_One_Contests,Offensive_Contest_Wins,Offensive_Contest_Win_Percentage,Defensive_Half_Pressure_Acts,Disposal_Efficiency,Disposals,AFL_Fantasy_Points,Effective_Disposals,Effective_Kicks,Inside_50_Ground_Ball_Gets,Frees_Against,Frees_For,Goal_Accuracy,Goal_Assists,Goals,Ground_Ball_Gets,Handballs,Hit_Outs,Hit_Outs_To_Advantage,Hit_Outs_To_Advantage_Rate,Hit_Out_Win_Percentage,Inside_50s,Intercept_Marks,Intercepts,Kick_Efficiency,Kick_Ins,Kick_Ins_Played_On,Kicks,Kick_To_Handball_Ratio,Marks,Marks_Inside_50,Marks_On_Lead,Metres_Gained,One_Percenters,Pressure_Acts,Player_Rating_Points,Rebound_50s,Ruck_Contests,Score_Involvements,Score_Launches,Shots_At_Goal,Spoils,Stoppage_Clearances,Tackles,Tackles_Inside_50,Clearances,Possessions,Turnovers,Uncontested_Possessions,AFLCA_Player_ID,Coaches_Votes,Position,Team_Status,Position_Sub_Group,Position_Group,Year,Brownlow_Votes,Season,xScore,xT_created,xT_denied,vaep_value,offensive_value,defensive_value,exp_vaep_value,exp_offensive_value,exp_defensive_value,xT_received,xT_prevented,vaep_value_received,exp_vaep_value_received,Player_Season,Score,xScore_Diff,Home_Team,Away_Team,Opponent,Round,Round_str,Round_ID_num
28816,2023F2_PortAdelaide_GreaterWesternSydney,Port Adelaide,Zak Butters,2023F2,Zak_Butters,MIDFIELDER,CD_I1006121,23,181,77,9,RIGHT,VIC,2018.0,2019.0,Darley (Vic)/Maribyrnong College (Vic)/Western...,12.0,nationalDraft,https://s.afl.com.au/staticfile/AFL Tenant/AFL...,2000-09-08,87,1,0.0,,0,2,,,,0,,2,,,,,42.9,7,33,,,,1,0,0.0,0,0,,4,0,,,,0,,1,,,,3,,2,1,,103.0,0,,,0,,2,,1,,1,3,0,1,8,2,6,,,Centre,FINAL_TEAM,Centre,Midfield,,,2023,4.7408,0.2185,0.0379,0.3014,0.3202,-0.0188,3.946,4.6338,-0.6877,0.144,0.0555,1.0421,4.4415,Zak Butters_2023,1,-3.7408,Port Adelaide,Greater Western Sydney,Greater Western Sydney,25,25,202325


In [6]:
# Sanity check: show the last row of the per-match aggregates (Home_/Away_ value columns).
match_stats.tail(1)

Unnamed: 0_level_0,Away_xScore,Home_xScore,Away_vaep_value,Home_vaep_value,Away_offensive_value,Home_offensive_value,Away_defensive_value,Home_defensive_value,Away_exp_vaep_value,Home_exp_vaep_value,Away_exp_offensive_value,Home_exp_offensive_value,Away_exp_defensive_value,Home_exp_defensive_value
Match_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023F2_PortAdelaide_GreaterWesternSydney,93.4562,78.4249,17.4746,13.5095,15.4538,11.3737,2.0208,2.1359,111.858,99.9296,102.0686,95.4825,9.7894,4.4471


In [7]:
# Sanity check: show the last row of the merged match table, including the ground/city columns added above.
match_summary_stats.tail(1)

Unnamed: 0,Home_Team,Away_Team,Q4_Score,Margin,Total Game Score,Home Win,Venue,City,Date,Attendance,Temperature,Weather_Type,Year,Round_ID,Match_ID,Season,Home_Score,Home_Goals,Home_Behinds,Home_Shots,Home_Conversion,Away_Score,Away_Goals,Away_Behinds,Away_Shots,Away_Conversion,Away_xScore,Home_xScore,Away_vaep_value,Home_vaep_value,Away_offensive_value,Home_offensive_value,Away_defensive_value,Home_defensive_value,Away_exp_vaep_value,Home_exp_vaep_value,Away_exp_offensive_value,Home_exp_offensive_value,Away_exp_defensive_value,Home_exp_defensive_value,Home_Team_Ground,Away_Team_Ground,Away_Team_City
621,Port Adelaide,Greater Western Sydney,9.16.70 - 13.15.93,-23,163,0.0,Adelaide Oval,Adelaide,2023-09-16 19:40:00,0,18,MOSTLY_SUNNY,2023,2023F2,2023F2_PortAdelaide_GreaterWesternSydney,2023,70,9,16,25,0.36,93,13,15,28,0.4643,93.4562,78.4249,17.4746,13.5095,15.4538,11.3737,2.0208,2.1359,111.858,99.9296,102.0686,95.4825,9.7894,4.4471,Adelaide Oval,Sydney Showground,Sydney


Updating Process:

1. Sum up player values from each game as the actual offensive rating for that team
2. Get difference in match expected vaep values as the actual "Team Rating" for that match
3. Take into account home advantage for home team
4. Update long term player ratings
5. Update long term team ratings
6. Calculate "Defensive Rating" as the "Offensive Rating" + "Team Rating" (just for completeness)

Calculate Home Advantage (Interstate)

In [16]:
# Restrict to matches played at the home team's own ground, then split on whether
# the away team's home city matches the city the match was played in.
# NOTE: the comparison is by *city*, not state, so e.g. Geelong v Melbourne lands in
# `interstate_matches` despite the variable name.
played_at_home_ground = match_summary_stats['Venue'] == match_summary_stats['Home_Team_Ground']
away_team_is_local = match_summary_stats['City'] == match_summary_stats['Away_Team_City']

# Both city columns can be NaN (Away_Team_City comes from left merges). NaN != x evaluates
# to True, so without this guard matches with an unknown city would be silently
# classified as "interstate".
cities_known = match_summary_stats['City'].notna() & match_summary_stats['Away_Team_City'].notna()

intrastate_matches = match_summary_stats[played_at_home_ground & away_team_is_local]
interstate_matches = match_summary_stats[played_at_home_ground & cities_known & ~away_team_is_local]

In [39]:
def home_advantage_summary(matches, include_travel_from = False):
    """Average home-vs-away results per host city.

    Parameters
    ----------
    matches : pandas.DataFrame
        One row per match. Must contain 'City', 'Home_Team', and the Home_/Away_
        versions of 'Score', 'xScore' and 'exp_vaep_value' (plus 'Away_Team_City'
        when ``include_travel_from`` is True).
    include_travel_from : bool, default False
        If True, group by ('City', 'Away_Team_City') instead of 'City' alone.

    Returns
    -------
    pandas.DataFrame
        Indexed by the grouping key(s): the mean of each home/away metric, 'Games'
        (count of non-null 'Home_Team' values in the group), and the three
        home-minus-away differences 'Home_Score_Diff', 'Home_xScore_Diff' and
        'Home_xVAEP_Diff'.
    """
    group_keys = ['City', 'Away_Team_City'] if include_travel_from else ['City']

    # Named aggregations: the mean of every home/away metric, then the game count.
    mean_columns = [
        'Home_Score', 'Away_Score',
        'Home_xScore', 'Away_xScore',
        'Home_exp_vaep_value', 'Away_exp_vaep_value',
    ]
    aggregations = {column: (column, 'mean') for column in mean_columns}
    aggregations['Games'] = ('Home_Team', 'count')

    per_group = matches.groupby(group_keys).agg(**aggregations)

    # Home-minus-away gaps, appended after the aggregated columns.
    return per_group.assign(
        Home_Score_Diff = lambda frame: frame['Home_Score'] - frame['Away_Score'],
        Home_xScore_Diff = lambda frame: frame['Home_xScore'] - frame['Away_xScore'],
        Home_xVAEP_Diff = lambda frame: frame['Home_exp_vaep_value'] - frame['Away_exp_vaep_value'],
    )

In [42]:
intrastate_home_advantage = home_advantage_summary(intrastate_matches)

# Display only host cities with more than 10 games, limited to the difference columns.
intrastate_home_advantage.loc[
    intrastate_home_advantage['Games'] > 10,
    ['Home_Score_Diff', 'Home_xScore_Diff', 'Home_xVAEP_Diff', "Games"],
]

Unnamed: 0_level_0,Home_Score_Diff,Home_xScore_Diff,Home_xVAEP_Diff,Games
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Melbourne,2.0,4.041,3.6581,109


In [44]:
# Break the interstate games down by host city and the city the away team travels from.
interstate_home_advantage = home_advantage_summary(interstate_matches, include_travel_from=True)

# Display only (host city, away city) pairs with more than 10 games.
interstate_home_advantage.loc[
    interstate_home_advantage['Games'] > 10,
    ['Home_Score_Diff', 'Home_xScore_Diff', 'Home_xVAEP_Diff', "Games"],
]

Unnamed: 0_level_0,Unnamed: 1_level_0,Home_Score_Diff,Home_xScore_Diff,Home_xVAEP_Diff,Games
City,Away_Team_City,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Adelaide,Melbourne,9.1562,12.3292,11.8549,32
Brisbane,Melbourne,28.9412,29.5775,26.6382,17
Geelong,Melbourne,32.9231,38.9325,36.9441,13
Gold Coast,Melbourne,6.5,6.2372,6.134,14
Melbourne,Adelaide,6.3182,6.9358,5.9238,22
Melbourne,Brisbane,-1.0,0.0837,1.1468,11
Melbourne,Geelong,-2.5,4.8365,5.1179,12
Melbourne,Perth,9.6923,10.3721,9.5511,26
Melbourne,Sydney,3.0714,4.9108,3.9084,28
Perth,Melbourne,-18.0811,-20.8928,-19.334,37


These figures are purely descriptive of what has happened: they do not account for the relative strengths of the teams involved.

For example, teams in Melbourne are better than teams in Perth, so they beat them regularly as seen in the Home_Score_Diff.

A proper Home Advantage estimate should account for that and measure how much the home team gains from playing at home rather than at a neutral venue.