Creating Match Projections using OOP

In [1]:
import pandas as pd
import numpy as np

from data_preparation import load_match_summary
from data_preparation import load_player_stats
from data_preparation import aggregate_player_to_match_stats
from data_preparation import load_team_info
from data_preparation import load_venue_info
from data_preparation import create_match_summary_stats

from player import Player
from match import Match
from team import Team
from rating_calculator import PlayerRatingCalculator, TeamRatingCalculator
from projector import Projector

from match_projections import initialise_teams_players, update_ratings, actual_vs_expected

import warnings
# Silence every warning so library notices do not clutter the cell outputs.
warnings.filterwarnings('ignore')
# Display settings: show up to 999 rows/columns and 4 digits of float precision.
pd.set_option('display.max_rows', 999)
pd.set_option('display.max_columns', 999)
pd.set_option('display.precision', 4)
%load_ext autoreload
%autoreload 2

1. Load expected VAEP data and player data
2. Simulate initial updating process
3. Put into a function
4. Create objective function
5. Run Optuna optimisation to find update parameters

Load in player and match stats

In [2]:
# Load the match-level summary and the player-level stats.
match_summary = load_match_summary()
player_stats = load_player_stats()

# Derive match-level stats from the player-level stats.
match_stats = aggregate_player_to_match_stats(player_stats)

# Load the team and venue reference data (overall plus home/away variants).
team_info, home_team_info, away_team_info = load_team_info()
venue_info, away_venue_info = load_venue_info()

# Combine the match summary, match-level stats and team/venue info.
match_summary_stats = create_match_summary_stats(
    match_summary,
    match_stats,
    home_team_info,
    away_team_info,
    away_venue_info,
)

Updating Process:

1. Sum up player values from each game as the actual offensive rating for that team
2. Get difference in match expected vaep values as the actual "Team Rating" for that match
3. Take into account home advantage for home team
4. Update long term player ratings
5. Update long term team ratings
6. Calculate "Defensive Rating" as the "Offensive Rating" + "Team Rating" (just for completeness)

## Initialise Teams and Players

In [3]:
# Build team_dict from the player-level stats; it is passed to every update_ratings call below.
# Presumably holds the Team/Player objects keyed by team — confirm in match_projections.
team_dict = initialise_teams_players(player_stats)

In [4]:
# Starts empty; passed to update_ratings for each round and later read by actual_vs_expected.
round_dict = {}

In [5]:
# Starts empty; passed to update_ratings for each round and later read by actual_vs_expected.
projections_dict = {}

# Simulate Season

In [6]:
# Distinct Round_ID values of the 2021 season, in order of first appearance in player_stats.
season_2021 = list(
    player_stats.loc[player_stats['Season'] == 2021, 'Round_ID'].unique()
)

In [7]:
# Replay the 2021 season one round at a time, in the order the rounds appear in season_2021.
# update_ratings is called only for its side effects (its return value is discarded);
# presumably it updates team_dict, round_dict and projections_dict in place — confirm in
# match_projections. NOTE(review): if so, re-running this cell applies the same rounds twice.
for round_id in season_2021:
    update_ratings(player_stats, match_summary, round_dict, round_id, team_dict, projections_dict)

In [8]:
# Compare actual results with the stored projections. The result is indexed by round ID and
# then by match key, with per-match 'tip' and 'mae' entries (as consumed by the cells below).
error_dict = actual_vs_expected(round_dict, projections_dict)

In [24]:
# Tipping accuracy for 2021: correct tips per round, then the season-wide hit rate.
tip_list = []    # number of correct tips in each round
match_list = []  # number of matches in each round
for round_id in season_2021:
    round_errors = error_dict[round_id]
    # Aggregate once and reuse the values for both the running lists and the printed row.
    round_tips = np.sum([match_error['tip'] for match_error in round_errors.values()])
    round_matches = len(round_errors)
    tip_list.append(round_tips)
    match_list.append(round_matches)
    print(round_id, round_tips, round_matches)
# Season accuracy = total correct tips / total matches (weighted by matches per round).
print(np.sum(tip_list) / np.sum(match_list))

202101 0 8
202102 3 9
202103 2 9
202104 6 9
202105 6 9
202106 5 9
202107 5 9
202108 4 9
202109 6 9
202110 5 9
202111 5 9
202112 1 6
202113 2 7
202114 3 5
202115 6 9
202116 6 9
202117 4 9
202118 3 9
202119 5 9
202120 8 9
202121 4 9
202122 6 9
202123 5 9
2021F1 1 4
2021F2 1 2
2021F3 1 2
2021F4 1 1
0.5048543689320388


In [25]:
# Mean absolute error for 2021: MAE per round, then the average of the round-level values.
mae_list = []    # mean absolute error of each round
match_list = []  # number of matches in each round
for round_id in season_2021:
    round_errors = error_dict[round_id]
    round_matches = len(round_errors)
    # Aggregate once and reuse the value for both the running list and the printed row.
    round_mae = np.sum([match_error['mae'] for match_error in round_errors.values()]) / round_matches
    mae_list.append(round_mae)
    match_list.append(round_matches)
    print(round_id, round_mae, round_matches)
# NOTE(review): this is an unweighted mean of round-level MAEs, so a one-match round counts
# as much as a nine-match round; match_list is collected but not used in this figure.
np.mean(mae_list)

202101 22.375 8
202102 27.606799999999996 9
202103 43.096511111111106 9
202104 16.009222222222224 9
202105 37.77955555555556 9
202106 36.885766666666676 9
202107 35.932199999999995 9
202108 22.53476666666667 9
202109 22.697733333333343 9
202110 26.120800000000003 9
202111 33.658655555555555 9
202112 30.60976666666667 6
202113 22.80084285714286 7
202114 24.173799999999993 5
202115 17.08534444444445 9
202116 26.919722222222216 9
202117 30.931155555555545 9
202118 35.56559999999999 9
202119 25.752688888888887 9
202120 28.86692222222222 9
202121 29.469622222222224 9
202122 37.4973111111111 9
202123 29.808722222222222 9
2021F1 33.18662500000001 4
2021F2 19.497250000000005 2
2021F3 74.72675 2
2021F4 51.5222 1


31.226345723104057

In [11]:
# Distinct Round_ID values of the 2022 season, in order of first appearance in player_stats.
season_2022 = list(
    player_stats.loc[player_stats['Season'] == 2022, 'Round_ID'].unique()
)

In [12]:
# Replay the 2022 season one round at a time, in the order the rounds appear in season_2022.
# The same team_dict, round_dict and projections_dict objects used for the 2021 rounds are
# passed again; presumably update_ratings carries the ratings forward in place — confirm in
# match_projections. NOTE(review): if so, re-running this cell applies the same rounds twice.
for round_id in season_2022:
    update_ratings(player_stats, match_summary, round_dict, round_id, team_dict, projections_dict)

In [13]:
# Recompute the actual-vs-projected errors now that the 2022 rounds have been processed;
# this rebinds error_dict, which the cells below index by 2022 round IDs.
error_dict = actual_vs_expected(round_dict, projections_dict)

In [23]:
# Tipping accuracy for 2022: correct tips per round, then the season-wide hit rate.
tip_list = []    # number of correct tips in each round
match_list = []  # number of matches in each round
for round_id in season_2022:
    round_errors = error_dict[round_id]
    # Aggregate once and reuse the values for both the running lists and the printed row.
    round_tips = np.sum([match_error['tip'] for match_error in round_errors.values()])
    round_matches = len(round_errors)
    tip_list.append(round_tips)
    match_list.append(round_matches)
    print(round_id, round_tips, round_matches)
# Season accuracy = total correct tips / total matches (weighted by matches per round).
print(np.sum(tip_list) / np.sum(match_list))

202201 5 9
202202 5 9
202203 6 9
202204 5 9
202205 4 9
202206 6 9
202207 5 9
202208 5 9
202209 8 9
202210 6 9
202211 7 9
202212 4 6
202213 5 6
202214 4 6
202215 6 9
202216 6 9
202217 5 9
202218 4 9
202219 7 9
202220 6 9
202221 7 9
202222 7 9
202223 7 9
2022F1 2 4
2022F2 0 2
2022F3 2 2
2022F4 0 1
0.6473429951690821


In [15]:
# Mean absolute error for 2022: MAE per round, then the average of the round-level values.
mae_list = []    # mean absolute error of each round
match_list = []  # number of matches in each round
for round_id in season_2022:
    round_errors = error_dict[round_id]
    round_matches = len(round_errors)
    # Aggregate once and reuse the value for both the running list and the printed row.
    round_mae = np.sum([match_error['mae'] for match_error in round_errors.values()]) / round_matches
    mae_list.append(round_mae)
    match_list.append(round_matches)
    print(round_id, round_mae, round_matches)
# NOTE(review): this is an unweighted mean of round-level MAEs, so a one-match round counts
# as much as a nine-match round; match_list is collected but not used in this figure.
np.mean(mae_list)

202201 20.678288888888886 9
202202 32.54962222222221 9
202203 25.344322222222218 9
202204 33.69044444444444 9
202205 35.756255555555555 9
202206 24.741844444444446 9
202207 34.554822222222214 9
202208 33.736044444444445 9
202209 24.73486666666666 9
202210 20.012255555555555 9
202211 28.767877777777773 9
202212 26.1815 6
202213 16.157033333333334 6
202214 25.101349999999996 6
202215 28.0449 9
202216 24.505033333333337 9
202217 29.737911111111107 9
202218 25.743111111111105 9
202219 13.307955555555557 9
202220 24.839866666666666 9
202221 21.473100000000002 9
202222 27.843188888888893 9
202223 39.14500000000001 9
2022F1 10.752400000000005 4
2022F2 26.64084999999999 2
2022F3 36.61980000000001 2
2022F4 103.4904 1


29.4129646090535

In [16]:
# Inspect the per-match error entries for round 2022F4, the round with the largest
# round-level MAE in the 2022 listing.
error_dict['2022F4']

{'2022F4_Geelong_Sydney': {'actual_vaep': 57.668954294725,
  'actual': 80.0,
  'expected': -23.490399999999994,
  'mae': 103.4904,
  'tip': 0}}

In [17]:
# Distinct Round_ID values of the 2023 season, in order of first appearance in player_stats.
season_2023 = list(
    player_stats.loc[player_stats['Season'] == 2023, 'Round_ID'].unique()
)

In [18]:
# Replay the 2023 season one round at a time, in the order the rounds appear in season_2023.
# The same team_dict, round_dict and projections_dict objects used for the earlier seasons are
# passed again; presumably update_ratings carries the ratings forward in place — confirm in
# match_projections. NOTE(review): if so, re-running this cell applies the same rounds twice.
for round_id in season_2023:
    update_ratings(player_stats, match_summary, round_dict, round_id, team_dict, projections_dict)

In [19]:
# Recompute the actual-vs-projected errors now that the 2023 rounds have been processed;
# this rebinds error_dict, which the cells below index by 2023 round IDs.
error_dict = actual_vs_expected(round_dict, projections_dict)

In [22]:
# Tipping accuracy for 2023: correct tips per round, then the season-wide hit rate.
tip_list = []    # number of correct tips in each round
match_list = []  # number of matches in each round
for round_id in season_2023:
    round_errors = error_dict[round_id]
    # Aggregate once and reuse the values for both the running lists and the printed row.
    round_tips = np.sum([match_error['tip'] for match_error in round_errors.values()])
    round_matches = len(round_errors)
    tip_list.append(round_tips)
    match_list.append(round_matches)
    print(round_id, round_tips, round_matches)
# Season accuracy = total correct tips / total matches (weighted by matches per round).
print(np.sum(tip_list) / np.sum(match_list))

202301 7 9
202302 4 9
202303 4 9
202304 5 9
202305 7 9
202306 5 9
202307 7 9
202308 7 9
202309 8 9
202310 7 9
202311 5 9
202312 6 7
202313 5 8
202314 4 6
202315 4 6
202316 7 9
202317 7 9
202318 5 9
202319 4 9
202320 3 9
202321 4 9
202322 3 9
202323 6 9
202324 7 9
2023F1 2 4
2023F2 1 2
2023F3 1 2
2023F4 1 1
0.6296296296296297


In [26]:
# Mean absolute error for 2023: MAE per round, then the average of the round-level values.
mae_list = []    # mean absolute error of each round
match_list = []  # number of matches in each round
for round_id in season_2023:
    round_errors = error_dict[round_id]
    round_matches = len(round_errors)
    # Aggregate once and reuse the value for both the running list and the printed row.
    round_mae = np.sum([match_error['mae'] for match_error in round_errors.values()]) / round_matches
    mae_list.append(round_mae)
    match_list.append(round_matches)
    print(round_id, round_mae, round_matches)
# NOTE(review): this is an unweighted mean of round-level MAEs, so a one-match round counts
# as much as a nine-match round; match_list is collected but not used in this figure.
np.mean(mae_list)

202301 30.21506666666666 9
202302 31.996811111111104 9
202303 27.543677777777777 9
202304 32.515855555555554 9
202305 22.74906666666667 9
202306 31.630911111111118 9
202307 32.99183333333333 9
202308 20.554877777777776 9
202309 33.18981111111111 9
202310 31.013099999999994 9
202311 20.944900000000004 9
202312 20.526099999999996 7
202313 29.091024999999995 8
202314 35.170683333333336 6
202315 46.61371666666667 6
202316 36.86182222222223 9
202317 27.670388888888894 9
202318 31.741799999999994 9
202319 22.22833333333333 9
202320 27.247044444444445 9
202321 25.457622222222216 9
202322 29.751144444444442 9
202323 28.533166666666666 9
202324 26.325844444444442 9
2023F1 28.406625000000005 4
2023F2 20.057049999999983 2
2023F3 21.7462 2
2023F4 26.632000000000012 1


28.55023134920635

In [27]:
# Inspect the per-match error entries (actual, expected, mae, tip) for round 2023F1.
error_dict['2023F1']

{'2023F1_BrisbaneLions_PortAdelaide': {'actual_vaep': 10.559886316402995,
  'actual': 54.0,
  'expected': 8.976100000000002,
  'mae': 45.0239,
  'tip': 1},
 '2023F1_Carlton_Sydney': {'actual_vaep': -4.068329861690017,
  'actual': 8.0,
  'expected': -13.242000000000004,
  'mae': 21.242000000000004,
  'tip': 0},
 '2023F1_Collingwood_Melbourne': {'actual_vaep': -17.952445181583002,
  'actual': 15.0,
  'expected': -10.797100000000015,
  'mae': 25.797100000000015,
  'tip': 0},
 '2023F1_StKilda_GreaterWesternSydney': {'actual_vaep': -19.047465060800008,
  'actual': -27.0,
  'expected': -5.436499999999988,
  'mae': 21.563500000000012,
  'tip': 1}}