In [1]:
import pandas as pd
import numpy as np

pd.options.display.max_columns = None
pd.options.display.max_rows = None

import warnings
warnings.filterwarnings("ignore")  

from afl_match_outcome_model.config import match_summary_file_path, player_stats_file_path



Load Data

In [17]:
# Load the match-level summary table from the CSV path supplied by the project config.
match_summary = pd.read_csv(match_summary_file_path)
# Keep only rows whose 'Year' is strictly greater than 2020 (i.e. 2021 onwards).
match_summary = match_summary[match_summary['Year'] > 2020]

In [18]:
# Load the player statistics table from the CSV path supplied by the project config.
# It is aggregated per (Match_ID, Team) further down in `create_team_stats`.
player_stats = pd.read_csv(player_stats_file_path)

Preprocess Data

Convert Match Summary from Home-Away to Team-Opponent

In [72]:
def convert_home_away_to_team_opp_data(data):
    """Reshape one-row-per-match data into one row per team per match.

    Every input row is emitted twice: once from the home side's point of view
    (``Home_Team`` -> ``Team``, ``Away_Team`` -> ``Opponent``, ``Home`` = 1) and
    once from the away side's (names swapped, ``Home`` = 0, ``Margin`` negated).

    ``Result`` is 1 when the row's team won and 0 otherwise. A drawn match
    (``Margin`` == 0) is a non-win for *both* rows; previously the away row of
    a draw was labelled as a win because it was derived purely as
    "home did not win".

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Home_Team``, ``Away_Team``, ``Home Win``, ``Margin``,
        ``Match_ID``, ``Date`` and the columns dropped at the end
        (``Attendance``, ``Weather_Type``, ``Round_ID``, ``Season``).
        The input frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Twice as many rows as ``data``, sorted by ``Match_ID`` then ``Date``
        with a fresh 0..n-1 index.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    home_won = data['Home Win'] == 1
    # Margin is negated for away rows below, so here it is still expressed
    # from the home side's perspective; zero means nobody won.
    drawn = data['Margin'] == 0

    home_data = data.copy()
    home_data = home_data.rename(columns={'Home_Team':'Team', 'Away_Team':'Opponent'})
    home_data['Home'] = 1
    home_data['Result'] = np.where(home_won, 1, 0)

    away_data = data.copy()
    away_data = away_data.rename(columns={'Home_Team':'Opponent', 'Away_Team':'Team'})
    away_data['Home'] = 0
    # The away side wins only when the home side did not win AND the match
    # was not drawn.
    away_data['Result'] = np.where(home_won | drawn, 0, 1)
    away_data['Margin'] = -1*away_data['Margin']

    team_opponent_data = pd.concat([home_data, away_data], axis=0)
    team_opponent_data = team_opponent_data.sort_values(by = ['Match_ID', 'Date']).reset_index(drop=True)

    # Columns that are not used in the team/opponent representation.
    team_opponent_data = team_opponent_data.drop(columns = ['Home Win', "Attendance", "Weather_Type", "Round_ID", "Season"])

    return team_opponent_data

In [73]:
from sklearn.preprocessing import FunctionTransformer

In [74]:
# Wrap the reshaping function as a scikit-learn transformer so it can be used as a Pipeline step.
# validate=False skips scikit-learn's input validation, so the function receives the object it is given.
home_away_team_opp_transformer = FunctionTransformer(func=convert_home_away_to_team_opp_data, validate=False)

In [75]:
from sklearn.pipeline import Pipeline

Create Scores

In [76]:
def create_score_columns(data):
    """Add ``Team_Score`` and ``Opp_Score`` parsed from ``Q4_Score``.

    ``Q4_Score`` is read as ``"<first side> - <second side>"`` where each side
    is a dot-separated string; the last dot-separated field of a side is taken
    as that side's score. Rows with ``Home == 1`` take the first side as their
    own score, other rows take the second; ``Opp_Score`` takes the first side
    only where ``Home == 0``.

    The columns are added to ``data`` in place and the same frame is returned.
    """
    def side_total(side):
        # Last dot-separated field on the requested side of " - ", as int.
        return data['Q4_Score'].apply(lambda text: text.split(" - ")[side].split(".")[-1]).astype(int)

    first_side = side_total(0)
    second_side = side_total(1)

    data['Team_Score'] = np.where(data['Home'] == 1, first_side, second_side)
    data['Opp_Score'] = np.where(data['Home'] == 0, first_side, second_side)

    return data

def create_goal_columns(data):
    """Add ``Team_Goals`` and ``Opp_Goals`` parsed from ``Q4_Score``.

    The first dot-separated field on each side of ``" - "`` is taken as that
    side's goals. Rows with ``Home == 1`` take the first side as their own
    value, other rows take the second; ``Opp_Goals`` takes the first side only
    where ``Home == 0``.

    The columns are added to ``data`` in place and the same frame is returned.
    """
    def side_goals(side):
        # First dot-separated field on the requested side of " - ", as int.
        return data['Q4_Score'].apply(lambda text: text.split(" - ")[side].split(".")[0]).astype(int)

    first_side = side_goals(0)
    second_side = side_goals(1)

    data['Team_Goals'] = np.where(data['Home'] == 1, first_side, second_side)
    data['Opp_Goals'] = np.where(data['Home'] == 0, first_side, second_side)
    return data

def create_behind_columns(data):
    """Add ``Team_Behinds`` and ``Opp_Behinds`` parsed from ``Q4_Score``.

    The second dot-separated field on each side of ``" - "`` is taken as that
    side's behinds. Rows with ``Home == 1`` take the first side as their own
    value, other rows take the second; ``Opp_Behinds`` takes the first side
    only where ``Home == 0``.

    The columns are added to ``data`` in place and the same frame is returned.
    """
    def side_behinds(side):
        # Second dot-separated field on the requested side of " - ", as int.
        return data['Q4_Score'].apply(lambda text: text.split(" - ")[side].split(".")[1]).astype(int)

    first_side = side_behinds(0)
    second_side = side_behinds(1)

    data['Team_Behinds'] = np.where(data['Home'] == 1, first_side, second_side)
    data['Opp_Behinds'] = np.where(data['Home'] == 0, first_side, second_side)

    return data

def split_scores(data):
    """Expand ``Q4_Score`` into score, goal and behind columns, then drop it.

    Runs ``create_score_columns``, ``create_goal_columns`` and
    ``create_behind_columns`` in that order (each adds its columns to the frame
    it is given) and returns a frame without the raw ``Q4_Score`` column.
    """
    with_components = (
        data
        .pipe(create_score_columns)
        .pipe(create_goal_columns)
        .pipe(create_behind_columns)
    )

    return with_components.drop(columns = ['Q4_Score'])

In [77]:
# Pipeline-compatible wrapper around `split_scores`; validate=False skips scikit-learn's input validation.
create_score_transformer = FunctionTransformer(func=split_scores, validate=False)

Distance in Time between Games

In [78]:
def format_date_columns(data):
    """Normalise ``Date`` and derive calendar and rest-day columns.

    ``Date`` is parsed, truncated to the calendar day and stored back as a
    datetime column; ``Day``, ``Month`` and ``Year`` are taken from it.

    The "previous match" columns are the ``Date`` of the preceding row that
    shares the same ``Team`` (respectively the same ``Opponent``), so they
    depend on the rows already being in chronological order within each group.
    The first row of each group has no predecessor and gets NaT / NaN.

    All columns are written to ``data`` in place and the same frame is returned.
    """
    # Parse, strip the time-of-day, and convert back to datetime.
    match_day = pd.to_datetime(pd.to_datetime(data['Date']).dt.date)

    data['Date'] = match_day
    data['Day'] = match_day.dt.day
    data['Month'] = match_day.dt.month
    data['Year'] = match_day.dt.year

    # Date of the preceding row within each Team / Opponent group.
    team_previous = data.groupby('Team')['Date'].shift(1)
    data['Date_Previous_Match'] = team_previous
    opp_previous = data.groupby('Opponent')['Date'].shift(1)
    data['Opp_Date_Previous_Match'] = opp_previous

    data['Days_Since_Previous_Match'] = (match_day - team_previous).dt.days
    data['Opp_Days_Since_Previous_Match'] = (match_day - opp_previous).dt.days

    return data

In [79]:
def add_number_matches_last_period(data, period = '30D', group = 'Team'):
    """Append a count of earlier matches inside a trailing time window.

    For every row, counts the non-null ``Match_ID`` values of rows with the
    same ``group`` value whose ``Date`` falls in the window of length
    ``period`` ending just before the row's own date (``closed='left'``
    excludes the current row). The count is stored in
    ``f"{group}_Number_Matches_{period}"``.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``Date``, ``Match_ID`` and the ``group`` column. ``data['Date']``
        is converted to datetime in place.
    period : str
        Offset string for the rolling window, e.g. ``'30D'``.
    group : str
        Column to group by, e.g. ``'Team'`` or ``'Opponent'``.

    Returns
    -------
    pandas.DataFrame
        A new frame: ``data`` left-joined with the counts on ``Date`` and ``group``.
    """
    data['Date'] = pd.to_datetime(data['Date'])  # the time-based window needs a datetime index

    count_col = group+'_Number_Matches_'+period

    windows = data.set_index('Date').groupby(group).rolling(window=period, closed='left')
    counts = windows.count().fillna(0).reset_index()
    # Only the Match_ID count is kept; it becomes the new feature column.
    counts = counts[[group, 'Date', 'Match_ID']].rename(columns={'Match_ID': count_col})

    return data.merge(counts, how='left', on=['Date', group])


In [95]:
def create_date_columns(data):
    """Add all date-derived features to the team/opponent frame.

    Applies ``format_date_columns`` first, then appends the trailing match
    counts for the 30-day and 7-day windows, each for ``Team`` and then
    ``Opponent``.
    """
    data = format_date_columns(data)

    # Same call order as before: 30D Team, 30D Opponent, 7D Team, 7D Opponent.
    for period in ('30D', '7D'):
        for group in ('Team', 'Opponent'):
            data = add_number_matches_last_period(data, period = period, group = group)

    return data

In [96]:
# Pipeline-compatible wrapper around `create_date_columns`; validate=False skips scikit-learn's input validation.
date_transformer = FunctionTransformer(func=create_date_columns, validate=False)

In [97]:
# Match-summary preprocessing, applied in order: reshape to Team/Opponent rows,
# split `Q4_Score` into score/goal/behind columns, then add date-derived columns.
match_summary_pipeline = Pipeline([
    ('home_away_team_opp', home_away_team_opp_transformer),
    ('create_scores', create_score_transformer),
    ('date_transformer', date_transformer)
])

In [98]:
# Run the three wrapped functions in sequence on the filtered match summary.
match_summary_transformed = match_summary_pipeline.fit_transform(match_summary)
# Preview the first rows of the transformed frame.
match_summary_transformed.head()

Unnamed: 0,Team,Opponent,Margin,Total Game Score,Venue,City,Date,Temperature,Year,Match_ID,Home,Result,Team_Score,Opp_Score,Team_Goals,Opp_Goals,Team_Behinds,Opp_Behinds,Day,Month,Date_Previous_Match,Opp_Date_Previous_Match,Days_Since_Previous_Match,Opp_Days_Since_Previous_Match,Team_Number_Matches_30D,Opponent_Number_Matches_30D,Team_Number_Matches_7D,Opponent_Number_Matches_7D
0,Adelaide,Geelong,12,194,Adelaide Oval,Adelaide,2021-03-20,18.0,2021,202101_Adelaide_Geelong,1,1,103,91,15,13,13,13,20,3,NaT,NaT,,,0.0,0.0,0.0,0.0
1,Geelong,Adelaide,-12,194,Adelaide Oval,Adelaide,2021-03-20,18.0,2021,202101_Adelaide_Geelong,0,0,91,103,13,15,13,13,20,3,NaT,NaT,,,0.0,0.0,0.0,0.0
2,Brisbane Lions,Sydney,-31,219,Gabba,Brisbane,2021-03-20,18.0,2021,202101_BrisbaneLions_Sydney,1,0,94,125,14,19,10,11,20,3,NaT,NaT,,,0.0,0.0,0.0,0.0
3,Sydney,Brisbane Lions,31,219,Gabba,Brisbane,2021-03-20,18.0,2021,202101_BrisbaneLions_Sydney,0,1,125,94,19,14,11,10,20,3,NaT,NaT,,,0.0,0.0,0.0,0.0
4,Collingwood,Western Bulldogs,-16,122,M.C.G.,Melbourne,2021-03-19,27.0,2021,202101_Collingwood_WesternBulldogs,1,0,53,69,7,10,11,9,19,3,NaT,NaT,,,0.0,0.0,0.0,0.0


Aggregate Player Stats by Team and Merge

In [99]:
def create_team_stats(player_stats):
    """Aggregate player rows into one row per (``Match_ID``, ``Team``).

    Three kinds of aggregate are produced, in this column order:

    * ``<col>_avg`` - mean of each player-attribute column;
    * ``<col>`` - sum of each counting statistic, under the source column's
      own name (with one exception, see the note in the code);
    * ``<col>_max`` - maximum of the two rating columns.

    Returns a frame with ``Match_ID`` and ``Team`` as ordinary columns.
    """
    averaged = [
        "Age", "Height", "Weight", "Debut_Year", "Draft_Year", "Draft_Position",
    ]
    summed = [
        "Centre_Clearances", "Clangers", "Defensive_Contest_Losses",
        "Contested_Marks", "Contested_Possessions", "Offensive_Contest_Wins",
        "Defensive_Half_Pressure_Acts", "Disposals", "Effective_Disposals",
        "Effective_Kicks", "Inside_50_Ground_Ball_Gets", "Frees_Against",
        "Frees_For", "Goal_Assists", "Ground_Ball_Gets", "Handballs",
        "Hit_Outs", "Hit_Outs_To_Advantage", "Inside_50s", "Intercept_Marks",
        "Intercepts", "Kicks", "Marks", "Marks_On_Lead", "Metres_Gained",
        "Pressure_Acts", "Rebound_50s", "Shots_At_Goal", "Spoils",
        "Stoppage_Clearances", "Clearances", "Possessions", "Turnovers",
        "Uncontested_Possessions", "Coaches_Votes", "Brownlow_Votes",
        "AFL_Fantasy_Points", "Player_Rating_Points",
    ]
    peaked = ["AFL_Fantasy_Points", "Player_Rating_Points"]

    # Output column -> (source column, aggregation). Insertion order fixes
    # the order of the resulting columns.
    spec = {}
    for col in averaged:
        spec[col + "_avg"] = (col, "mean")
    for col in summed:
        # NOTE(review): the total of "Intercepts" is published as
        # "GIntercepts". This looks like a typo, but the name is kept because
        # downstream column names derive from it - confirm before renaming.
        out_name = "GIntercepts" if col == "Intercepts" else col
        spec[out_name] = (col, "sum")
    for col in peaked:
        spec[col + "_max"] = (col, "max")

    team_stats = player_stats.groupby(['Match_ID', 'Team']).agg(**spec).reset_index()

    return team_stats

In [100]:
# Pipeline-compatible wrapper around `create_team_stats`; validate=False skips scikit-learn's input validation.
team_stats_transformer = FunctionTransformer(func=create_team_stats, validate=False)

In [101]:
# NOTE(review): this rebinds `match_summary_pipeline` with the same three steps
# as the definition earlier in the notebook; one definition would suffice.
match_summary_pipeline = Pipeline([
    ('home_away_team_opp', home_away_team_opp_transformer),
    ('create_scores', create_score_transformer),
    ('date_transformer', date_transformer)
])

In [102]:
# Single-step pipeline that aggregates player rows to one row per (Match_ID, Team).
player_stats_pipeline = Pipeline([
    ('team_stats', team_stats_transformer)
])

In [103]:
# Build the two inputs of the merge below: team/opponent match rows and per-team aggregated player stats.
match_summary_transformed = match_summary_pipeline.fit_transform(match_summary)
team_stats = player_stats_pipeline.fit_transform(player_stats)

In [104]:
# Left join keeps every team-match row; rows without a matching (Match_ID, Team) in team_stats get NaN stats.
match_outcome_data = match_summary_transformed.merge(team_stats, how = "left", on = ["Match_ID", "Team"])

In [105]:
# Preview the last rows of the merged frame.
match_outcome_data.tail()

Unnamed: 0,Team,Opponent,Margin,Total Game Score,Venue,City,Date,Temperature,Year,Match_ID,Home,Result,Team_Score,Opp_Score,Team_Goals,Opp_Goals,Team_Behinds,Opp_Behinds,Day,Month,Date_Previous_Match,Opp_Date_Previous_Match,Days_Since_Previous_Match,Opp_Days_Since_Previous_Match,Team_Number_Matches_30D,Opponent_Number_Matches_30D,Team_Number_Matches_7D,Opponent_Number_Matches_7D,Age_avg,Height_avg,Weight_avg,Debut_Year_avg,Draft_Year_avg,Draft_Position_avg,Centre_Clearances,Clangers,Defensive_Contest_Losses,Contested_Marks,Contested_Possessions,Offensive_Contest_Wins,Defensive_Half_Pressure_Acts,Disposals,Effective_Disposals,Effective_Kicks,Inside_50_Ground_Ball_Gets,Frees_Against,Frees_For,Goal_Assists,Ground_Ball_Gets,Handballs,Hit_Outs,Hit_Outs_To_Advantage,Inside_50s,Intercept_Marks,GIntercepts,Kicks,Marks,Marks_On_Lead,Metres_Gained,Pressure_Acts,Rebound_50s,Shots_At_Goal,Spoils,Stoppage_Clearances,Clearances,Possessions,Turnovers,Uncontested_Possessions,Coaches_Votes,Brownlow_Votes,AFL_Fantasy_Points,Player_Rating_Points,AFL_Fantasy_Points_max,Player_Rating_Points_max
1247,Carlton,Brisbane Lions,-16,142,Gabba,Brisbane,2023-09-23,23.0,2023,2023F3_BrisbaneLions_Carlton,0,0,63,79,9,11,9,13,23,9,2023-09-15,2023-09-09,8.0,14.0,3.0,2.0,0.0,0.0,25.521739,188.869565,88.347826,2017.304348,2016.173913,17.55,9.0,59.0,1.0,19.0,135.0,5.0,126.0,328.0,245.0,132.0,9.0,17.0,15.0,7.0,92.0,134.0,33.0,7.0,52.0,29.0,79.0,194.0,80.0,5.0,6541.0,262.0,50.0,20.0,28.0,21.0,30.0,321.0,74.0,186.0,0.0,0.0,1383.0,195.3,105.0,25.2
1248,Collingwood,Greater Western Sydney,1,115,M.C.G.,Melbourne,2023-09-22,16.0,2023,2023F3_Collingwood_GreaterWesternSydney,1,1,58,57,8,8,10,9,22,9,2023-09-07,2023-09-16,15.0,6.0,2.0,3.0,0.0,1.0,27.217391,188.73913,88.956522,2015.478261,2014.521739,27.190476,11.0,61.0,0.0,11.0,140.0,0.0,0.0,356.0,0.0,0.0,0.0,12.0,12.0,6.0,0.0,133.0,45.0,0.0,52.0,0.0,78.0,223.0,98.0,0.0,6228.0,0.0,39.0,20.0,0.0,33.0,44.0,348.0,83.0,208.0,0.0,0.0,1579.0,0.0,114.0,
1249,Greater Western Sydney,Collingwood,-1,115,M.C.G.,Melbourne,2023-09-22,16.0,2023,2023F3_Collingwood_GreaterWesternSydney,0,0,57,58,8,8,9,10,22,9,2023-09-16,2023-09-07,6.0,15.0,3.0,2.0,1.0,0.0,25.695652,189.521739,88.913043,2017.043478,2015.565217,24.761905,8.0,64.0,0.0,16.0,154.0,0.0,0.0,377.0,0.0,0.0,0.0,12.0,12.0,7.0,0.0,154.0,28.0,0.0,48.0,0.0,83.0,223.0,89.0,0.0,6337.0,0.0,44.0,21.0,0.0,18.0,26.0,365.0,77.0,211.0,0.0,0.0,1543.0,0.0,106.0,
1250,Collingwood,Brisbane Lions,4,176,M.C.G.,Melbourne,2023-09-29,29.0,2023,2023F4_Collingwood_BrisbaneLions,1,1,90,86,12,13,18,8,29,9,2023-09-22,2023-09-23,7.0,6.0,2.0,2.0,1.0,1.0,27.130435,189.0,89.0,2015.652174,2014.565217,30.0,10.0,61.0,0.0,9.0,122.0,0.0,0.0,340.0,0.0,0.0,0.0,22.0,16.0,9.0,0.0,121.0,50.0,0.0,57.0,0.0,60.0,219.0,95.0,0.0,6200.0,0.0,30.0,30.0,0.0,28.0,38.0,335.0,49.0,213.0,0.0,0.0,1561.0,0.0,124.0,
1251,Brisbane Lions,Collingwood,-4,176,M.C.G.,Melbourne,2023-09-29,29.0,2023,2023F4_Collingwood_BrisbaneLions,0,0,86,90,13,12,8,18,29,9,2023-09-23,2023-09-22,6.0,7.0,2.0,2.0,1.0,1.0,26.26087,187.478261,86.695652,2016.043478,2015.608696,25.9,14.0,61.0,0.0,8.0,122.0,0.0,0.0,308.0,0.0,0.0,0.0,16.0,22.0,10.0,0.0,103.0,32.0,0.0,43.0,0.0,49.0,205.0,89.0,0.0,6010.0,0.0,44.0,22.0,0.0,30.0,44.0,295.0,60.0,173.0,0.0,0.0,1392.0,0.0,127.0,


Rolling Averages

In [106]:
def rolling_averages(group, cols, new_cols, window = 3):
    """Add trailing means of ``cols`` that exclude the current row.

    The rows are sorted by ``Date``; for each row the mean of the ``window``
    rows immediately before it is written to ``new_cols`` (paired with ``cols``
    by position). ``closed='left'`` leaves the current row out of its own
    window, and rows with fewer than ``window`` predecessors get NaN.

    Returns the sorted copy with the new columns; ``group`` itself is not modified.
    """
    ordered = group.sort_values(by="Date")
    trailing_mean = ordered[cols].rolling(window=window, closed='left').mean()
    ordered[new_cols] = trailing_mean

    return ordered

In [107]:
def create_numeric_rolling_averages(data):
    """Replace every numeric column with trailing rolling means.

    For each numeric column ``c`` of ``data`` two columns are produced via
    ``rolling_averages``:

    * ``c_Team_rolling`` - computed over the rows sharing the same ``Team``;
    * ``c_Opp_rolling``  - computed over the rows sharing the same ``Opponent``.

    The original numeric columns are then dropped, so only non-numeric columns
    and the rolling features remain. After each pass the rows are re-sorted by
    ``Match_ID`` and ``Date`` and the index is reset.

    NOTE(review): the ``Opponent`` pass averages each row's *own* numeric
    columns across rows with the same ``Opponent`` value, i.e. values recorded
    for the teams that faced that opponent. Confirm this is the intended
    meaning of the ``*_Opp_rolling`` features.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``Team``, ``Opponent``, ``Match_ID`` and ``Date``.

    Returns
    -------
    pandas.DataFrame
    """
    # Take the numeric columns from the frame that was passed in. The previous
    # version read them from the notebook-level `match_outcome_data`, which
    # made the function depend on hidden kernel state and wrong for any other input.
    numeric_cols = list(data.select_dtypes('number'))

    # `numeric_cols` is fixed before the loop, so the second pass does not
    # roll the columns created by the first.
    for key, suffix in (('Team', '_Team_rolling'), ('Opponent', '_Opp_rolling')):
        rolling_cols = [x+suffix for x in numeric_cols]
        data = (
            data.groupby(key)
            .apply(lambda x: rolling_averages(x, numeric_cols, rolling_cols))
            .sort_values(by=['Match_ID', 'Date'])
            .reset_index(drop=True)
        )

    data = data.drop(columns = numeric_cols)

    return data

In [108]:
# Pipeline-compatible wrapper around `create_numeric_rolling_averages`; validate=False skips scikit-learn's input validation.
rolling_average_transformer = FunctionTransformer(func=create_numeric_rolling_averages, validate=False)

In [109]:
# Single-step pipeline that swaps the numeric columns for their rolling means.
rolling_pipeline = Pipeline([
    ('rolling_averages', rolling_average_transformer)
])
rolling_match_outcome_data = rolling_pipeline.fit_transform(match_outcome_data)
# Preview the last rows of the rolling-feature frame.
rolling_match_outcome_data.tail()

Unnamed: 0,Team,Opponent,Venue,City,Date,Match_ID,Date_Previous_Match,Opp_Date_Previous_Match,Margin_Team_rolling,Total Game Score_Team_rolling,Temperature_Team_rolling,Year_Team_rolling,Home_Team_rolling,Result_Team_rolling,Team_Score_Team_rolling,Opp_Score_Team_rolling,Team_Goals_Team_rolling,Opp_Goals_Team_rolling,Team_Behinds_Team_rolling,Opp_Behinds_Team_rolling,Day_Team_rolling,Month_Team_rolling,Days_Since_Previous_Match_Team_rolling,Opp_Days_Since_Previous_Match_Team_rolling,Team_Number_Matches_30D_Team_rolling,Opponent_Number_Matches_30D_Team_rolling,Team_Number_Matches_7D_Team_rolling,Opponent_Number_Matches_7D_Team_rolling,Age_avg_Team_rolling,Height_avg_Team_rolling,Weight_avg_Team_rolling,Debut_Year_avg_Team_rolling,Draft_Year_avg_Team_rolling,Draft_Position_avg_Team_rolling,Centre_Clearances_Team_rolling,Clangers_Team_rolling,Defensive_Contest_Losses_Team_rolling,Contested_Marks_Team_rolling,Contested_Possessions_Team_rolling,Offensive_Contest_Wins_Team_rolling,Defensive_Half_Pressure_Acts_Team_rolling,Disposals_Team_rolling,Effective_Disposals_Team_rolling,Effective_Kicks_Team_rolling,Inside_50_Ground_Ball_Gets_Team_rolling,Frees_Against_Team_rolling,Frees_For_Team_rolling,Goal_Assists_Team_rolling,Ground_Ball_Gets_Team_rolling,Handballs_Team_rolling,Hit_Outs_Team_rolling,Hit_Outs_To_Advantage_Team_rolling,Inside_50s_Team_rolling,Intercept_Marks_Team_rolling,GIntercepts_Team_rolling,Kicks_Team_rolling,Marks_Team_rolling,Marks_On_Lead_Team_rolling,Metres_Gained_Team_rolling,Pressure_Acts_Team_rolling,Rebound_50s_Team_rolling,Shots_At_Goal_Team_rolling,Spoils_Team_rolling,Stoppage_Clearances_Team_rolling,Clearances_Team_rolling,Possessions_Team_rolling,Turnovers_Team_rolling,Uncontested_Possessions_Team_rolling,Coaches_Votes_Team_rolling,Brownlow_Votes_Team_rolling,AFL_Fantasy_Points_Team_rolling,Player_Rating_Points_Team_rolling,AFL_Fantasy_Points_max_Team_rolling,Player_Rating_Points_max_Team_rolling,Margin_Opp_rolling,Total Game 
Score_Opp_rolling,Temperature_Opp_rolling,Year_Opp_rolling,Home_Opp_rolling,Result_Opp_rolling,Team_Score_Opp_rolling,Opp_Score_Opp_rolling,Team_Goals_Opp_rolling,Opp_Goals_Opp_rolling,Team_Behinds_Opp_rolling,Opp_Behinds_Opp_rolling,Day_Opp_rolling,Month_Opp_rolling,Days_Since_Previous_Match_Opp_rolling,Opp_Days_Since_Previous_Match_Opp_rolling,Team_Number_Matches_30D_Opp_rolling,Opponent_Number_Matches_30D_Opp_rolling,Team_Number_Matches_7D_Opp_rolling,Opponent_Number_Matches_7D_Opp_rolling,Age_avg_Opp_rolling,Height_avg_Opp_rolling,Weight_avg_Opp_rolling,Debut_Year_avg_Opp_rolling,Draft_Year_avg_Opp_rolling,Draft_Position_avg_Opp_rolling,Centre_Clearances_Opp_rolling,Clangers_Opp_rolling,Defensive_Contest_Losses_Opp_rolling,Contested_Marks_Opp_rolling,Contested_Possessions_Opp_rolling,Offensive_Contest_Wins_Opp_rolling,Defensive_Half_Pressure_Acts_Opp_rolling,Disposals_Opp_rolling,Effective_Disposals_Opp_rolling,Effective_Kicks_Opp_rolling,Inside_50_Ground_Ball_Gets_Opp_rolling,Frees_Against_Opp_rolling,Frees_For_Opp_rolling,Goal_Assists_Opp_rolling,Ground_Ball_Gets_Opp_rolling,Handballs_Opp_rolling,Hit_Outs_Opp_rolling,Hit_Outs_To_Advantage_Opp_rolling,Inside_50s_Opp_rolling,Intercept_Marks_Opp_rolling,GIntercepts_Opp_rolling,Kicks_Opp_rolling,Marks_Opp_rolling,Marks_On_Lead_Opp_rolling,Metres_Gained_Opp_rolling,Pressure_Acts_Opp_rolling,Rebound_50s_Opp_rolling,Shots_At_Goal_Opp_rolling,Spoils_Opp_rolling,Stoppage_Clearances_Opp_rolling,Clearances_Opp_rolling,Possessions_Opp_rolling,Turnovers_Opp_rolling,Uncontested_Possessions_Opp_rolling,Coaches_Votes_Opp_rolling,Brownlow_Votes_Opp_rolling,AFL_Fantasy_Points_Opp_rolling,Player_Rating_Points_Opp_rolling,AFL_Fantasy_Points_max_Opp_rolling,Player_Rating_Points_max_Opp_rolling
1247,Brisbane Lions,Carlton,Gabba,Brisbane,2023-09-23,2023F3_BrisbaneLions_Carlton,2023-09-09,2023-09-15,28.0,184.666667,20.666667,2023.0,0.666667,1.0,106.333333,78.333333,15.666667,11.666667,12.333333,8.333333,17.666667,8.333333,9.333333,9.0,3.666667,3.666667,0.333333,0.666667,25.811594,187.797101,87.188406,2016.449275,2015.927536,26.733333,12.666667,54.0,3.0,12.666667,132.0,2.0,82.0,320.0,165.666667,98.666667,13.333333,19.0,20.333333,9.333333,59.0,112.333333,36.333333,5.333333,58.666667,11.333333,64.333333,207.666667,94.0,5.0,5609.666667,186.0,30.333333,29.333333,20.666667,26.666667,39.333333,316.333333,57.333333,184.333333,25.0,0.0,1452.666667,138.966667,104.666667,,8.0,154.666667,17.333333,2023.0,0.333333,0.333333,81.333333,73.333333,11.333333,11.0,13.333333,7.333333,16.666667,8.666667,9.333333,9.0,3.333333,3.333333,0.0,0.333333,25.724638,187.449275,87.333333,2016.753623,2015.565217,27.174603,9.666667,71.0,0.333333,12.333333,141.333333,1.0,55.0,371.333333,88.666667,49.0,4.0,19.333333,17.0,8.0,28.0,150.0,34.0,5.0,53.666667,8.666667,82.666667,221.333333,94.666667,2.666667,6142.333333,106.0,43.666667,26.666667,11.666667,28.0,37.666667,363.666667,81.333333,222.333333,15.666667,0.0,1548.333333,70.366667,114.333333,
1248,Greater Western Sydney,Collingwood,M.C.G.,Melbourne,2023-09-22,2023F3_Collingwood_GreaterWesternSydney,2023-09-16,2023-09-07,26.333333,173.0,16.666667,2023.0,0.0,1.0,99.666667,73.333333,14.666667,10.333333,11.666667,11.333333,17.333333,8.666667,9.333333,9.666667,3.333333,3.333333,0.333333,0.333333,25.768116,189.42029,88.724638,2017.0,2015.521739,24.936508,12.0,53.0,0.666667,7.333333,122.0,0.333333,57.333333,323.666667,90.333333,48.666667,5.0,13.0,12.666667,9.666667,34.333333,142.666667,29.0,2.333333,50.333333,5.666667,63.0,181.0,62.666667,1.333333,5318.333333,113.666667,37.666667,27.0,14.666667,23.666667,35.666667,318.333333,61.0,196.333333,29.333333,0.0,1293.666667,75.566667,102.666667,,-17.666667,156.333333,17.666667,2023.0,0.333333,0.333333,69.333333,87.0,9.666667,13.333333,11.333333,7.0,16.666667,8.333333,7.666667,9.0,3.666667,3.666667,0.666667,0.666667,25.463768,187.231884,87.362319,2016.927536,2016.086957,24.295238,11.0,54.333333,1.333333,10.666667,124.333333,1.666667,103.333333,338.333333,180.666667,99.0,10.333333,16.666667,17.666667,5.666667,55.333333,136.0,34.333333,6.0,51.0,9.0,64.666667,202.333333,91.0,3.666667,5279.666667,194.666667,32.0,21.666667,17.666667,23.0,34.0,333.0,61.0,208.666667,12.0,0.0,1440.666667,126.133333,118.333333,
1249,Collingwood,Greater Western Sydney,M.C.G.,Melbourne,2023-09-22,2023F3_Collingwood_GreaterWesternSydney,2023-09-07,2023-09-16,17.666667,156.333333,17.666667,2023.0,0.666667,0.666667,87.0,69.333333,13.333333,9.666667,7.0,11.333333,16.666667,8.333333,9.0,7.666667,3.666667,3.666667,0.666667,0.666667,27.434783,188.492754,88.695652,2015.147563,2014.246377,29.412698,11.0,59.0,1.666667,8.333333,123.333333,1.333333,86.333333,334.333333,183.0,97.0,9.333333,17.666667,16.666667,10.0,56.0,134.0,38.666667,6.0,46.0,8.0,61.666667,200.333333,80.0,6.0,5423.0,196.0,41.333333,22.0,20.333333,24.0,35.0,323.0,64.0,199.666667,18.0,0.0,1459.333333,142.666667,111.333333,,-26.333333,173.0,16.666667,2023.0,1.0,0.0,73.333333,99.666667,10.333333,14.666667,11.333333,11.666667,17.333333,8.666667,9.666667,9.333333,3.333333,3.333333,0.333333,0.333333,25.217391,187.797101,87.289855,2017.391304,2016.130435,23.428571,10.0,49.666667,0.333333,12.333333,118.333333,0.666667,45.0,303.0,91.0,43.666667,3.333333,12.666667,13.0,6.333333,30.333333,132.666667,24.666667,2.333333,47.0,5.666667,61.333333,170.333333,64.666667,2.0,5069.333333,108.666667,36.666667,18.666667,10.333333,18.333333,28.333333,295.333333,63.0,177.0,0.666667,0.0,1221.0,66.933333,100.666667,
1250,Collingwood,Brisbane Lions,M.C.G.,Melbourne,2023-09-29,2023F4_Collingwood_BrisbaneLions,2023-09-22,2023-09-23,26.0,120.0,19.0,2023.0,0.666667,1.0,73.0,47.0,11.0,6.0,7.0,11.0,18.0,8.666667,11.666667,7.666667,3.0,3.333333,0.333333,0.666667,27.449275,188.478261,88.782609,2015.246377,2014.26087,27.873016,9.0,61.333333,1.0,8.333333,128.0,1.0,41.666667,341.333333,101.333333,53.333333,4.333333,15.0,13.333333,8.333333,29.333333,131.333333,38.0,2.666667,47.666667,4.333333,69.666667,210.0,88.333333,5.0,5694.333333,100.333333,42.333333,19.666667,10.0,27.666667,36.666667,333.333333,72.666667,205.333333,15.0,0.0,1487.333333,78.6,108.666667,,-25.333333,157.333333,24.333333,2023.0,0.0,0.0,66.0,91.333333,9.666667,13.0,8.0,13.333333,19.333333,8.666667,9.333333,12.0,3.333333,3.0,0.333333,0.0,25.304348,187.971014,87.608696,2017.405797,2016.173913,24.406999,10.333333,59.0,1.666667,14.0,130.333333,4.333333,94.0,322.333333,172.666667,96.333333,7.0,19.333333,16.666667,5.333333,59.666667,124.333333,34.333333,4.666667,44.0,16.333333,66.666667,198.0,85.666667,3.333333,5558.333333,179.666667,47.0,19.666667,21.0,22.333333,32.666667,312.666667,69.333333,182.333333,0.666667,0.0,1399.0,127.2,114.666667,
1251,Brisbane Lions,Collingwood,M.C.G.,Melbourne,2023-09-29,2023F4_Collingwood_BrisbaneLions,2023-09-23,2023-09-22,25.333333,157.333333,24.333333,2023.0,1.0,1.0,91.333333,66.0,13.0,9.666667,13.333333,8.0,19.333333,8.666667,12.0,9.333333,3.0,3.333333,0.0,0.333333,25.927536,187.724638,87.0,2016.362319,2015.869565,26.2,11.666667,56.0,4.333333,11.333333,133.666667,1.666667,67.333333,318.0,158.333333,99.666667,12.666667,16.666667,19.333333,8.333333,61.666667,105.333333,35.666667,7.0,61.333333,13.0,69.333333,212.666667,96.666667,5.666667,5832.333333,182.666667,34.666667,27.0,26.666667,29.333333,41.0,313.666667,65.666667,180.0,18.0,0.0,1441.666667,136.366667,104.666667,,-26.0,120.0,19.0,2023.0,0.333333,0.0,47.0,73.0,6.0,11.0,11.0,7.0,18.0,8.666667,7.666667,11.666667,3.333333,3.0,0.666667,0.333333,25.391304,187.84058,87.913043,2017.173913,2016.014493,23.38254,8.666667,54.666667,1.0,11.666667,130.666667,1.0,50.666667,349.0,97.333333,50.333333,4.0,13.333333,15.0,4.0,26.0,144.0,29.333333,3.0,48.666667,4.0,72.666667,205.0,88.666667,2.0,5479.666667,103.666667,36.666667,18.333333,8.666667,21.0,29.666667,341.666667,68.666667,211.0,5.0,0.0,1437.333333,53.533333,118.333333,


Create Pipelines

In [112]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder

In [113]:
# Columns that are one-hot encoded in the final assembly cell below.
categorical_features = ['Team', 'Opponent', 'Venue', 'City']

In [120]:
# End-to-end assembly: rebuilds every intermediate frame from the raw inputs.
# NOTE(review): the pipelines and intermediate frames below repeat definitions
# from earlier cells; this cell alone is enough to produce `data`.

# 1. Match summary -> one row per team per match, with score and date features.
match_summary_pipeline = Pipeline([
    ('home_away_team_opp', home_away_team_opp_transformer),
    ('create_scores', create_score_transformer),
    ('date_transformer', date_transformer)
])
match_summary_transformed = match_summary_pipeline.fit_transform(match_summary)

# 2. Player stats -> one row per (Match_ID, Team).
player_stats_pipeline = Pipeline([
    ('team_stats', team_stats_transformer)
])
team_stats = player_stats_pipeline.fit_transform(player_stats)

# 3. Attach the team aggregates to each team-match row (left join keeps all match rows).
match_outcome_data = match_summary_transformed.merge(team_stats, how = "left", on = ["Match_ID", "Team"])

# 4. Replace the numeric columns with their rolling means.
rolling_pipeline = Pipeline([
    ('rolling_averages', rolling_average_transformer)
])
rolling_match_outcome_data = rolling_pipeline.fit_transform(match_outcome_data)

# 5. One-hot encode the categorical columns. The ColumnTransformer prefixes
#    output names with "ohe__", which is stripped to get e.g. "Team_Adelaide".
ohe = ColumnTransformer(
    transformers=[
        ('ohe', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), categorical_features)
    ]
)
ohe_data = pd.DataFrame(ohe.fit_transform(rolling_match_outcome_data), columns=[x.replace("ohe__", "") for x in ohe.get_feature_names_out()])

# 6. Column-wise concat aligns on the index: `rolling_match_outcome_data` was
#    reset to 0..n-1 by the rolling step and `ohe_data` has a default index.
data = pd.concat([rolling_match_outcome_data, ohe_data], axis=1)
# NOTE(review): `Date_Previous_Match` and `Opp_Date_Previous_Match` are not in
# this drop list, so these datetime columns remain in `data` - confirm intended.
data = data.drop(columns=categorical_features + ['Match_ID', 'Date'])
data.tail()

Unnamed: 0,Date_Previous_Match,Opp_Date_Previous_Match,Margin_Team_rolling,Total Game Score_Team_rolling,Temperature_Team_rolling,Year_Team_rolling,Home_Team_rolling,Result_Team_rolling,Team_Score_Team_rolling,Opp_Score_Team_rolling,Team_Goals_Team_rolling,Opp_Goals_Team_rolling,Team_Behinds_Team_rolling,Opp_Behinds_Team_rolling,Day_Team_rolling,Month_Team_rolling,Days_Since_Previous_Match_Team_rolling,Opp_Days_Since_Previous_Match_Team_rolling,Team_Number_Matches_30D_Team_rolling,Opponent_Number_Matches_30D_Team_rolling,Team_Number_Matches_7D_Team_rolling,Opponent_Number_Matches_7D_Team_rolling,Age_avg_Team_rolling,Height_avg_Team_rolling,Weight_avg_Team_rolling,Debut_Year_avg_Team_rolling,Draft_Year_avg_Team_rolling,Draft_Position_avg_Team_rolling,Centre_Clearances_Team_rolling,Clangers_Team_rolling,Defensive_Contest_Losses_Team_rolling,Contested_Marks_Team_rolling,Contested_Possessions_Team_rolling,Offensive_Contest_Wins_Team_rolling,Defensive_Half_Pressure_Acts_Team_rolling,Disposals_Team_rolling,Effective_Disposals_Team_rolling,Effective_Kicks_Team_rolling,Inside_50_Ground_Ball_Gets_Team_rolling,Frees_Against_Team_rolling,Frees_For_Team_rolling,Goal_Assists_Team_rolling,Ground_Ball_Gets_Team_rolling,Handballs_Team_rolling,Hit_Outs_Team_rolling,Hit_Outs_To_Advantage_Team_rolling,Inside_50s_Team_rolling,Intercept_Marks_Team_rolling,GIntercepts_Team_rolling,Kicks_Team_rolling,Marks_Team_rolling,Marks_On_Lead_Team_rolling,Metres_Gained_Team_rolling,Pressure_Acts_Team_rolling,Rebound_50s_Team_rolling,Shots_At_Goal_Team_rolling,Spoils_Team_rolling,Stoppage_Clearances_Team_rolling,Clearances_Team_rolling,Possessions_Team_rolling,Turnovers_Team_rolling,Uncontested_Possessions_Team_rolling,Coaches_Votes_Team_rolling,Brownlow_Votes_Team_rolling,AFL_Fantasy_Points_Team_rolling,Player_Rating_Points_Team_rolling,AFL_Fantasy_Points_max_Team_rolling,Player_Rating_Points_max_Team_rolling,Margin_Opp_rolling,Total Game 
Score_Opp_rolling,Temperature_Opp_rolling,Year_Opp_rolling,Home_Opp_rolling,Result_Opp_rolling,Team_Score_Opp_rolling,Opp_Score_Opp_rolling,Team_Goals_Opp_rolling,Opp_Goals_Opp_rolling,Team_Behinds_Opp_rolling,Opp_Behinds_Opp_rolling,Day_Opp_rolling,Month_Opp_rolling,Days_Since_Previous_Match_Opp_rolling,Opp_Days_Since_Previous_Match_Opp_rolling,Team_Number_Matches_30D_Opp_rolling,Opponent_Number_Matches_30D_Opp_rolling,Team_Number_Matches_7D_Opp_rolling,Opponent_Number_Matches_7D_Opp_rolling,Age_avg_Opp_rolling,Height_avg_Opp_rolling,Weight_avg_Opp_rolling,Debut_Year_avg_Opp_rolling,Draft_Year_avg_Opp_rolling,Draft_Position_avg_Opp_rolling,Centre_Clearances_Opp_rolling,Clangers_Opp_rolling,Defensive_Contest_Losses_Opp_rolling,Contested_Marks_Opp_rolling,Contested_Possessions_Opp_rolling,Offensive_Contest_Wins_Opp_rolling,Defensive_Half_Pressure_Acts_Opp_rolling,Disposals_Opp_rolling,Effective_Disposals_Opp_rolling,Effective_Kicks_Opp_rolling,Inside_50_Ground_Ball_Gets_Opp_rolling,Frees_Against_Opp_rolling,Frees_For_Opp_rolling,Goal_Assists_Opp_rolling,Ground_Ball_Gets_Opp_rolling,Handballs_Opp_rolling,Hit_Outs_Opp_rolling,Hit_Outs_To_Advantage_Opp_rolling,Inside_50s_Opp_rolling,Intercept_Marks_Opp_rolling,GIntercepts_Opp_rolling,Kicks_Opp_rolling,Marks_Opp_rolling,Marks_On_Lead_Opp_rolling,Metres_Gained_Opp_rolling,Pressure_Acts_Opp_rolling,Rebound_50s_Opp_rolling,Shots_At_Goal_Opp_rolling,Spoils_Opp_rolling,Stoppage_Clearances_Opp_rolling,Clearances_Opp_rolling,Possessions_Opp_rolling,Turnovers_Opp_rolling,Uncontested_Possessions_Opp_rolling,Coaches_Votes_Opp_rolling,Brownlow_Votes_Opp_rolling,AFL_Fantasy_Points_Opp_rolling,Player_Rating_Points_Opp_rolling,AFL_Fantasy_Points_max_Opp_rolling,Player_Rating_Points_max_Opp_rolling,Team_Adelaide,Team_Brisbane Lions,Team_Carlton,Team_Collingwood,Team_Essendon,Team_Fremantle,Team_Geelong,Team_Gold Coast,Team_Greater Western Sydney,Team_Hawthorn,Team_Melbourne,Team_North Melbourne,Team_Port Adelaide,Team_Richmond,Team_St 
Kilda,Team_Sydney,Team_West Coast,Team_Western Bulldogs,Opponent_Adelaide,Opponent_Brisbane Lions,Opponent_Carlton,Opponent_Collingwood,Opponent_Essendon,Opponent_Fremantle,Opponent_Geelong,Opponent_Gold Coast,Opponent_Greater Western Sydney,Opponent_Hawthorn,Opponent_Melbourne,Opponent_North Melbourne,Opponent_Port Adelaide,Opponent_Richmond,Opponent_St Kilda,Opponent_Sydney,Opponent_West Coast,Opponent_Western Bulldogs,Venue_Adelaide Oval,Venue_Bellerive Oval,Venue_Carrara,Venue_Cazalys Stadium,Venue_Docklands,Venue_Eureka Stadium,Venue_Gabba,Venue_Kardinia Park,Venue_M.C.G.,Venue_Manuka Oval,Venue_Marrara Oval,Venue_Norwood Oval,Venue_Perth Stadium,Venue_S.C.G.,Venue_Stadium Australia,Venue_Sydney Showground,Venue_Traeger Park,Venue_York Park,City_Adelaide,City_Alice Springs,City_Ballarat,City_Brisbane,City_Cairns,City_Canberra,City_Darwin,City_Geelong,City_Gold Coast,City_Hobart,City_Launceston,City_Melbourne,City_Perth,City_Sydney
1247,2023-09-09,2023-09-15,28.0,184.666667,20.666667,2023.0,0.666667,1.0,106.333333,78.333333,15.666667,11.666667,12.333333,8.333333,17.666667,8.333333,9.333333,9.0,3.666667,3.666667,0.333333,0.666667,25.811594,187.797101,87.188406,2016.449275,2015.927536,26.733333,12.666667,54.0,3.0,12.666667,132.0,2.0,82.0,320.0,165.666667,98.666667,13.333333,19.0,20.333333,9.333333,59.0,112.333333,36.333333,5.333333,58.666667,11.333333,64.333333,207.666667,94.0,5.0,5609.666667,186.0,30.333333,29.333333,20.666667,26.666667,39.333333,316.333333,57.333333,184.333333,25.0,0.0,1452.666667,138.966667,104.666667,,8.0,154.666667,17.333333,2023.0,0.333333,0.333333,81.333333,73.333333,11.333333,11.0,13.333333,7.333333,16.666667,8.666667,9.333333,9.0,3.333333,3.333333,0.0,0.333333,25.724638,187.449275,87.333333,2016.753623,2015.565217,27.174603,9.666667,71.0,0.333333,12.333333,141.333333,1.0,55.0,371.333333,88.666667,49.0,4.0,19.333333,17.0,8.0,28.0,150.0,34.0,5.0,53.666667,8.666667,82.666667,221.333333,94.666667,2.666667,6142.333333,106.0,43.666667,26.666667,11.666667,28.0,37.666667,363.666667,81.333333,222.333333,15.666667,0.0,1548.333333,70.366667,114.333333,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1248,2023-09-16,2023-09-07,26.333333,173.0,16.666667,2023.0,0.0,1.0,99.666667,73.333333,14.666667,10.333333,11.666667,11.333333,17.333333,8.666667,9.333333,9.666667,3.333333,3.333333,0.333333,0.333333,25.768116,189.42029,88.724638,2017.0,2015.521739,24.936508,12.0,53.0,0.666667,7.333333,122.0,0.333333,57.333333,323.666667,90.333333,48.666667,5.0,13.0,12.666667,9.666667,34.333333,142.666667,29.0,2.333333,50.333333,5.666667,63.0,181.0,62.666667,1.333333,5318.333333,113.666667,37.666667,27.0,14.666667,23.666667,35.666667,318.333333,61.0,196.333333,29.333333,0.0,1293.666667,75.566667,102.666667,,-17.666667,156.333333,17.666667,2023.0,0.333333,0.333333,69.333333,87.0,9.666667,13.333333,11.333333,7.0,16.666667,8.333333,7.666667,9.0,3.666667,3.666667,0.666667,0.666667,25.463768,187.231884,87.362319,2016.927536,2016.086957,24.295238,11.0,54.333333,1.333333,10.666667,124.333333,1.666667,103.333333,338.333333,180.666667,99.0,10.333333,16.666667,17.666667,5.666667,55.333333,136.0,34.333333,6.0,51.0,9.0,64.666667,202.333333,91.0,3.666667,5279.666667,194.666667,32.0,21.666667,17.666667,23.0,34.0,333.0,61.0,208.666667,12.0,0.0,1440.666667,126.133333,118.333333,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1249,2023-09-07,2023-09-16,17.666667,156.333333,17.666667,2023.0,0.666667,0.666667,87.0,69.333333,13.333333,9.666667,7.0,11.333333,16.666667,8.333333,9.0,7.666667,3.666667,3.666667,0.666667,0.666667,27.434783,188.492754,88.695652,2015.147563,2014.246377,29.412698,11.0,59.0,1.666667,8.333333,123.333333,1.333333,86.333333,334.333333,183.0,97.0,9.333333,17.666667,16.666667,10.0,56.0,134.0,38.666667,6.0,46.0,8.0,61.666667,200.333333,80.0,6.0,5423.0,196.0,41.333333,22.0,20.333333,24.0,35.0,323.0,64.0,199.666667,18.0,0.0,1459.333333,142.666667,111.333333,,-26.333333,173.0,16.666667,2023.0,1.0,0.0,73.333333,99.666667,10.333333,14.666667,11.333333,11.666667,17.333333,8.666667,9.666667,9.333333,3.333333,3.333333,0.333333,0.333333,25.217391,187.797101,87.289855,2017.391304,2016.130435,23.428571,10.0,49.666667,0.333333,12.333333,118.333333,0.666667,45.0,303.0,91.0,43.666667,3.333333,12.666667,13.0,6.333333,30.333333,132.666667,24.666667,2.333333,47.0,5.666667,61.333333,170.333333,64.666667,2.0,5069.333333,108.666667,36.666667,18.666667,10.333333,18.333333,28.333333,295.333333,63.0,177.0,0.666667,0.0,1221.0,66.933333,100.666667,,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1250,2023-09-22,2023-09-23,26.0,120.0,19.0,2023.0,0.666667,1.0,73.0,47.0,11.0,6.0,7.0,11.0,18.0,8.666667,11.666667,7.666667,3.0,3.333333,0.333333,0.666667,27.449275,188.478261,88.782609,2015.246377,2014.26087,27.873016,9.0,61.333333,1.0,8.333333,128.0,1.0,41.666667,341.333333,101.333333,53.333333,4.333333,15.0,13.333333,8.333333,29.333333,131.333333,38.0,2.666667,47.666667,4.333333,69.666667,210.0,88.333333,5.0,5694.333333,100.333333,42.333333,19.666667,10.0,27.666667,36.666667,333.333333,72.666667,205.333333,15.0,0.0,1487.333333,78.6,108.666667,,-25.333333,157.333333,24.333333,2023.0,0.0,0.0,66.0,91.333333,9.666667,13.0,8.0,13.333333,19.333333,8.666667,9.333333,12.0,3.333333,3.0,0.333333,0.0,25.304348,187.971014,87.608696,2017.405797,2016.173913,24.406999,10.333333,59.0,1.666667,14.0,130.333333,4.333333,94.0,322.333333,172.666667,96.333333,7.0,19.333333,16.666667,5.333333,59.666667,124.333333,34.333333,4.666667,44.0,16.333333,66.666667,198.0,85.666667,3.333333,5558.333333,179.666667,47.0,19.666667,21.0,22.333333,32.666667,312.666667,69.333333,182.333333,0.666667,0.0,1399.0,127.2,114.666667,,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1251,2023-09-23,2023-09-22,25.333333,157.333333,24.333333,2023.0,1.0,1.0,91.333333,66.0,13.0,9.666667,13.333333,8.0,19.333333,8.666667,12.0,9.333333,3.0,3.333333,0.0,0.333333,25.927536,187.724638,87.0,2016.362319,2015.869565,26.2,11.666667,56.0,4.333333,11.333333,133.666667,1.666667,67.333333,318.0,158.333333,99.666667,12.666667,16.666667,19.333333,8.333333,61.666667,105.333333,35.666667,7.0,61.333333,13.0,69.333333,212.666667,96.666667,5.666667,5832.333333,182.666667,34.666667,27.0,26.666667,29.333333,41.0,313.666667,65.666667,180.0,18.0,0.0,1441.666667,136.366667,104.666667,,-26.0,120.0,19.0,2023.0,0.333333,0.0,47.0,73.0,6.0,11.0,11.0,7.0,18.0,8.666667,7.666667,11.666667,3.333333,3.0,0.666667,0.333333,25.391304,187.84058,87.913043,2017.173913,2016.014493,23.38254,8.666667,54.666667,1.0,11.666667,130.666667,1.0,50.666667,349.0,97.333333,50.333333,4.0,13.333333,15.0,4.0,26.0,144.0,29.333333,3.0,48.666667,4.0,72.666667,205.0,88.666667,2.0,5479.666667,103.666667,36.666667,18.333333,8.666667,21.0,29.666667,341.666667,68.666667,211.0,5.0,0.0,1437.333333,53.533333,118.333333,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [150]:
from sklearn.base import BaseEstimator, TransformerMixin
class MatchOutcomePreprocessor(BaseEstimator, TransformerMixin):
    """Turn raw match summaries and player stats into a numeric feature table.

    The work is delegated to transformers defined in earlier notebook cells:

    * ``match_summary_pipeline`` -- ``home_away_team_opp_transformer``,
      ``create_score_transformer`` and ``date_transformer``, applied in order.
    * ``player_stats_pipeline`` -- ``team_stats_transformer``.
    * ``rolling_pipeline`` -- ``rolling_average_transformer``.
    * ``ohe`` -- a ``ColumnTransformer`` that one-hot encodes
      ``categorical_features``.

    Parameters
    ----------
    categorical_features : list of str
        Names of the columns to one-hot encode. They are removed from the
        output once their encoded columns have been appended.

    Notes
    -----
    NOTE(review): ``transform`` takes two frames, so this object cannot be
    used as a step inside a standard single-``X`` sklearn ``Pipeline``.
    NOTE(review): the one-hot encoder is re-fitted on every ``transform``
    call, so the set of output columns depends on the data passed in.
    """

    def __init__(self, categorical_features):
        self.categorical_features = categorical_features
        self.create_match_summary_pipeline()
        self.create_player_stats_pipeline()
        self.create_rolling_average_pipeline()
        self.create_ohe()

    def create_match_summary_pipeline(self):
        """Build the pipeline applied to the raw match summary frame."""
        self.match_summary_pipeline = Pipeline([
            ('home_away_team_opp', home_away_team_opp_transformer),
            ('create_scores', create_score_transformer),
            ('date_transformer', date_transformer)
        ])

    def create_player_stats_pipeline(self):
        """Build the pipeline applied to the raw player stats frame."""
        self.player_stats_pipeline = Pipeline([
            ('team_stats', team_stats_transformer)
        ])

    def create_rolling_average_pipeline(self):
        """Build the pipeline applied to the merged match/team-stats frame."""
        self.rolling_pipeline = Pipeline([
            ('rolling_averages', rolling_average_transformer)
        ])

    def create_ohe(self):
        """Build the one-hot encoder for ``self.categorical_features``."""
        # Use the constructor argument stored on the instance. The previous
        # code read a bare global `categorical_features`, which raises
        # NameError on a fresh kernel and otherwise ignores the argument.
        self.ohe = ColumnTransformer(
            transformers=[
                ('ohe', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), self.categorical_features)
            ]
        )

    def fit(self, X, y=None):
        """No-op; every step is (re)fitted inside ``transform``. Returns ``self``."""
        return self

    def transform(self, match_summary, player_stats):
        """Run the full preprocessing and return the feature frame.

        Parameters
        ----------
        match_summary : pandas.DataFrame
            Input to ``match_summary_pipeline``.
        player_stats : pandas.DataFrame
            Input to ``player_stats_pipeline``.

        Returns
        -------
        pandas.DataFrame
            Output of the rolling pipeline with the one-hot columns appended
            and the categorical columns, ``Match_ID`` and ``Date`` dropped.
        """
        match_summary_transformed = self.match_summary_pipeline.fit_transform(match_summary)
        team_stats = self.player_stats_pipeline.fit_transform(player_stats)

        # Left join keeps every team-match row even when team stats are missing.
        match_outcome_data = match_summary_transformed.merge(team_stats, how = "left", on = ["Match_ID", "Team"])

        rolling_match_outcome_data = self.rolling_pipeline.fit_transform(match_outcome_data)

        encoded = self.ohe.fit_transform(rolling_match_outcome_data)
        # ColumnTransformer prefixes feature names with "<transformer name>__".
        ohe_columns = [x.replace("ohe__", "") for x in self.ohe.get_feature_names_out()]
        # Reuse the source index so the axis=1 concat below pairs rows
        # positionally even if the rolling frame's index is not 0..n-1.
        ohe_data = pd.DataFrame(encoded, columns=ohe_columns, index=rolling_match_outcome_data.index)

        data = pd.concat([rolling_match_outcome_data, ohe_data], axis=1)
        data = data.drop(columns=self.categorical_features + ['Match_ID', 'Date'])

        return data


In [152]:
# Build the preprocessor for the four categorical columns, run it on the
# loaded frames, and preview the last rows of the resulting feature table.
match_outcome_preproc = MatchOutcomePreprocessor(
    categorical_features=['Team', 'Opponent', 'Venue', 'City'],
)
data = match_outcome_preproc.transform(
    match_summary=match_summary,
    player_stats=player_stats,
)
data.tail()

Unnamed: 0,Date_Previous_Match,Opp_Date_Previous_Match,Margin_Team_rolling,Total Game Score_Team_rolling,Temperature_Team_rolling,Year_Team_rolling,Home_Team_rolling,Result_Team_rolling,Team_Score_Team_rolling,Opp_Score_Team_rolling,Team_Goals_Team_rolling,Opp_Goals_Team_rolling,Team_Behinds_Team_rolling,Opp_Behinds_Team_rolling,Day_Team_rolling,Month_Team_rolling,Days_Since_Previous_Match_Team_rolling,Opp_Days_Since_Previous_Match_Team_rolling,Team_Number_Matches_30D_Team_rolling,Opponent_Number_Matches_30D_Team_rolling,Team_Number_Matches_7D_Team_rolling,Opponent_Number_Matches_7D_Team_rolling,Age_avg_Team_rolling,Height_avg_Team_rolling,Weight_avg_Team_rolling,Debut_Year_avg_Team_rolling,Draft_Year_avg_Team_rolling,Draft_Position_avg_Team_rolling,Centre_Clearances_Team_rolling,Clangers_Team_rolling,Defensive_Contest_Losses_Team_rolling,Contested_Marks_Team_rolling,Contested_Possessions_Team_rolling,Offensive_Contest_Wins_Team_rolling,Defensive_Half_Pressure_Acts_Team_rolling,Disposals_Team_rolling,Effective_Disposals_Team_rolling,Effective_Kicks_Team_rolling,Inside_50_Ground_Ball_Gets_Team_rolling,Frees_Against_Team_rolling,Frees_For_Team_rolling,Goal_Assists_Team_rolling,Ground_Ball_Gets_Team_rolling,Handballs_Team_rolling,Hit_Outs_Team_rolling,Hit_Outs_To_Advantage_Team_rolling,Inside_50s_Team_rolling,Intercept_Marks_Team_rolling,GIntercepts_Team_rolling,Kicks_Team_rolling,Marks_Team_rolling,Marks_On_Lead_Team_rolling,Metres_Gained_Team_rolling,Pressure_Acts_Team_rolling,Rebound_50s_Team_rolling,Shots_At_Goal_Team_rolling,Spoils_Team_rolling,Stoppage_Clearances_Team_rolling,Clearances_Team_rolling,Possessions_Team_rolling,Turnovers_Team_rolling,Uncontested_Possessions_Team_rolling,Coaches_Votes_Team_rolling,Brownlow_Votes_Team_rolling,AFL_Fantasy_Points_Team_rolling,Player_Rating_Points_Team_rolling,AFL_Fantasy_Points_max_Team_rolling,Player_Rating_Points_max_Team_rolling,Margin_Opp_rolling,Total Game 
Score_Opp_rolling,Temperature_Opp_rolling,Year_Opp_rolling,Home_Opp_rolling,Result_Opp_rolling,Team_Score_Opp_rolling,Opp_Score_Opp_rolling,Team_Goals_Opp_rolling,Opp_Goals_Opp_rolling,Team_Behinds_Opp_rolling,Opp_Behinds_Opp_rolling,Day_Opp_rolling,Month_Opp_rolling,Days_Since_Previous_Match_Opp_rolling,Opp_Days_Since_Previous_Match_Opp_rolling,Team_Number_Matches_30D_Opp_rolling,Opponent_Number_Matches_30D_Opp_rolling,Team_Number_Matches_7D_Opp_rolling,Opponent_Number_Matches_7D_Opp_rolling,Age_avg_Opp_rolling,Height_avg_Opp_rolling,Weight_avg_Opp_rolling,Debut_Year_avg_Opp_rolling,Draft_Year_avg_Opp_rolling,Draft_Position_avg_Opp_rolling,Centre_Clearances_Opp_rolling,Clangers_Opp_rolling,Defensive_Contest_Losses_Opp_rolling,Contested_Marks_Opp_rolling,Contested_Possessions_Opp_rolling,Offensive_Contest_Wins_Opp_rolling,Defensive_Half_Pressure_Acts_Opp_rolling,Disposals_Opp_rolling,Effective_Disposals_Opp_rolling,Effective_Kicks_Opp_rolling,Inside_50_Ground_Ball_Gets_Opp_rolling,Frees_Against_Opp_rolling,Frees_For_Opp_rolling,Goal_Assists_Opp_rolling,Ground_Ball_Gets_Opp_rolling,Handballs_Opp_rolling,Hit_Outs_Opp_rolling,Hit_Outs_To_Advantage_Opp_rolling,Inside_50s_Opp_rolling,Intercept_Marks_Opp_rolling,GIntercepts_Opp_rolling,Kicks_Opp_rolling,Marks_Opp_rolling,Marks_On_Lead_Opp_rolling,Metres_Gained_Opp_rolling,Pressure_Acts_Opp_rolling,Rebound_50s_Opp_rolling,Shots_At_Goal_Opp_rolling,Spoils_Opp_rolling,Stoppage_Clearances_Opp_rolling,Clearances_Opp_rolling,Possessions_Opp_rolling,Turnovers_Opp_rolling,Uncontested_Possessions_Opp_rolling,Coaches_Votes_Opp_rolling,Brownlow_Votes_Opp_rolling,AFL_Fantasy_Points_Opp_rolling,Player_Rating_Points_Opp_rolling,AFL_Fantasy_Points_max_Opp_rolling,Player_Rating_Points_max_Opp_rolling,Team_Adelaide,Team_Brisbane Lions,Team_Carlton,Team_Collingwood,Team_Essendon,Team_Fremantle,Team_Geelong,Team_Gold Coast,Team_Greater Western Sydney,Team_Hawthorn,Team_Melbourne,Team_North Melbourne,Team_Port Adelaide,Team_Richmond,Team_St 
Kilda,Team_Sydney,Team_West Coast,Team_Western Bulldogs,Opponent_Adelaide,Opponent_Brisbane Lions,Opponent_Carlton,Opponent_Collingwood,Opponent_Essendon,Opponent_Fremantle,Opponent_Geelong,Opponent_Gold Coast,Opponent_Greater Western Sydney,Opponent_Hawthorn,Opponent_Melbourne,Opponent_North Melbourne,Opponent_Port Adelaide,Opponent_Richmond,Opponent_St Kilda,Opponent_Sydney,Opponent_West Coast,Opponent_Western Bulldogs,Venue_Adelaide Oval,Venue_Bellerive Oval,Venue_Carrara,Venue_Cazalys Stadium,Venue_Docklands,Venue_Eureka Stadium,Venue_Gabba,Venue_Kardinia Park,Venue_M.C.G.,Venue_Manuka Oval,Venue_Marrara Oval,Venue_Norwood Oval,Venue_Perth Stadium,Venue_S.C.G.,Venue_Stadium Australia,Venue_Sydney Showground,Venue_Traeger Park,Venue_York Park,City_Adelaide,City_Alice Springs,City_Ballarat,City_Brisbane,City_Cairns,City_Canberra,City_Darwin,City_Geelong,City_Gold Coast,City_Hobart,City_Launceston,City_Melbourne,City_Perth,City_Sydney
1247,2023-09-09,2023-09-15,28.0,184.666667,20.666667,2023.0,0.666667,1.0,106.333333,78.333333,15.666667,11.666667,12.333333,8.333333,17.666667,8.333333,9.333333,9.0,3.666667,3.666667,0.333333,0.666667,25.811594,187.797101,87.188406,2016.449275,2015.927536,26.733333,12.666667,54.0,3.0,12.666667,132.0,2.0,82.0,320.0,165.666667,98.666667,13.333333,19.0,20.333333,9.333333,59.0,112.333333,36.333333,5.333333,58.666667,11.333333,64.333333,207.666667,94.0,5.0,5609.666667,186.0,30.333333,29.333333,20.666667,26.666667,39.333333,316.333333,57.333333,184.333333,25.0,0.0,1452.666667,138.966667,104.666667,,8.0,154.666667,17.333333,2023.0,0.333333,0.333333,81.333333,73.333333,11.333333,11.0,13.333333,7.333333,16.666667,8.666667,9.333333,9.0,3.333333,3.333333,0.0,0.333333,25.724638,187.449275,87.333333,2016.753623,2015.565217,27.174603,9.666667,71.0,0.333333,12.333333,141.333333,1.0,55.0,371.333333,88.666667,49.0,4.0,19.333333,17.0,8.0,28.0,150.0,34.0,5.0,53.666667,8.666667,82.666667,221.333333,94.666667,2.666667,6142.333333,106.0,43.666667,26.666667,11.666667,28.0,37.666667,363.666667,81.333333,222.333333,15.666667,0.0,1548.333333,70.366667,114.333333,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1248,2023-09-16,2023-09-07,26.333333,173.0,16.666667,2023.0,0.0,1.0,99.666667,73.333333,14.666667,10.333333,11.666667,11.333333,17.333333,8.666667,9.333333,9.666667,3.333333,3.333333,0.333333,0.333333,25.768116,189.42029,88.724638,2017.0,2015.521739,24.936508,12.0,53.0,0.666667,7.333333,122.0,0.333333,57.333333,323.666667,90.333333,48.666667,5.0,13.0,12.666667,9.666667,34.333333,142.666667,29.0,2.333333,50.333333,5.666667,63.0,181.0,62.666667,1.333333,5318.333333,113.666667,37.666667,27.0,14.666667,23.666667,35.666667,318.333333,61.0,196.333333,29.333333,0.0,1293.666667,75.566667,102.666667,,-17.666667,156.333333,17.666667,2023.0,0.333333,0.333333,69.333333,87.0,9.666667,13.333333,11.333333,7.0,16.666667,8.333333,7.666667,9.0,3.666667,3.666667,0.666667,0.666667,25.463768,187.231884,87.362319,2016.927536,2016.086957,24.295238,11.0,54.333333,1.333333,10.666667,124.333333,1.666667,103.333333,338.333333,180.666667,99.0,10.333333,16.666667,17.666667,5.666667,55.333333,136.0,34.333333,6.0,51.0,9.0,64.666667,202.333333,91.0,3.666667,5279.666667,194.666667,32.0,21.666667,17.666667,23.0,34.0,333.0,61.0,208.666667,12.0,0.0,1440.666667,126.133333,118.333333,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1249,2023-09-07,2023-09-16,17.666667,156.333333,17.666667,2023.0,0.666667,0.666667,87.0,69.333333,13.333333,9.666667,7.0,11.333333,16.666667,8.333333,9.0,7.666667,3.666667,3.666667,0.666667,0.666667,27.434783,188.492754,88.695652,2015.147563,2014.246377,29.412698,11.0,59.0,1.666667,8.333333,123.333333,1.333333,86.333333,334.333333,183.0,97.0,9.333333,17.666667,16.666667,10.0,56.0,134.0,38.666667,6.0,46.0,8.0,61.666667,200.333333,80.0,6.0,5423.0,196.0,41.333333,22.0,20.333333,24.0,35.0,323.0,64.0,199.666667,18.0,0.0,1459.333333,142.666667,111.333333,,-26.333333,173.0,16.666667,2023.0,1.0,0.0,73.333333,99.666667,10.333333,14.666667,11.333333,11.666667,17.333333,8.666667,9.666667,9.333333,3.333333,3.333333,0.333333,0.333333,25.217391,187.797101,87.289855,2017.391304,2016.130435,23.428571,10.0,49.666667,0.333333,12.333333,118.333333,0.666667,45.0,303.0,91.0,43.666667,3.333333,12.666667,13.0,6.333333,30.333333,132.666667,24.666667,2.333333,47.0,5.666667,61.333333,170.333333,64.666667,2.0,5069.333333,108.666667,36.666667,18.666667,10.333333,18.333333,28.333333,295.333333,63.0,177.0,0.666667,0.0,1221.0,66.933333,100.666667,,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1250,2023-09-22,2023-09-23,26.0,120.0,19.0,2023.0,0.666667,1.0,73.0,47.0,11.0,6.0,7.0,11.0,18.0,8.666667,11.666667,7.666667,3.0,3.333333,0.333333,0.666667,27.449275,188.478261,88.782609,2015.246377,2014.26087,27.873016,9.0,61.333333,1.0,8.333333,128.0,1.0,41.666667,341.333333,101.333333,53.333333,4.333333,15.0,13.333333,8.333333,29.333333,131.333333,38.0,2.666667,47.666667,4.333333,69.666667,210.0,88.333333,5.0,5694.333333,100.333333,42.333333,19.666667,10.0,27.666667,36.666667,333.333333,72.666667,205.333333,15.0,0.0,1487.333333,78.6,108.666667,,-25.333333,157.333333,24.333333,2023.0,0.0,0.0,66.0,91.333333,9.666667,13.0,8.0,13.333333,19.333333,8.666667,9.333333,12.0,3.333333,3.0,0.333333,0.0,25.304348,187.971014,87.608696,2017.405797,2016.173913,24.406999,10.333333,59.0,1.666667,14.0,130.333333,4.333333,94.0,322.333333,172.666667,96.333333,7.0,19.333333,16.666667,5.333333,59.666667,124.333333,34.333333,4.666667,44.0,16.333333,66.666667,198.0,85.666667,3.333333,5558.333333,179.666667,47.0,19.666667,21.0,22.333333,32.666667,312.666667,69.333333,182.333333,0.666667,0.0,1399.0,127.2,114.666667,,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1251,2023-09-23,2023-09-22,25.333333,157.333333,24.333333,2023.0,1.0,1.0,91.333333,66.0,13.0,9.666667,13.333333,8.0,19.333333,8.666667,12.0,9.333333,3.0,3.333333,0.0,0.333333,25.927536,187.724638,87.0,2016.362319,2015.869565,26.2,11.666667,56.0,4.333333,11.333333,133.666667,1.666667,67.333333,318.0,158.333333,99.666667,12.666667,16.666667,19.333333,8.333333,61.666667,105.333333,35.666667,7.0,61.333333,13.0,69.333333,212.666667,96.666667,5.666667,5832.333333,182.666667,34.666667,27.0,26.666667,29.333333,41.0,313.666667,65.666667,180.0,18.0,0.0,1441.666667,136.366667,104.666667,,-26.0,120.0,19.0,2023.0,0.333333,0.0,47.0,73.0,6.0,11.0,11.0,7.0,18.0,8.666667,7.666667,11.666667,3.333333,3.0,0.666667,0.333333,25.391304,187.84058,87.913043,2017.173913,2016.014493,23.38254,8.666667,54.666667,1.0,11.666667,130.666667,1.0,50.666667,349.0,97.333333,50.333333,4.0,13.333333,15.0,4.0,26.0,144.0,29.333333,3.0,48.666667,4.0,72.666667,205.0,88.666667,2.0,5479.666667,103.666667,36.666667,18.333333,8.666667,21.0,29.666667,341.666667,68.666667,211.0,5.0,0.0,1437.333333,53.533333,118.333333,,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


Create Labels

In [123]:
# Preview the team/opponent view of the match summary; its `Result` column
# is what the label pipeline below extracts.
home_away_team_opp_transformer.fit_transform(match_summary).head()

Unnamed: 0,Team,Opponent,Q4_Score,Margin,Total Game Score,Venue,City,Date,Temperature,Year,Match_ID,Home,Result
0,Adelaide,Geelong,15.13.103 - 13.13.91,12,194,Adelaide Oval,Adelaide,2021-03-20 16:35:00,18.0,2021,202101_Adelaide_Geelong,1,1
1,Geelong,Adelaide,15.13.103 - 13.13.91,-12,194,Adelaide Oval,Adelaide,2021-03-20 16:35:00,18.0,2021,202101_Adelaide_Geelong,0,0
2,Brisbane Lions,Sydney,14.10.94 - 19.11.125,-31,219,Gabba,Brisbane,2021-03-20 19:45:00,18.0,2021,202101_BrisbaneLions_Sydney,1,0
3,Sydney,Brisbane Lions,14.10.94 - 19.11.125,31,219,Gabba,Brisbane,2021-03-20 19:45:00,18.0,2021,202101_BrisbaneLions_Sydney,0,1
4,Collingwood,Western Bulldogs,7.11.53 - 10.9.69,-16,122,M.C.G.,Melbourne,2021-03-19 19:50:00,27.0,2021,202101_Collingwood_WesternBulldogs,1,0


In [141]:
def get_response(data):
    """Return the ``'Result'`` entry of ``data`` (the label column)."""
    response = data['Result']
    return response

# Wrap get_response so it can be used as a pipeline step; validate=False
# matches how the other FunctionTransformers in this notebook are built.
response_transformer = FunctionTransformer(func=get_response, validate=False)

In [143]:
# Label pipeline: reshape to one row per team per match, then pull `Result`.
response_pipeline = Pipeline(
    steps=[
        ('home_away_team_opp', home_away_team_opp_transformer),
        ('response', response_transformer),
    ]
)

In [145]:
# Labels: the `Result` column of the team/opponent view of match_summary.
# NOTE(review): row order is assumed to match `data` from the preprocessor -- confirm.
y = response_pipeline.fit_transform(match_summary)

In [146]:
# Check class balance of the labels.
y.value_counts()

Result
1    626
0    626
Name: count, dtype: int64