In [1]:
import joblib
from afl_match_outcome_model.data_preparation.data_loader import load_matches, load_player_stats, load_venues, load_team_info
from afl_match_outcome_model.data_preparation.preprocessing import merge_venue_info, merge_home_away_venue
from afl_match_outcome_model.data_preparation.match_id_utils import get_home_team_from_match_id, get_away_team_from_match_id
from afl_match_outcome_model.data_preparation.preprocessing import create_home_flag
from afl_match_outcome_model.data_preparation.aggregation import aggregate_player_stats_to_team_stats
from afl_match_outcome_model.data_preparation.preprocessing import merge_match_summary_team_stats
from afl_match_outcome_model.data_preparation.match_id_utils import get_season_from_match_id
from afl_match_outcome_model.data_preparation.elo import create_elo_ratings_home_away
from afl_match_outcome_model.data_preparation.feature_engineering import create_score_features, create_margin_features, create_win_features
from afl_match_outcome_model.data_preparation.rolling import create_team_rolling_features
from afl_match_outcome_model.data_preparation.feature_engineering import create_distance_travelled_feature, create_home_away_diff_feature
from afl_match_outcome_model.data_preparation.preprocessing import get_match

In [2]:
# Location of the serialised model artefact used for scoring
model_file_path = "../model_outputs/match_outcome_xgb.joblib"

# NOTE: joblib artefacts are pickle-based, so only load files from a trusted source.
super_xgb = joblib.load(model_file_path)

# Feature names stored on the wrapped model's booster; these are later used
# to select the input columns handed to `predict_proba`.
_booster = super_xgb.xgb_model.get_booster()
model_features = _booster.feature_names

In [3]:
# Match to score, identified by its Match_ID string
match_id = "AFL_2023_F4_Collingwood_Brisbane"

# Pull the raw inputs from the project's data loaders (evaluated left to right)
matches, player_stats, venue_info = load_matches(), load_player_stats(), load_venues()
home_info, away_info = load_team_info()

In [4]:
# Create Features
# Merge venue information, then home/away team information, into the match table.
matches = merge_venue_info(matches, venue_info)
matches = merge_home_away_venue(matches, home_info, away_info, venue_info)

# Derive the home and away team for every player row from its Match_ID.
# The helpers are passed straight to `apply`: wrapping them in a lambda adds
# nothing, and a lambda parameter named `match_id` would shadow the
# notebook-level `match_id` that selects the match to predict.
player_stats['Home_Team'] = player_stats['Match_ID'].apply(get_home_team_from_match_id)
player_stats['Away_Team'] = player_stats['Match_ID'].apply(get_away_team_from_match_id)

# Home flag computed from the frame that now carries Home_Team / Away_Team
# (presumably those columns are what the helper compares against — confirm in preprocessing).
player_stats['Home'] = create_home_flag(player_stats)

In [5]:
# Aggregate the player-level rating points to team level, then merge the
# result with the match summaries.
team_stats = aggregate_player_stats_to_team_stats(player_stats, ['Player_Rating_Points'])
match_stats = merge_match_summary_team_stats(matches, team_stats)

# NOTE(review): 32 is passed through to the ELO helper as-is — presumably the
# K-factor; confirm against the elo module.
match_stats = create_elo_ratings_home_away(match_stats, 32)

# Score -> margin -> win features, applied in that order (innermost call runs first).
match_stats = create_win_features(
    create_margin_features(
        create_score_features(match_stats)
    )
)

  team_sum = team_stats.groupby(["Match_ID", f"{team_type}_Team"]).agg(np.sum).reset_index()
  team_sum = team_stats.groupby(["Match_ID", f"{team_type}_Team"]).agg(np.sum).reset_index()


In [6]:
# Rolling-feature configuration: the span and the base columns it is applied to.
rolling_span = 5
rolling_feature_list = ['Win', 'Margin', 'Score', 'Goals', 'Player_Rating_Points', 'ELO']
match_stats = create_team_rolling_features(match_stats, rolling_feature_list, rolling_span)

match_stats = create_distance_travelled_feature(match_stats)

# Build the home-vs-away diff inputs from the rolling configuration instead of
# hard-coding the span into every column name, so this list cannot drift from
# `rolling_span` / `rolling_feature_list`. For the values above it evaluates to
# exactly: Win_For_ewm5, Margin_For_ewm5, Score_For_ewm5, Goals_For_ewm5,
# Player_Rating_Points_For_ewm5, ELO_For_ewm5, Distance_Travelled.
# NOTE(review): assumes the rolling helper names its columns
# `<feature>_For_ewm<span>` for any span — confirm in the rolling module.
feature_diff_list = [
    f'{feature}_For_ewm{rolling_span}' for feature in rolling_feature_list
] + ['Distance_Travelled']

match_stats = create_home_away_diff_feature(match_stats, feature_diff_list)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_stats['Team'] = team
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_stats['Team'] = team
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  team_stats['Team'] = team
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See

In [7]:
# Get single match: pick the entry for `match_id` out of the engineered feature table.
data = get_match(match_stats, match_id)

In [8]:
# Show the selected match so its engineered features can be checked by eye.
data

Unnamed: 0,Home_Team,Away_Team,Q1_Score,Q2_Score,Q3_Score,Q4_Score,Margin,Total Game Score,Home Win,Venue,...,Away_ELO_Against_ewm5,Home_Distance_Travelled,Away_Distance_Travelled,Win_For_ewm5_diff,Margin_For_ewm5_diff,Score_For_ewm5_diff,Goals_For_ewm5_diff,Player_Rating_Points_For_ewm5_diff,ELO_For_ewm5_diff,Distance_Travelled_diff
4669,Collingwood,Brisbane,4.4.28 - 3.0.18,9.9.63 - 9.3.57,10.15.75 - 11.5.71,12.18.90 - 13.8.86,4,176,1.0,MCG,...,1576.836732,0.0,1371.407301,-0.129375,-11.099309,-17.781877,-2.355886,-6.755379,-0.60506,-1371.407301


In [9]:
# Predict: score the selected match using only the columns listed in `model_features`.
probas = super_xgb.predict_proba(data[model_features])

In [10]:
# Show the predicted probabilities for the match.
# NOTE(review): column order presumably follows the model's class order
# (e.g. [away win, home win]) — confirm against the trained model.
probas

array([[0.47673106, 0.52326894]], dtype=float32)