In [1]:
import pandas as pd

In [16]:
results = pd.read_parquet('../data/raw/game_results.parquet')

In [17]:
results

Unnamed: 0,season,round,venue,date,home_team,home_team_points,away_team,away_team_points
0,2018,25,"Stadium Australia, Sydney",2018-08-30,Rabbitohs,51,Wests Tigers,10
1,2018,25,"Mt Smart Stadium, Auckland",2018-08-31,Warriors,20,Raiders,16
2,2018,25,"AAMI Park, Melbourne",2018-08-31,Storm,16,Panthers,22
3,2018,25,"McDonald Jones Stadium, Newcastle",2018-09-01,Knights,14,Dragons,24
4,2018,25,"Cbus Super Stadium, Gold Coast",2018-09-01,Titans,26,Cowboys,30
...,...,...,...,...,...,...,...,...
1127,2023,1,"4 Pines Park, Sydney",2023-03-04,Sea Eagles,,Bulldogs,
1128,2023,1,"Queensland Country Bank Stadium, Townsville",2023-03-04,Cowboys,,Raiders,
1129,2023,1,"PointsBet Stadium, Sydney",2023-03-04,Sharks,,Rabbitohs,
1130,2023,1,"Suncorp Stadium, Brisbane",2023-03-05,Dolphins,,Roosters,


In [116]:

def result_expander(rdf):
    '''
    Duplicate results table such that each game has a row where each team is prime and opt.

    This creates a dataframe that can be used for creating lagged effects on each team's performance.
    '''
    
    results_home = rdf.copy()
    results_away = rdf.copy()
    results_home = results_home.assign(home_away='home')
    results_away = results_away.assign(home_away='away')

    home_cols = ['season', 'round', 'venue', 'date', 'prime_team', 'prime_team_points',
                 'opt_team', 'opt_team_points', 'home_away']
    away_cols = ['season', 'round', 'venue', 'date', 'opt_team', 'opt_team_points',
                 'prime_team', 'prime_team_points', 'home_away']

    results_home.columns = home_cols
    results_away.columns = away_cols
    
    # select the columns in the same order before concatenating the tables, we've assigned them above so this works but looks odd.

    all_results = pd.concat([results_home.loc[:, home_cols], results_away.loc[:, home_cols]]) 
    all_results = all_results.assign(prime_team_points=all_results.prime_team_points.astype('float'))
    all_results = all_results.assign(opt_team_points=all_results.opt_team_points.astype('float'))
    all_results = all_results.assign(prime_margin=all_results.prime_team_points - all_results.opt_team_points)
    all_results = all_results.reset_index(drop=True)
    return all_results

    


In [117]:
training = result_expander(results)
training = training[~training.prime_team_points.isna()]
training = training.sort_values('date')

#expanded_results = expanded_results.assign()

In [118]:
training.head(20)

Unnamed: 0,season,round,venue,date,prime_team,prime_team_points,opt_team,opt_team_points,home_away,prime_margin
1316,2018,1,"Netstrata Jubilee Stadium, Sydney",2018-03-08,Broncos,12.0,Dragons,34.0,away,-22.0
184,2018,1,"Netstrata Jubilee Stadium, Sydney",2018-03-08,Dragons,34.0,Broncos,12.0,home,22.0
1318,2018,1,"1300SMILES Stadium, Townsville",2018-03-09,Sharks,14.0,Cowboys,20.0,away,-6.0
1317,2018,1,"McDonald Jones Stadium, Newcastle",2018-03-09,Sea Eagles,18.0,Knights,19.0,away,-1.0
185,2018,1,"McDonald Jones Stadium, Newcastle",2018-03-09,Knights,19.0,Sea Eagles,18.0,home,1.0
186,2018,1,"1300SMILES Stadium, Townsville",2018-03-09,Cowboys,20.0,Sharks,14.0,home,6.0
1321,2018,1,"Optus Stadium, Perth",2018-03-10,Storm,36.0,Bulldogs,18.0,away,18.0
1320,2018,1,"Optus Stadium, Perth",2018-03-10,Warriors,32.0,Rabbitohs,20.0,away,12.0
1319,2018,1,"Stadium Australia, Sydney",2018-03-10,Roosters,8.0,Wests Tigers,10.0,away,-2.0
187,2018,1,"Stadium Australia, Sydney",2018-03-10,Wests Tigers,10.0,Roosters,8.0,home,2.0


In [119]:
rolling_window_size = 6
rolling_mean_cols = ['prime_team_points', 'prime_margin']
rolling = training.groupby('prime_team').rolling(rolling_window_size)[rolling_mean_cols].mean().reset_index(level=0)#.set_index('level_1')
rolling = rolling.loc[:, [*rolling_mean_cols]]
rolling.columns = [f'{r}_rolling' for r in rolling_mean_cols]
training_aug = training.merge(rolling, left_index=True, right_index=True)


In [120]:
training

Unnamed: 0,season,round,venue,date,prime_team,prime_team_points,opt_team,opt_team_points,home_away,prime_margin
1316,2018,1,"Netstrata Jubilee Stadium, Sydney",2018-03-08,Broncos,12.0,Dragons,34.0,away,-22.0
184,2018,1,"Netstrata Jubilee Stadium, Sydney",2018-03-08,Dragons,34.0,Broncos,12.0,home,22.0
1318,2018,1,"1300SMILES Stadium, Townsville",2018-03-09,Sharks,14.0,Cowboys,20.0,away,-6.0
1317,2018,1,"McDonald Jones Stadium, Newcastle",2018-03-09,Sea Eagles,18.0,Knights,19.0,away,-1.0
185,2018,1,"McDonald Jones Stadium, Newcastle",2018-03-09,Knights,19.0,Sea Eagles,18.0,home,1.0
...,...,...,...,...,...,...,...,...,...,...
1873,2022,25,"Queensland Country Bank Stadium, Townsville",2022-09-03,Panthers,8.0,Cowboys,38.0,away,-30.0
1875,2022,25,"Leichhardt Oval, Sydney",2022-09-04,Raiders,56.0,Wests Tigers,10.0,away,46.0
743,2022,25,"Leichhardt Oval, Sydney",2022-09-04,Wests Tigers,10.0,Raiders,56.0,home,-46.0
742,2022,25,"McDonald Jones Stadium, Newcastle",2022-09-04,Knights,16.0,Sharks,38.0,home,-22.0


In [135]:
training_aug_copy = training_aug.loc[:, ['venue', 'date', 'opt_team', 'prime_team_points_rolling', 'prime_margin_rolling']].copy()
training_aug_copy.columns =  ['venue', 'date', 'opt_team', 'opt_team_points_rolling', 'opt_margin_rolling']

training_df = training_aug.merge(training_aug_copy, 
                          left_on=['venue', 'date', 'prime_team'], 
                          right_on=['venue', 'date', 'opt_team'],
                        )

training_df = training_df.drop(columns='opt_team_y')
training_df = training_df.rename(columns={'opt_team_x':'opt_team'})
training_df.loc[(training_df.prime_team == 'Broncos')].head(10)

Unnamed: 0,season,round,venue,date,prime_team,prime_team_points,opt_team,opt_team_points,home_away,prime_margin,prime_team_points_rolling,prime_margin_rolling,opt_team_points_rolling,opt_margin_rolling
0,2018,1,"Netstrata Jubilee Stadium, Sydney",2018-03-08,Broncos,12.0,Dragons,34.0,away,-22.0,,,,
19,2018,2,"Suncorp Stadium, Brisbane",2018-03-16,Broncos,24.0,Cowboys,20.0,home,4.0,,,,
37,2018,3,"Campbelltown Sports Stadium, Sydney",2018-03-23,Broncos,9.0,Wests Tigers,7.0,away,2.0,,,,
60,2018,4,"Suncorp Stadium, Brisbane",2018-04-01,Broncos,14.0,Titans,26.0,home,-12.0,,,,
73,2018,5,"McDonald Jones Stadium, Newcastle",2018-04-07,Broncos,10.0,Knights,15.0,away,-5.0,,,,
89,2018,6,"Mt Smart Stadium, Auckland",2018-04-14,Broncos,27.0,Warriors,18.0,away,9.0,16.0,-4.0,23.666667,8.333333
99,2018,7,"Suncorp Stadium, Brisbane",2018-04-20,Broncos,20.0,Storm,34.0,home,-14.0,17.333333,-2.666667,21.0,7.166667
116,2018,8,"Stadium Australia, Sydney",2018-04-26,Broncos,24.0,Rabbitohs,20.0,away,4.0,17.333333,-2.666667,25.666667,9.333333
128,2018,9,"Suncorp Stadium, Brisbane",2018-05-03,Broncos,22.0,Bulldogs,20.0,home,2.0,19.5,-2.666667,14.5,-3.166667
155,2018,10,"Suncorp Stadium, Brisbane",2018-05-12,Broncos,24.0,Sea Eagles,38.0,away,-14.0,21.166667,-3.0,18.666667,-11.0
