In [1]:
import pandas as pd
import numpy  as np

In [2]:
# Load the game results table from its parquet file.
results = pd.read_parquet('../data/raw/game_results.parquet')

In [3]:
def result_expander(rdf):
    '''
    Stack two relabelled views of the results table so each game appears once per team.

    Columns are relabelled purely by position, so `rdf` is expected to hold eight
    columns laid out as season, round, venue, date, <team, points>, <team, points>
    (a ninth `home_away` column is appended before relabelling).

    * view tagged 'home': the first team/points pair becomes prime, the second opt.
    * view tagged 'away': the second team/points pair becomes prime, the first opt.

    The returned frame has a fresh 0..n-1 index, float points columns, a
    `prime_margin` column (prime points minus opt points), a `result` column
    (sign of the margin: 1, 0 or -1) and category dtype on home_away, venue,
    prime_team and opt_team. It is intended as the base table for building
    lagged per-team features. `rdf` itself is left untouched.
    '''
    prime_first = ['season', 'round', 'venue', 'date', 'prime_team', 'prime_team_points',
                   'opt_team', 'opt_team_points', 'home_away']
    opt_first = ['season', 'round', 'venue', 'date', 'opt_team', 'opt_team_points',
                 'prime_team', 'prime_team_points', 'home_away']

    def _as_view(tag, labels):
        # Tag every row, relabel the columns by position, then put them in the shared order.
        view = rdf.assign(home_away=tag)
        view.columns = labels
        return view.loc[:, prime_first]

    # ignore_index gives the stacked frame a clean 0..n-1 index straight away.
    stacked = pd.concat(
        [_as_view('home', prime_first), _as_view('away', opt_first)],
        ignore_index=True,
    )
    stacked = stacked.astype({'prime_team_points': 'float', 'opt_team_points': 'float'})
    stacked = stacked.assign(
        prime_margin=lambda frame: frame.prime_team_points - frame.opt_team_points
    )
    stacked = stacked.assign(result=lambda frame: np.sign(frame.prime_margin))

    # make categorical things categorical types
    categorical_cols = ('home_away', 'venue', 'prime_team', 'opt_team')
    return stacked.astype({name: 'category' for name in categorical_cols})

    
def create_features(erdf, n_lags=6):
    '''
    Add lagged prime-team margins and their row-wise mean to an expanded results table.

    For each lag k in 1..n_lags a column `prime_margin_k` is added holding the
    `prime_margin` of the same `prime_team` k rows earlier when that team's rows
    are ordered by `date`. A final column `pm_l{n_lags}w` (`pm_l6w` with the
    default) holds the mean of those lag columns; missing lags are skipped by
    the mean, so a row with only some history still gets a value.

    Parameters
    ----------
    erdf : DataFrame with at least `date`, `prime_team` and `prime_margin`
        columns. Its index should be unique, because the shifted values are
        aligned back onto the frame by index label.
    n_lags : number of lag columns to create (default 6, must be >= 1).

    Returns
    -------
    A new DataFrame; `erdf` itself is not modified.

    Raises
    ------
    ValueError if `n_lags` is smaller than 1.
    '''
    if n_lags < 1:
        raise ValueError('n_lags must be at least 1')

    # Sort and group once instead of once per lag. observed=True does not change
    # the shifted values; it only avoids the deprecated implicit default when
    # prime_team is a categorical column.
    margin_by_team = (
        erdf.sort_values('date')
            .groupby('prime_team', observed=True)['prime_margin']
    )

    # Each shifted series keeps the original row labels, so assign() aligns it
    # back onto the (unsorted) frame.
    lag_columns = {
        f'prime_margin_{lag}': margin_by_team.shift(lag)
        for lag in range(1, n_lags + 1)
    }
    erdf = erdf.assign(**lag_columns)

    mean_name = f'pm_l{n_lags}w'
    erdf = erdf.assign(**{mean_name: erdf.loc[:, list(lag_columns)].mean(axis=1)})

    return erdf

In [4]:
# Expand the results so each game has one row per team, then add the lagged
# prime-margin columns and their mean.
expanded_results = result_expander(results)
feature_df = create_features(expanded_results)

In [5]:
# Show rows where result == 0; result is the sign of prime_margin, so these are
# games in which both teams scored the same number of points.
feature_df[feature_df.result==0]

Unnamed: 0,season,round,venue,date,prime_team,prime_team_points,opt_team,opt_team_points,home_away,prime_margin,result,prime_margin_1,prime_margin_2,prime_margin_3,prime_margin_4,prime_margin_5,prime_margin_6,pm_l6w
259,2019,17,"Suncorp Stadium, Brisbane",2019-07-13,Broncos,18.0,Warriors,18.0,home,0.0,0.0,2.0,-14.0,-28.0,-8.0,6.0,5.0,-6.166667
526,2020,3,"Campbelltown Sports Stadium, Sydney",2020-05-31,Panthers,14.0,Knights,14.0,home,0.0,0.0,4.0,6.0,44.0,-16.0,-14.0,-12.0,2.0
1391,2019,17,"Suncorp Stadium, Brisbane",2019-07-13,Warriors,18.0,Broncos,18.0,away,0.0,0.0,4.0,-1.0,4.0,-22.0,-6.0,20.0,-0.166667
1658,2020,3,"Campbelltown Sports Stadium, Sydney",2020-05-31,Knights,14.0,Panthers,14.0,away,0.0,0.0,18.0,20.0,-44.0,34.0,-42.0,36.0,3.666667


In [6]:
# Write the feature table to parquet.
# NOTE(review): this derived table is written under data/raw next to the input file - confirm that is intended.
feature_df.to_parquet('../data/raw/features.parquet')

In [7]:
# Show the rows for the 2023 season.
feature_df[feature_df.season == 2023]

Unnamed: 0,season,round,venue,date,prime_team,prime_team_points,opt_team,opt_team_points,home_away,prime_margin,result,prime_margin_1,prime_margin_2,prime_margin_3,prime_margin_4,prime_margin_5,prime_margin_6,pm_l6w
928,2023,27,"Suncorp Stadium, Brisbane",2023-08-31,Broncos,,Storm,,home,,,,,,,,,
929,2023,27,"4 Pines Park, Sydney",2023-09-01,Sea Eagles,,Wests Tigers,,home,,,,,,,,,
930,2023,27,"Accor Stadium, Sydney",2023-09-01,Rabbitohs,,Roosters,,home,,,,,,,,,
931,2023,27,"Suncorp Stadium, Brisbane",2023-09-02,Dolphins,,Warriors,,home,,,,,,,,,
932,2023,27,"BlueBet Stadium, Penrith",2023-09-02,Panthers,,Cowboys,,home,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2259,2023,1,"4 Pines Park, Sydney",2023-03-04,Bulldogs,,Sea Eagles,,away,,,1.0,-16.0,-36.0,-24.0,-14.0,14.0,-12.500000
2260,2023,1,"Queensland Country Bank Stadium, Townsville",2023-03-04,Raiders,,Cowboys,,away,,,46.0,42.0,6.0,2.0,-20.0,12.0,14.666667
2261,2023,1,"PointsBet Stadium, Sydney",2023-03-04,Rabbitohs,,Sharks,,away,,,-10.0,10.0,-4.0,26.0,38.0,-1.0,9.833333
2262,2023,1,"Suncorp Stadium, Brisbane",2023-03-05,Roosters,,Dolphins,,away,,,10.0,4.0,66.0,14.0,18.0,10.0,20.333333
