In [15]:
import sys
import os
# Append the parent of the current working directory to sys.path, presumably so
# that the project-local `src` package imported below resolves. This depends on
# the kernel's working directory at the moment the cell runs.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))
import nfl_data_py as nfl
import pandas as pd
from src import utils
from src import homers

# Add Latest Week

In [16]:
# Season and week number of the picks being appended in this section.
SEASON = 2023
WEEK = 8

In [17]:
# Read the week's picks table from the OS clipboard; the table must be copied
# before this cell runs, so the result depends on state outside the notebook.
new_week_raw = pd.read_clipboard()
# Fail fast if the clipboard held something else: the Excel loaders further down
# in this notebook require exactly 9 pick rows per week, so apply the same check.
assert len(new_week_raw) == 9, f'expected 9 pick rows from the clipboard, got {len(new_week_raw)}'
print(new_week_raw.shape)
new_week_raw.head()

(9, 7)


Unnamed: 0,Pick,Team,Hunter,Harry,Griffin,Ben,Chuck
0,BB,BAL,HOU,TEN,ATL,PIT,SEA
1,1,PIT,DAL,CLE,CAR,CLE,PHI
2,2,CIN,PIT,JAX,MIA,CIN,DAL
3,3,DAL,GB,WAS,KC,CHI,CHI
4,4,HOU,BAL,DEN,CIN,HOU,NYJ


In [18]:
# Process the pasted table with homers.process_picks (defined in src/homers, not
# shown in this notebook).
# NOTE(review): arguments are positional as (raw, WEEK, SEASON) -- week before
# season; confirm this matches process_picks' parameter order.
new_week_processed = homers.process_picks(new_week_raw, WEEK, SEASON)
# Print the shape and preview the first rows as a manual sanity check.
print(new_week_processed.shape)
new_week_processed.head()

(54, 18)


Unnamed: 0,picker,pick,season,week,spread_pick,best_bet,underdog_pick,survivor_pick,mnf_pick,game_id,away_team,home_team,result,spread_line,away_pick,away_cover,home_cover,pick_result
0,final,PIT,2023,8,True,False,False,False,False,2023_08_JAX_PIT,JAX,PIT,-10.0,-2.5,False,1.0,0.0,0.0
1,hunter,PIT,2023,8,True,False,False,False,False,2023_08_JAX_PIT,JAX,PIT,-10.0,-2.5,False,1.0,0.0,0.0
2,ben,PIT,2023,8,True,True,False,False,False,2023_08_JAX_PIT,JAX,PIT,-10.0,-2.5,False,1.0,0.0,0.0
3,final,DAL,2023,8,True,False,False,False,False,2023_08_LA_DAL,LA,DAL,23.0,6.5,False,0.0,1.0,1.0
4,hunter,DAL,2023,8,True,False,False,False,False,2023_08_LA_DAL,LA,DAL,23.0,6.5,False,0.0,1.0,1.0


In [19]:
# Load the previously saved picks table from the path configured in src/homers.
# Pickle files execute code on load, so this should only ever read a file
# produced by this project.
pick_df = pd.read_pickle(homers.PROCESSED_FILE_PATH)
# Row/column count before the new week is appended.
pick_df.shape

(2115, 18)

In [20]:
# Columns that identify a single pick: who made it, when, which team, and which
# pick-type flags it carries. Judging by the loaders further down in this
# notebook, the remaining columns (game_id, result, spread_line, cover and pick
# results) are joined in or derived from the NFL schedule.
PICK_KEY_COLS = [
    'picker', 'season', 'week', 'pick',
    'spread_pick', 'best_bet', 'underdog_pick', 'survivor_pick', 'mnf_pick',
]

# Append the new week and de-duplicate on the pick identity rather than on the
# whole row. A whole-row comparison keeps BOTH copies of a pick whenever its
# schedule data changed between runs (e.g. a result that was missing when the
# week was first loaded), leaving a stale row in the saved table. keep='last'
# lets the freshly processed rows win, and ignore_index=True on the dedup gives
# a contiguous index (the original reset the index before dropping rows, which
# left gaps).
# NOTE(review): assumes a picker never enters the same team twice with the same
# pick type in one week -- confirm against the sheets.
pick_df = (
    pd.concat([pick_df, new_week_processed], ignore_index=True)
    .drop_duplicates(subset=PICK_KEY_COLS, keep='last', ignore_index=True)
)
pick_df.shape

(2124, 18)

In [21]:
# Write the combined table back to the same path it was loaded from, replacing
# the previous file.
pick_df.to_pickle(homers.PROCESSED_FILE_PATH)

# Add 2023 Data

In [41]:
# Load weeks first_week..max_week of the given season from the picks workbook,
# join every pick to its game in the NFL schedule, score it, and append the
# result to the existing `processed` table.
#
# NOTE(review): `processed` and `HOMERS_PATH` are read here but not defined in
# this cell; they must already exist in the kernel for the cell to run.
# NOTE(review): homers.process_picks (used in the "Add Latest Week" section)
# looks like it covers the same steps -- consider consolidating; confirm first.
season = 2023
first_week = 2  # NOTE(review): week 1 is skipped -- presumably already in `processed`; confirm
max_week = 4

schedule_cols = ['game_id', 'season', 'week', 'away_team', 'home_team', 'away_score', 'home_score', 'result', 'spread_line']
nfl_df = nfl.import_schedules([season])[schedule_cols]

expected_cols = ['pick_type', 'final', 'ben', 'chuck', 'griffin', 'harry', 'hunter']
known_pick_types = ['bb', 'reg', 'sd', 'ud', 'mnf']
join_keys = ['season', 'week', 'pick']

# Start from the existing table minus any rows for the weeks rebuilt below, so
# re-running this cell replaces those weeks instead of appending them again.
rebuilt_weeks = (processed['season'] == season) & processed['week'].between(first_week, max_week)
weekly_picks = [processed[~rebuilt_weeks]]

for week in range(first_week, max_week + 1):
    week_sheet = pd.read_excel(f'{HOMERS_PATH}/homers-{season}.xlsx', sheet_name=f'Wk {week}')
    week_sheet = utils.clean_df_columns(week_sheet)
    week_sheet = week_sheet.rename(columns={'team': 'final', 'pick': 'pick_type'})

    # Validate the sheet layout with a real message (the original passed
    # print(...) as the assert message, which evaluates to None).
    unexpected_cols = [col for col in week_sheet.columns if col not in expected_cols]
    assert not unexpected_cols, f'unexpected columns in Wk {week}: {unexpected_cols}'
    assert len(week_sheet) == 9, f'expected 9 pick rows in Wk {week}, got {len(week_sheet)}'

    # Normalise the row labels: numbered picks 1-5 become 'reg', long names are
    # shortened (same mapping the 2021/2022 loader applies), and anything still
    # unrecognised falls back to 'reg'.
    pick_type = week_sheet['pick_type']
    pick_type = pick_type.map({i: 'reg' for i in range(1, 6)}).fillna(pick_type).apply(str.lower)
    pick_type = pick_type.map({'underdog': 'ud', 'survivor': 'sd'}).fillna(pick_type)
    week_sheet['pick_type'] = pick_type.where(pick_type.isin(known_pick_types), 'reg')

    # melt picks into a long table format: one row per (pick_type, picker)
    transformed = pd.melt(week_sheet, id_vars=['pick_type'], var_name='picker', value_name='pick').dropna()
    transformed['season'] = season
    transformed['week'] = week
    # Keep only the first team of an "A/B" entry, and strip AFTER splitting so
    # "PIT / CLE" yields "PIT" rather than "PIT ".
    transformed['pick'] = transformed['pick'].apply(lambda raw: raw.upper().split('/')[0].strip())
    # .copy() so the column assignments below write to an independent frame.
    transformed = transformed[transformed['pick'] != ''].copy()
    # Map bad team names
    transformed['pick'] = transformed['pick'].apply(utils.standardize_teams)

    # turn pick types into one hot cols to make lookup faster
    # (isin yields a real bool column; map(...).fillna(True) relied on
    # object-dtype downcasting that newer pandas deprecates)
    transformed['spread_pick'] = ~transformed['pick_type'].isin(['ud', 'sd'])
    transformed['best_bet'] = transformed['pick_type'] == 'bb'
    transformed['underdog_pick'] = transformed['pick_type'] == 'ud'
    transformed['survivor_pick'] = transformed['pick_type'] == 'sd'
    transformed['mnf_pick'] = transformed['pick_type'] == 'mnf'
    transformed = transformed.drop(columns='pick_type')

    # Join each pick to its game twice: once as the home team, once as the away
    # team. Inner joins, so a pick matching no game that week is dropped.
    joined_home = pd.merge(transformed, nfl_df, left_on=join_keys, right_on=['season', 'week', 'home_team'])
    joined_away = pd.merge(transformed, nfl_df, left_on=join_keys, right_on=['season', 'week', 'away_team'])

    week_final_picks = pd.concat([joined_home, joined_away], ignore_index=True)
    week_final_picks['away_pick'] = week_final_picks['pick'] == week_final_picks['away_team']
    # cover_result / pick_result are row-wise helpers from src/utils.
    week_final_picks['away_cover'] = week_final_picks.apply(utils.cover_result, axis=1)
    # home_cover is defined as the complement of away_cover.
    week_final_picks['home_cover'] = 1 - week_final_picks['away_cover']
    week_final_picks['pick_result'] = week_final_picks.apply(utils.pick_result, axis=1)

    # add to list of picks
    weekly_picks.append(week_final_picks)
    print(f'{season} week {week}')

processed = pd.concat(weekly_picks, ignore_index=True)

2023 week 2
2023 week 3
2023 week 4


In [42]:
# Display the combined picks table as a visual check of the first and last rows.
processed

Unnamed: 0,picker,pick,season,week,spread_pick,best_bet,underdog_pick,survivor_pick,mnf_pick,game_id,away_team,home_team,away_score,home_score,result,spread_line,away_pick,away_cover,home_cover,pick_result
0,final,NE,2021,1,True,True,False,False,False,2021_01_MIA_NE,MIA,NE,17.0,16.0,-1.0,3.5,False,1.0,0.0,0.0
1,ben,NE,2021,1,True,True,False,False,False,2021_01_MIA_NE,MIA,NE,17.0,16.0,-1.0,3.5,False,1.0,0.0,0.0
2,hunter,NE,2021,1,True,False,False,False,False,2021_01_MIA_NE,MIA,NE,17.0,16.0,-1.0,3.5,False,1.0,0.0,0.0
3,final,WAS,2021,1,True,False,False,False,False,2021_01_LAC_WAS,LAC,WAS,20.0,16.0,-4.0,2.5,False,1.0,0.0,0.0
4,ben,WAS,2021,1,True,False,False,False,False,2021_01_LAC_WAS,LAC,WAS,20.0,16.0,-4.0,2.5,False,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1911,ben,BAL,2023,4,True,False,False,False,False,2023_04_BAL_CLE,BAL,CLE,28.0,3.0,-25.0,-2.0,True,1.0,0.0,1.0
1912,chuck,BAL,2023,4,True,False,False,False,False,2023_04_BAL_CLE,BAL,CLE,28.0,3.0,-25.0,-2.0,True,1.0,0.0,1.0
1913,ben,MIN,2023,4,True,False,False,False,False,2023_04_MIN_CAR,MIN,CAR,21.0,13.0,-8.0,-4.5,True,1.0,0.0,1.0
1914,ben,MIA,2023,4,True,False,False,False,False,2023_04_MIA_BUF,MIA,BUF,20.0,48.0,28.0,2.5,True,0.0,1.0,0.0


In [43]:
# Save the combined table as a pickle under HOMERS_PATH.
# NOTE(review): HOMERS_PATH is not defined in any cell shown in this notebook;
# it must already exist in the kernel. The "Process Raw Picks" section writes
# to this same file name, so whichever cell runs last determines its contents.
processed.to_pickle(f'{HOMERS_PATH}/homers-processed.pkl')

# Process Raw Picks

In [2]:
# Build the picks table for the listed seasons from the per-season workbooks:
# read each weekly sheet, normalise it, reshape it to one row per pick, join
# every pick to its game in the NFL schedule, and score it. The result is left
# in `df`, which the cells below display and save.
#
# NOTE(review): `HOMERS_PATH` is read here but not defined in this cell; it
# must already exist in the kernel for the cell to run.
seasons = [2021, 2022]

schedule_cols = ['game_id', 'season', 'week', 'away_team', 'home_team', 'away_score', 'home_score', 'result', 'spread_line']
nfl_df = nfl.import_schedules(seasons)[schedule_cols]
print('imported seasons')

expected_cols = ['pick_type', 'final', 'ben', 'chuck', 'griffin', 'harry', 'hunter']
known_pick_types = ['bb', 'reg', 'sd', 'ud', 'mnf']
join_keys = ['season', 'week', 'pick']

all_picks = []

for season in seasons:
    for week in range(1, 19):
        # Keep the first 9 rows of the first 14 columns, then discard empty
        # columns, the optional Notes column and unnamed filler columns.
        week_sheet = (
            pd.read_excel(f'{HOMERS_PATH}/homers-{season}.xlsx', sheet_name=f'Wk {week}', usecols=range(14), header=1)
            .iloc[:9]
            .dropna(axis=1, how='all')
            .drop(columns=['Notes'], errors='ignore')
        )
        # str(col) so a non-string header cannot raise a TypeError in the test.
        week_sheet = week_sheet.drop(columns=[col for col in week_sheet.columns if 'Unnamed' in str(col)])
        week_sheet = utils.clean_df_columns(week_sheet)
        week_sheet = week_sheet.rename(columns={'team': 'final', 'picks': 'pick_type'})

        unexpected_cols = [col for col in week_sheet.columns if col not in expected_cols]
        assert not unexpected_cols, f'unexpected columns in {season} Wk {week}: {unexpected_cols}'
        assert len(week_sheet) == 9, f'expected 9 pick rows in {season} Wk {week}, got {len(week_sheet)}'

        # Normalise the row labels: numbered picks 1-5 become 'reg', long names
        # are shortened, and anything still unrecognised falls back to 'reg'.
        pick_type = week_sheet['pick_type']
        pick_type = pick_type.map({i: 'reg' for i in range(1, 6)}).fillna(pick_type).apply(str.lower)
        pick_type = pick_type.map({'underdog': 'ud', 'survivor': 'sd'}).fillna(pick_type)
        week_sheet['pick_type'] = pick_type.where(pick_type.isin(known_pick_types), 'reg')

        # melt picks into a long table format: one row per (pick_type, picker)
        transformed = pd.melt(week_sheet, id_vars=['pick_type'], var_name='picker', value_name='pick').dropna()
        transformed['season'] = season
        transformed['week'] = week
        # Keep only the first team of an "A/B" entry, and strip AFTER splitting
        # so "PIT / CLE" yields "PIT" rather than "PIT ".
        transformed['pick'] = transformed['pick'].apply(lambda raw: raw.upper().split('/')[0].strip())
        # .copy() so the column assignments below write to an independent frame.
        transformed = transformed[transformed['pick'] != ''].copy()
        # Map bad team names
        transformed['pick'] = transformed['pick'].apply(utils.standardize_teams)

        # turn pick types into one hot cols to make lookup faster
        # (isin yields a real bool column; map(...).fillna(True) relied on
        # object-dtype downcasting that newer pandas deprecates)
        transformed['spread_pick'] = ~transformed['pick_type'].isin(['ud', 'sd'])
        transformed['best_bet'] = transformed['pick_type'] == 'bb'
        transformed['underdog_pick'] = transformed['pick_type'] == 'ud'
        transformed['survivor_pick'] = transformed['pick_type'] == 'sd'
        transformed['mnf_pick'] = transformed['pick_type'] == 'mnf'
        transformed = transformed.drop(columns='pick_type')

        # Join each pick to its game twice: once as the home team, once as the
        # away team. Inner joins, so a pick matching no game that week is dropped.
        joined_home = pd.merge(transformed, nfl_df, left_on=join_keys, right_on=['season', 'week', 'home_team'])
        joined_away = pd.merge(transformed, nfl_df, left_on=join_keys, right_on=['season', 'week', 'away_team'])

        week_final_picks = pd.concat([joined_home, joined_away], ignore_index=True)
        week_final_picks['away_pick'] = week_final_picks['pick'] == week_final_picks['away_team']
        # cover_result / pick_result are row-wise helpers from src/utils.
        week_final_picks['away_cover'] = week_final_picks.apply(utils.cover_result, axis=1)
        # home_cover is defined as the complement of away_cover.
        week_final_picks['home_cover'] = 1 - week_final_picks['away_cover']
        week_final_picks['pick_result'] = week_final_picks.apply(utils.pick_result, axis=1)

        # add to list of picks
        all_picks.append(week_final_picks)
        print(f'{season} week {week}')

df = pd.concat(all_picks, ignore_index=True)

imported seasons
2021 week 1
2021 week 2
2021 week 3
2021 week 4
2021 week 5
2021 week 6
2021 week 7
2021 week 8
2021 week 9
2021 week 10
2021 week 11
2021 week 12
2021 week 13
2021 week 14
2021 week 15
2021 week 16
2021 week 17
2021 week 18
2022 week 1
2022 week 2
2022 week 3
2022 week 4
2022 week 5
2022 week 6
2022 week 7
2022 week 8
2022 week 9
2022 week 10
2022 week 11
2022 week 12
2022 week 13
2022 week 14
2022 week 15
2022 week 16
2022 week 17
2022 week 18


In [3]:
# Preview the first rows of the table built by the loader cell above.
df.head()

Unnamed: 0,picker,pick,season,week,spread_pick,best_bet,underdog_pick,survivor_pick,mnf_pick,game_id,away_team,home_team,away_score,home_score,result,spread_line,away_pick,away_cover,home_cover,pick_result
0,final,NE,2021,1,True,True,False,False,False,2021_01_MIA_NE,MIA,NE,17.0,16.0,-1.0,3.5,False,1.0,0.0,0.0
1,ben,NE,2021,1,True,True,False,False,False,2021_01_MIA_NE,MIA,NE,17.0,16.0,-1.0,3.5,False,1.0,0.0,0.0
2,hunter,NE,2021,1,True,False,False,False,False,2021_01_MIA_NE,MIA,NE,17.0,16.0,-1.0,3.5,False,1.0,0.0,0.0
3,final,WAS,2021,1,True,False,False,False,False,2021_01_LAC_WAS,LAC,WAS,20.0,16.0,-4.0,2.5,False,1.0,0.0,0.0
4,ben,WAS,2021,1,True,False,False,False,False,2021_01_LAC_WAS,LAC,WAS,20.0,16.0,-4.0,2.5,False,1.0,0.0,0.0


In [25]:
# Save the 2021/2022 picks table as a pickle under HOMERS_PATH.
# NOTE(review): HOMERS_PATH is not defined in any cell shown in this notebook.
# The "Add 2023 Data" section writes to this same file name, so running this
# cell after it replaces that file with the table held in `df`.
df.to_pickle(f'{HOMERS_PATH}/homers-processed.pkl')