In [1]:
import sys
import os
import math
# Navigate up one level to the parent directory and append it to sys.path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))
import nfl_data_py as nfl
import pandas as pd

from sklearn.linear_model import LogisticRegression
import plotly.express as px

from src import utils
from src import homers

# Research Question Pool

* could we allocate coefficients for voting?

* Were we homers? (picking our teams, picking the same teams)

* Was there correlation between doing well one week, and doing well the next week?

* What trends emerged (picking home/away, dogs/favorites)

* how did our underdog and SD picks fare against the spread

* Did the different lines matter?

In [2]:
df = pd.read_pickle(homers.PROCESSED_FILE_PATH)

# Weekly Update

In [3]:
# Filter for the rows with the highest season
season_df = df[df['season'] == df['season'].max()]
# Now, create a subset for the latest week in the highest season
week_df = season_df[season_df['week'] == season_df['week'].max()]
week_df.shape

(54, 18)

In [4]:
homers.plot_scores(week_df)

8

In [10]:
homers.plot_scores(season_df, 'spread_pick', agg_sum=False)
latest_week = max(season_df[season_df['season'] == max(season_df['season'])]['week'])
last_4_week_criteria = season_df['week'] > latest_week - 4
homers.plot_scores(season_df[last_4_week_criteria], 'spread_pick', agg_sum=False)

In [11]:
homers.plot_scores(df, 'spread_pick', agg_sum=False)

In [7]:
homers.plot_scores(week_df, 'best_bet')

In [12]:
homers.plot_scores(season_df, 'best_bet', agg_sum=False)

In [13]:
homers.plot_scores(week_df, 'underdog_pick')

In [14]:
homers.plot_scores(season_df, 'underdog_pick', agg_sum=False)
homers.plot_scores(df, 'underdog_pick', agg_sum=False)

In [12]:
homers.plot_scores(week_df, 'survivor_pick')

# Consensus picks

* If we do a simple sum up of consensus picks, how did that score?
* What if we try a zero sum consensus?
* are the picks we are all on the same side on actually bad? majority vs no oposition? unanimous?

In [41]:
SEASON, WEEK = 2023, 4
df = pd.read_pickle(homers.PROCESSED_FILE_PATH)

In [42]:
# individual = df[(df['season']==SEASON) & (df['week'] == WEEK) & (df['picker'] != 'final') & (df['spread_pick']) & (~df['mnf_pick'])].copy()
individual = df[(df['picker'] != 'final') & (df['spread_pick']) & (~df['mnf_pick'])].copy()

In [43]:
individual['pick_multiplier'] = individual['best_bet'] + 1

In [44]:
individual

Unnamed: 0,picker,pick,season,week,spread_pick,best_bet,underdog_pick,survivor_pick,mnf_pick,game_id,away_team,home_team,result,spread_line,away_pick,away_cover,home_cover,pick_result,pick_multiplier
1,ben,NE,2021,1,True,True,False,False,False,2021_01_MIA_NE,MIA,NE,-1.0,3.5,False,1.0,0.0,0.0,2
2,hunter,NE,2021,1,True,False,False,False,False,2021_01_MIA_NE,MIA,NE,-1.0,3.5,False,1.0,0.0,0.0,1
4,ben,WAS,2021,1,True,False,False,False,False,2021_01_LAC_WAS,LAC,WAS,-4.0,2.5,False,1.0,0.0,0.0,1
6,chuck,WAS,2021,1,True,False,False,False,False,2021_01_LAC_WAS,LAC,WAS,-4.0,2.5,False,1.0,0.0,0.0,1
7,harry,WAS,2021,1,True,False,False,False,False,2021_01_LAC_WAS,LAC,WAS,-4.0,2.5,False,1.0,0.0,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2063,harry,LV,2023,7,True,False,False,False,False,2023_07_LV_CHI,LV,CHI,18.0,-2.5,True,0.0,1.0,0.0,1
2064,harry,LAC,2023,7,True,False,False,False,False,2023_07_LAC_KC,LAC,KC,14.0,5.5,True,0.0,1.0,0.0,1
2065,griffin,LAC,2023,7,True,False,False,False,False,2023_07_LAC_KC,LAC,KC,14.0,5.5,True,0.0,1.0,0.0,1
2068,harry,ARI,2023,7,True,False,False,False,False,2023_07_ARI_SEA,ARI,SEA,10.0,9.5,True,0.0,1.0,0.0,1


In [48]:
consensus = individual.groupby(['pick', 'season', 'week'])[['spread_pick', 'best_bet']].sum().reset_index()
consensus['weighted_consensus'] = consensus['spread_pick'] + consensus['best_bet']
consensus = consensus.rename(columns={'spread_pick':'consensus'})
consensus = consensus.sort_values(by='weighted_consensus', ascending=False)
consensus = consensus.drop('best_bet', axis=1)
consensus

Unnamed: 0,pick,season,week,consensus,weighted_consensus
614,SF,2022,14,5,8
43,BAL,2021,13,4,6
147,CIN,2022,16,3,6
153,CLE,2021,4,4,6
239,GB,2021,5,4,6
...,...,...,...,...,...
35,ATL,2023,6,1,1
231,DET,2022,18,1,1
442,MIN,2021,13,1,1
440,MIN,2021,6,1,1


In [53]:
df = pd.merge(consensus, individual[['season', 'week', 'pick', 'pick_result']], on=['season', 'week', 'pick'])
df = df.drop_duplicates()
df['pick_result'] = df.pick_result.apply(lambda x: math.ceil(math.floor(x) / 2))
df

Unnamed: 0,pick,season,week,consensus,weighted_consensus,pick_result
0,SF,2022,14,5,8,1
2,SF,2022,14,5,8,1
5,BAL,2021,13,4,6,0
9,CIN,2022,16,3,6,1
12,CLE,2021,4,4,6,1
...,...,...,...,...,...,...
1202,ATL,2023,6,1,1,0
1203,DET,2022,18,1,1,1
1204,MIN,2021,13,1,1,0
1205,MIN,2021,6,1,1,1


In [98]:
col = 'weighted_consensus'

In [99]:
model = LogisticRegression()
model.fit(df[[col]], df['pick_result'])
print(model.coef_[0][0])
math.exp(model.coef_[0][0])

0.27255230848537726


1.3133121544854203

In [100]:
df.groupby(col).pick_result.count()

weighted_consensus
1    315
2    186
3    135
4     86
5     29
6     14
8      2
Name: pick_result, dtype: int64

In [101]:
df.groupby(col).pick_result.mean()

weighted_consensus
1    0.428571
2    0.505376
3    0.577778
4    0.651163
5    0.586207
6    0.785714
8    1.000000
Name: pick_result, dtype: float64

In [102]:
fig = px.bar(
    df.groupby(col).pick_result.count(),
    text_auto=True,
    title=f'Pick count by {col} score'
)
fig.show()

In [103]:
fig = px.bar(
    df.groupby(col).pick_result.mean().round(2),
    text_auto=True,
    title=f'Pick % by {col} score'
)
fig.show()

In [26]:
df

Unnamed: 0,pick,consensus,weighted_consensus,season,week,spread_pick,best_bet,underdog_pick,survivor_pick,mnf_pick,game_id,away_team,home_team,result,spread_line,away_pick,away_cover,home_cover,pick_result,pick_multiplier
0,BUF,3,4,2023,4,True,False,False,False,False,2023_04_MIA_BUF,MIA,BUF,28.0,2.5,False,0.0,1.0,1.0,1
1,BUF,3,4,2023,4,True,True,False,False,False,2023_04_MIA_BUF,MIA,BUF,28.0,2.5,False,0.0,1.0,2.0,2
3,KC,3,4,2023,4,True,False,False,False,False,2023_04_KC_NYJ,KC,NYJ,-3.0,-9.5,True,0.0,1.0,0.0,1
4,KC,3,4,2023,4,True,True,False,False,False,2023_04_KC_NYJ,KC,NYJ,-3.0,-9.5,True,0.0,1.0,0.0,2
6,LA,2,3,2023,4,True,False,False,False,False,2023_04_LA_IND,LA,IND,-6.0,-1.0,True,1.0,0.0,1.0,1
7,LA,2,3,2023,4,True,True,False,False,False,2023_04_LA_IND,LA,IND,-6.0,-1.0,True,1.0,0.0,2.0,2
8,CLE,2,3,2023,4,True,True,False,False,False,2023_04_BAL_CLE,BAL,CLE,-25.0,-2.0,False,1.0,0.0,0.0,2
9,CLE,2,3,2023,4,True,False,False,False,False,2023_04_BAL_CLE,BAL,CLE,-25.0,-2.0,False,1.0,0.0,0.0,1
10,TB,2,2,2023,4,True,False,False,False,False,2023_04_TB_NO,TB,NO,-17.0,4.0,True,1.0,0.0,1.0,1
12,CIN,2,2,2023,4,True,False,False,False,False,2023_04_CIN_TEN,CIN,TEN,24.0,-2.5,True,0.0,1.0,0.0,1


In [35]:
schedule = nfl.import_schedules([SEASON])
week_schedule = schedule[schedule['week']==WEEK]
week_schedule

Unnamed: 0,game_id,season,game_type,week,gameday,weekday,gametime,away_team,away_score,home_team,...,wind,away_qb_id,home_qb_id,away_qb_name,home_qb_name,away_coach,home_coach,referee,stadium_id,stadium
6469,2023_04_DET_GB,2023,REG,4,2023-09-28,Thursday,20:15,DET,34.0,GB,...,8.0,00-0033106,00-0036264,Jared Goff,Jordan Love,Dan Campbell,Matt LaFleur,Alan Eck,GNB00,Lambeau Field
6470,2023_04_ATL_JAX,2023,REG,4,2023-10-01,Sunday,09:30,ATL,7.0,JAX,...,,00-0038122,00-0036971,Desmond Ridder,Trevor Lawrence,Arthur Smith,Doug Pederson,Craig Wrolstad,JAX00,TIAA Bank Stadium
6471,2023_04_MIA_BUF,2023,REG,4,2023-10-01,Sunday,13:00,MIA,20.0,BUF,...,2.0,00-0036212,00-0034857,Tua Tagovailoa,Josh Allen,Mike McDaniel,Sean McDermott,Adrian Hill,BUF00,New Era Field
6472,2023_04_MIN_CAR,2023,REG,4,2023-10-01,Sunday,13:00,MIN,21.0,CAR,...,7.0,00-0029604,00-0039150,Kirk Cousins,Bryce Young,Kevin O'Connell,Frank Reich,Tra Blake,CAR00,Bank of America Stadium
6473,2023_04_DEN_CHI,2023,REG,4,2023-10-01,Sunday,13:00,DEN,31.0,CHI,...,2.0,00-0029263,00-0036945,Russell Wilson,Justin Fields,Sean Payton,Matt Eberflus,Carl Cheffers,CHI98,Soldier Field
6474,2023_04_BAL_CLE,2023,REG,4,2023-10-01,Sunday,13:00,BAL,28.0,CLE,...,7.0,00-0034796,00-0038583,Lamar Jackson,Dorian Thompson-Robinson,John Harbaugh,Kevin Stefanski,Brad Allen,CLE00,FirstEnergy Stadium
6475,2023_04_PIT_HOU,2023,REG,4,2023-10-01,Sunday,13:00,PIT,6.0,HOU,...,,00-0038102,00-0039163,Kenny Pickett,C.J. Stroud,Mike Tomlin,DeMeco Ryans,Bill Vinovich,HOU00,NRG Stadium
6476,2023_04_LA_IND,2023,REG,4,2023-10-01,Sunday,13:00,LA,29.0,IND,...,,00-0026498,00-0039164,Matthew Stafford,Anthony Richardson,Sean McVay,Shane Steichen,Land Clark,IND00,Lucas Oil Stadium
6477,2023_04_TB_NO,2023,REG,4,2023-10-01,Sunday,13:00,TB,26.0,NO,...,,00-0034855,00-0031280,Baker Mayfield,Derek Carr,Todd Bowles,Dennis Allen,Shawn Hochuli,NOR00,Mercedes-Benz Superdome
6478,2023_04_WAS_PHI,2023,REG,4,2023-10-01,Sunday,13:00,WAS,31.0,PHI,...,,00-0037077,00-0036389,Sam Howell,Jalen Hurts,Ron Rivera,Nick Sirianni,Ron Torbert,PHI00,Lincoln Financial Field


In [38]:
opponent_dict = dict(**dict(zip(week_schedule['away_team'], week_schedule['home_team'])), **dict(zip(week_schedule['home_team'], week_schedule['away_team'])))
consensus['opponent'] = consensus['pick'].map(opponent_dict)
consensus = pd.merge(consensus, consensus[['spread_pick', 'best_bet', 'weighted_consensus', 'opponent']], left_on='pick', right_on='opponent', suffixes=("", '_opponent'))
consensus = consensus.drop(columns=['opponent', 'pick_opponent'])
consensus

Unnamed: 0,pick,spread_pick,best_bet,weighted_consensus,spread_pick_opponent,best_bet_opponent,weighted_consensus_opponent,opponent_opponent,spread_pick_opponent.1,best_bet_opponent.1,weighted_consensus_opponent.1,opponent_opponent.1,spread_pick_opponent.2,best_bet_opponent.2,weighted_consensus_opponent.2,opponent_opponent.2
0,BUF,3,1,4,1,0,1,BUF,1,0,1,BUF,1,0,1,BUF
1,LA,2,1,3,1,0,1,LA,1,0,1,LA,1,0,1,LA
2,CLE,2,1,3,2,0,2,CLE,2,0,2,CLE,2,0,2,CLE
3,DAL,2,0,2,1,1,2,DAL,1,1,2,DAL,1,1,2,DAL
4,NE,1,1,2,2,0,2,NE,2,0,2,NE,2,0,2,NE
5,BAL,2,0,2,2,1,3,BAL,2,1,3,BAL,2,1,3,BAL
6,PIT,1,0,1,1,0,1,PIT,1,0,1,PIT,1,0,1,PIT
7,MIN,1,0,1,1,0,1,MIN,1,0,1,MIN,1,0,1,MIN
8,MIA,1,0,1,3,1,4,MIA,3,1,4,MIA,3,1,4,MIA
9,IND,1,0,1,2,1,3,IND,2,1,3,IND,2,1,3,IND


In [37]:
pd.merge(consensus, individual, left_index=True, right_on='pick')

Unnamed: 0,pick,pick_x,picker,pick_y,season,week,spread_pick,best_bet,underdog_pick,survivor_pick,...,home_team,away_score,home_score,result,spread_line,away_pick,away_cover,home_cover,pick_result,pick_multiplier
1868,BUF,3,hunter,BUF,2023,4,True,False,False,False,...,BUF,20.0,48.0,28.0,2.5,False,0.0,1.0,1.0,1
1869,BUF,3,harry,BUF,2023,4,True,True,False,False,...,BUF,20.0,48.0,28.0,2.5,False,0.0,1.0,2.0,2
1870,BUF,3,chuck,BUF,2023,4,True,False,False,False,...,BUF,20.0,48.0,28.0,2.5,False,0.0,1.0,1.0,1
1893,SEA,3,hunter,SEA,2023,4,True,False,False,False,...,NYG,24.0,3.0,-21.0,-2.5,True,1.0,0.0,1.0,1
1894,SEA,3,harry,SEA,2023,4,True,False,False,False,...,NYG,24.0,3.0,-21.0,-2.5,True,1.0,0.0,1.0,1
1895,SEA,3,ben,SEA,2023,4,True,False,False,False,...,NYG,24.0,3.0,-21.0,-2.5,True,1.0,0.0,1.0,1
1896,KC,3,hunter,KC,2023,4,True,False,False,False,...,NYJ,23.0,20.0,-3.0,-9.5,True,0.0,1.0,0.0,1
1897,KC,3,griffin,KC,2023,4,True,True,False,False,...,NYJ,23.0,20.0,-3.0,-9.5,True,0.0,1.0,0.0,2
1898,KC,3,chuck,KC,2023,4,True,False,False,False,...,NYJ,23.0,20.0,-3.0,-9.5,True,0.0,1.0,0.0,1
1905,TB,2,harry,TB,2023,4,True,False,False,False,...,NO,26.0,9.0,-17.0,4.0,True,1.0,0.0,1.0,1


# Team Pick Frequency & Homerism

In [4]:
spread_picks = df[df['spread_pick']].copy()
spread_picks['win'] = spread_picks['pick_result'] >= 1
spread_picks

Unnamed: 0,picker,pick,season,week,spread_pick,best_bet,underdog_pick,survivor_pick,mnf_pick,game_id,away_team,home_team,result,spread_line,away_pick,away_cover,home_cover,pick_result,win
0,final,NE,2021,1,True,True,False,False,False,2021_01_MIA_NE,MIA,NE,-1.0,3.5,False,1.0,0.0,0.0,False
1,ben,NE,2021,1,True,True,False,False,False,2021_01_MIA_NE,MIA,NE,-1.0,3.5,False,1.0,0.0,0.0,False
2,hunter,NE,2021,1,True,False,False,False,False,2021_01_MIA_NE,MIA,NE,-1.0,3.5,False,1.0,0.0,0.0,False
3,final,WAS,2021,1,True,False,False,False,False,2021_01_LAC_WAS,LAC,WAS,-4.0,2.5,False,1.0,0.0,0.0,False
4,ben,WAS,2021,1,True,False,False,False,False,2021_01_LAC_WAS,LAC,WAS,-4.0,2.5,False,1.0,0.0,0.0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1962,harry,CAR,2023,5,True,True,False,False,False,2023_05_CAR_DET,CAR,DET,18.0,9.5,True,0.0,1.0,0.0,False
1965,chuck,CAR,2023,5,True,False,False,False,False,2023_05_CAR_DET,CAR,DET,18.0,9.5,True,0.0,1.0,0.0,False
1967,griffin,NO,2023,5,True,False,False,False,False,2023_05_NO_NE,NO,NE,-34.0,2.5,True,1.0,0.0,1.0,True
1968,chuck,BAL,2023,5,True,False,False,False,False,2023_05_BAL_PIT,BAL,PIT,7.0,-4.5,True,0.0,1.0,0.0,False


In [33]:
for picker in spread_picks['picker'].unique():
    tmp = spread_picks[spread_picks['picker'] == picker].groupby('pick')['pick'].count().sort_values(ascending=False).to_frame().rename(columns={'pick': 'times_picked'}).reset_index().rename(columns={'pick': 'team'})

    col = 'times_picked'

    # Create the bar chart
    fig = px.bar(tmp, x=col, y='team', orientation='h', color='team', color_discrete_map=utils.team_unique_colors)


    tmp2 = spread_picks[spread_picks['picker'] == picker].groupby('pick')['best_bet'].sum().sort_values(ascending=False).to_frame().rename(columns={'pick': 'times_picked'}).reset_index().rename(columns={'pick': 'team'})
    tmp2['color'] = tmp2['team'].map(utils.team_unique_colors)
    
    tmp = pd.merge(tmp, tmp2, how='outer', on='team')
    
    # Duplicate the data to create the stacked bar with lower opacity
    fig.add_bar(x=tmp['best_bet'], y=tmp['team'], orientation='h', marker_color=tmp['color'], opacity=0.5)

    fig.update_yaxes(categoryorder='total ascending')
    fig.update_layout(
        height=1000,
        width=800,
        xaxis_title=f'Times Picked ATS',
        yaxis_title='Teams',
        title=f'Times {picker.capitalize()} Picked a Team ATS',
    )
    fig.update_traces(showlegend=False)

    # Iterate through the data and add logos to the chart
    for index, row in tmp.iterrows():
        team = row['team']
        scale = 1.25
        fig.add_layout_image(
            dict(source=f'https://a.espncdn.com/i/teamlogos/nfl/500/{team}.png',
                x=row[col]+row['best_bet'],  # Adjust the position
                y=team,
                xref="x",
                yref="y",
                sizex=scale,  # Adjust the size
                sizey=scale,  # Adjust the size
                sizing="contain",
                opacity=1,
                xanchor="center",
                yanchor="middle",        
            )
        )

    # Show the chart
    fig.show()

In [5]:
picker='griffin'
tmp = spread_picks[spread_picks['picker']==picker].groupby('pick')['pick'].count().sort_values(ascending=False).to_frame().rename(columns={'pick':'times_picked'}).reset_index().rename(columns={'pick':'team'})
col = 'times_picked'

# Create the bar chart
fig = px.bar(tmp, x=col, y='team', orientation='h', color='team', color_discrete_map=utils.team_unique_colors)
fig.update_yaxes(categoryorder='total ascending')
fig.update_layout(
    height=1000,
    width=800,
    xaxis_title=f'Times Picked ATS',
    yaxis_title='Teams',
    title=f'Times {picker.capitalize()} Picked a Team ATS',
)
fig.update_traces(showlegend=False)

# # Iterate through the data and add logos to the chart
# for index, row in tmp.iterrows():
#     team = row['team']
#     scale = 1.25
#     fig.add_layout_image(
#         dict(source=f'https://a.espncdn.com/i/teamlogos/nfl/500/{team}.png',
#             x=row[col],  # Adjust the position
#             y=team,
#             xref="x",
#             yref="y",
#             sizex=scale,  # Adjust the size
#             sizey=scale,  # Adjust the size
#             sizing="contain",
#             opacity=1,
#             xanchor="center",
#             yanchor="middle",        
#         )
#     )

# Show the chart
fig.show()

Unnamed: 0,team,best_bet
0,SEA,3
1,CLE,3
2,GB,3
3,LV,3
4,IND,3
5,ARI,2
6,CIN,2
7,JAX,2
8,ATL,1
9,TB,1


In [31]:
picker = 'griffin'
tmp = spread_picks[spread_picks['picker'] == picker].groupby('pick')['pick'].count().sort_values(ascending=False).to_frame().rename(columns={'pick': 'times_picked'}).reset_index().rename(columns={'pick': 'team'})

col = 'times_picked'

# Create the bar chart
fig = px.bar(tmp, x=col, y='team', orientation='h', color='team', color_discrete_map=utils.team_unique_colors)


tmp = spread_picks[spread_picks['picker'] == picker].groupby('pick')['best_bet'].sum().sort_values(ascending=False).to_frame().rename(columns={'pick': 'times_picked'}).reset_index().rename(columns={'pick': 'team'})
tmp['color'] = tmp['team'].map(utils.team_unique_colors)
# Duplicate the data to create the stacked bar with lower opacity
fig.add_bar(x=tmp['best_bet'], y=tmp['team'], orientation='h', marker_color=tmp['color'], opacity=0.5)

fig.update_yaxes(categoryorder='total ascending')
fig.update_layout(
    height=1000,
    width=800,
    xaxis_title=f'Times Picked ATS',
    yaxis_title='Teams',
    title=f'Times {picker.capitalize()} Picked a Team ATS',
)
fig.update_traces(showlegend=False)


# Graveyard

what do i need to do:
* find the most popular picks
* make a list of them with "consensus" as the picker and the other necessary cols
* join in nfl data and evaluate


In [None]:
nfl_df = nfl.import_schedules([SEASON])[['game_id', 'season', 'week', 'away_team', 'home_team', 'away_score', 'home_score', 'result', 'spread_line']] 




# turn pick types into one hot cols to make lookup faster
transformed['spread_pick'] = transformed['pick_type'].map({'ud': False, 'sd': False}).fillna(True)
transformed['best_bet'] = transformed['pick_type'] == 'bb'
transformed['underdog_pick'] = transformed['pick_type'] == 'ud'
transformed['survivor_pick'] = transformed['pick_type'] == 'sd'
transformed['mnf_pick'] = transformed['pick_type'] == 'mnf'
transformed = transformed.drop(columns='pick_type')

# join home and away picks
joined_away = pd.merge(
    transformed, 
    nfl_df,
    left_on=['season', 'week', 'pick'],
    right_on=['season', 'week', 'away_team']
)
joined_home = pd.merge(
    transformed, 
    nfl_df,
    left_on=['season', 'week', 'pick'],
    right_on=['season', 'week', 'home_team']
)


week_final_picks = pd.concat([joined_home, joined_away], ignore_index=True)
week_final_picks['away_pick'] = week_final_picks['pick'] == week_final_picks['away_team']
week_final_picks['away_cover'] = week_final_picks.apply(utils.cover_result, axis=1)
week_final_picks['home_cover'] = 1 - week_final_picks['away_cover']
week_final_picks['pick_result'] = week_final_picks.apply(utils.pick_result, axis=1)

