In [31]:
import pandas as pd
import pandasql as ps
import numpy as np
import warnings

In [32]:
# Load the raw NFL betting data from CSV and preview the first rows
df = pd.read_csv('NFL_Bet_Data.csv')
df.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
0,9/2/1966,1966,1,False,Miami Dolphins,14.0,23.0,Oakland Raiders,,,,Orange Bowl,False,83.0,6.0,71.0,
1,9/3/1966,1966,1,False,Houston Oilers,45.0,7.0,Denver Broncos,,,,Rice Stadium,False,81.0,7.0,70.0,
2,9/4/1966,1966,1,False,San Diego Chargers,27.0,7.0,Buffalo Bills,,,,Balboa Stadium,False,70.0,7.0,82.0,
3,9/9/1966,1966,2,False,Miami Dolphins,14.0,19.0,New York Jets,,,,Orange Bowl,False,82.0,11.0,78.0,
4,9/10/1966,1966,1,False,Green Bay Packers,24.0,3.0,Baltimore Colts,,,,Lambeau Field,False,64.0,8.0,62.0,


In [33]:
# Inspect the dtype pandas assigned to each column of the raw data
df.dtypes

schedule_date           object
schedule_season          int64
schedule_week           object
schedule_playoff          bool
team_home               object
score_home             float64
score_away             float64
team_away               object
team_favorite_id        object
spread_favorite        float64
over_under_line         object
stadium                 object
stadium_neutral           bool
weather_temperature    float64
weather_wind_mph       float64
weather_humidity       float64
weather_detail          object
dtype: object

In [34]:
# Parse schedule_date (object dtype after loading) into datetime values,
# then re-check the dtypes to confirm the conversion.
parsed_dates = pd.to_datetime(df['schedule_date'])
df['schedule_date'] = parsed_dates
df.dtypes

schedule_date          datetime64[ns]
schedule_season                 int64
schedule_week                  object
schedule_playoff                 bool
team_home                      object
score_home                    float64
score_away                    float64
team_away                      object
team_favorite_id               object
spread_favorite               float64
over_under_line                object
stadium                        object
stadium_neutral                  bool
weather_temperature           float64
weather_wind_mph              float64
weather_humidity              float64
weather_detail                 object
dtype: object

In [35]:
# Keep only games that have BOTH a point spread and an over/under line.
# The two masks are combined on the same frame so neither filter is lost
# (filtering `df` twice in a row would discard the first filter's result).
has_spread = df['spread_favorite'].notna()
has_over_under = df['over_under_line'].notna()
spread_df = df[has_spread & has_over_under]
spread_df

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
350,1968-01-14,1967,Superbowl,True,Green Bay Packers,33.0,14.0,Oakland Raiders,GB,-13.5,43,Orange Bowl,True,60.0,12.0,74.0,
538,1969-01-12,1968,Superbowl,True,Baltimore Colts,7.0,16.0,New York Jets,IND,-18.0,40,Orange Bowl,True,66.0,12.0,80.0,Rain
727,1970-01-11,1969,Superbowl,True,Kansas City Chiefs,23.0,7.0,Minnesota Vikings,MIN,-12.0,39,Tulane Stadium,True,55.0,14.0,84.0,Rain
916,1971-01-17,1970,Superbowl,True,Baltimore Colts,16.0,13.0,Dallas Cowboys,IND,-2.5,36,Orange Bowl,True,59.0,11.0,60.0,
1105,1972-01-16,1971,Superbowl,True,Dallas Cowboys,24.0,3.0,Miami Dolphins,DAL,-6.0,34,Tulane Stadium,True,34.0,18.0,40.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13321,2022-10-16,2022,6,False,New York Giants,24.0,20.0,Baltimore Ravens,BAL,-5.5,45.5,MetLife Stadium,False,,,,
13322,2022-10-16,2022,6,False,Philadelphia Eagles,26.0,17.0,Dallas Cowboys,PHI,-6.5,42.5,Lincoln Financial Field,False,,,,
13323,2022-10-16,2022,6,False,Pittsburgh Steelers,20.0,18.0,Tampa Bay Buccaneers,TB,-10.0,46.5,Acrisure Stadium,False,,,,
13324,2022-10-16,2022,6,False,Seattle Seahawks,19.0,9.0,Arizona Cardinals,ARI,-2.5,50.5,Lumen Field,False,,,,


In [36]:
# Keep only games from seasons after 2011 (i.e. the 2012 season onward)
is_recent_season = spread_df['schedule_season'] > 2011
spread_df = spread_df[is_recent_season]
# List the seasons that remain as a quick sanity check
spread_df['schedule_season'].unique().tolist()

[2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]

In [37]:
# Narrow the frame down to the columns used in the rest of the analysis
columns_to_keep = [
    'schedule_season',
    'schedule_week',
    'schedule_playoff',
    'team_home',
    'team_away',
    'score_home',
    'score_away',
    'team_favorite_id',
    'spread_favorite',
    'over_under_line',
    'stadium_neutral',
]
spread_df = spread_df[columns_to_keep]
spread_df

Unnamed: 0,schedule_season,schedule_week,schedule_playoff,team_home,team_away,score_home,score_away,team_favorite_id,spread_favorite,over_under_line,stadium_neutral
10542,2012,1,False,New York Giants,Dallas Cowboys,17.0,24.0,NYG,-4.0,46,False
10543,2012,1,False,Arizona Cardinals,Seattle Seahawks,20.0,16.0,SEA,-3.0,41,False
10544,2012,1,False,Chicago Bears,Indianapolis Colts,41.0,21.0,CHI,-10.0,42.5,False
10545,2012,1,False,Cleveland Browns,Philadelphia Eagles,16.0,17.0,PHI,-9.5,42,False
10546,2012,1,False,Denver Broncos,Pittsburgh Steelers,31.0,19.0,DEN,-2.0,45.5,False
...,...,...,...,...,...,...,...,...,...,...,...
13321,2022,6,False,New York Giants,Baltimore Ravens,24.0,20.0,BAL,-5.5,45.5,False
13322,2022,6,False,Philadelphia Eagles,Dallas Cowboys,26.0,17.0,PHI,-6.5,42.5,False
13323,2022,6,False,Pittsburgh Steelers,Tampa Bay Buccaneers,20.0,18.0,TB,-10.0,46.5,False
13324,2022,6,False,Seattle Seahawks,Arizona Cardinals,19.0,9.0,ARI,-2.5,50.5,False


In [38]:
# Keep only non-playoff games that were not played at a neutral stadium,
# then drop the two (boolean) flag columns used for the filter.
# reset_index() is called without drop=True, so the previous row labels are
# kept in a new 'index' column.
is_not_playoff = ~spread_df['schedule_playoff']
is_not_neutral = ~spread_df['stadium_neutral']
spread_df = (
    spread_df[is_not_playoff & is_not_neutral]
    .drop(columns=['schedule_playoff', 'stadium_neutral'])
    .reset_index()
)

In [39]:
# Total_Score = combined points scored by the home and away teams
spread_df['Total_Score'] = spread_df['score_home'].add(spread_df['score_away'])
spread_df

Unnamed: 0,index,schedule_season,schedule_week,team_home,team_away,score_home,score_away,team_favorite_id,spread_favorite,over_under_line,Total_Score
0,10542,2012,1,New York Giants,Dallas Cowboys,17.0,24.0,NYG,-4.0,46,41.0
1,10543,2012,1,Arizona Cardinals,Seattle Seahawks,20.0,16.0,SEA,-3.0,41,36.0
2,10544,2012,1,Chicago Bears,Indianapolis Colts,41.0,21.0,CHI,-10.0,42.5,62.0
3,10545,2012,1,Cleveland Browns,Philadelphia Eagles,16.0,17.0,PHI,-9.5,42,33.0
4,10546,2012,1,Denver Broncos,Pittsburgh Steelers,31.0,19.0,DEN,-2.0,45.5,50.0
...,...,...,...,...,...,...,...,...,...,...,...
2631,13321,2022,6,New York Giants,Baltimore Ravens,24.0,20.0,BAL,-5.5,45.5,44.0
2632,13322,2022,6,Philadelphia Eagles,Dallas Cowboys,26.0,17.0,PHI,-6.5,42.5,43.0
2633,13323,2022,6,Pittsburgh Steelers,Tampa Bay Buccaneers,20.0,18.0,TB,-10.0,46.5,38.0
2634,13324,2022,6,Seattle Seahawks,Arizona Cardinals,19.0,9.0,ARI,-2.5,50.5,28.0


In [40]:
# Cast over_under_line to float so it can be compared with Total_Score, then
# label every game: 'Yes' when the total is above the line, 'No' when it is
# below, and 'Push' for every row where neither comparison holds.
over_under_as_float = spread_df['over_under_line'].astype(float)
spread_df['over_under_line'] = over_under_as_float

total_points = spread_df['Total_Score']
went_over = total_points > over_under_as_float
went_under = total_points < over_under_as_float

under_or_push = np.where(went_under, 'No', 'Push')
spread_df['Over_Total_Points'] = np.where(went_over, 'Yes', under_or_push)
spread_df

Unnamed: 0,index,schedule_season,schedule_week,team_home,team_away,score_home,score_away,team_favorite_id,spread_favorite,over_under_line,Total_Score,Over_Total_Points
0,10542,2012,1,New York Giants,Dallas Cowboys,17.0,24.0,NYG,-4.0,46.0,41.0,No
1,10543,2012,1,Arizona Cardinals,Seattle Seahawks,20.0,16.0,SEA,-3.0,41.0,36.0,No
2,10544,2012,1,Chicago Bears,Indianapolis Colts,41.0,21.0,CHI,-10.0,42.5,62.0,Yes
3,10545,2012,1,Cleveland Browns,Philadelphia Eagles,16.0,17.0,PHI,-9.5,42.0,33.0,No
4,10546,2012,1,Denver Broncos,Pittsburgh Steelers,31.0,19.0,DEN,-2.0,45.5,50.0,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...
2631,13321,2022,6,New York Giants,Baltimore Ravens,24.0,20.0,BAL,-5.5,45.5,44.0,No
2632,13322,2022,6,Philadelphia Eagles,Dallas Cowboys,26.0,17.0,PHI,-6.5,42.5,43.0,Yes
2633,13323,2022,6,Pittsburgh Steelers,Tampa Bay Buccaneers,20.0,18.0,TB,-10.0,46.5,38.0,No
2634,13324,2022,6,Seattle Seahawks,Arizona Cardinals,19.0,9.0,ARI,-2.5,50.5,28.0,No


In [41]:
# Map franchise names that changed during the covered seasons to the current
# team name, so each franchise appears under a single name in both the
# home and away columns.
current_team_names = {
    'San Diego Chargers': 'Los Angeles Chargers',
    'St. Louis Rams': 'Los Angeles Rams',
    'Oakland Raiders': 'Las Vegas Raiders',
    'Washington Football Team': 'Washington Commanders',
    'Washington Redskins': 'Washington Commanders',
}

# Exact-value replacement; names that are not keys of the mapping are left as-is.
for team_column in ('team_home', 'team_away'):
    spread_df[team_column] = spread_df[team_column].replace(current_team_names)

In [42]:
# Load a CSV that maps each team abbreviation to its full team name, so the
# favorite's abbreviation (team_favorite_id) can be compared to a team name.
# PICK means that there isn't a favorite and the teams are rated equal.
team_df = pd.read_csv("TeamABV.csv")
team_df

Unnamed: 0,Abbreviation,Team_Favorite
0,NYG,New York Giants
1,SEA,Seattle Seahawks
2,CHI,Chicago Bears
3,PHI,Philadelphia Eagles
4,DEN,Denver Broncos
5,DET,Detroit Lions
6,GB,Green Bay Packers
7,HOU,Houston Texans
8,ATL,Atlanta Falcons
9,MIN,Minnesota Vikings


In [43]:
# Attach the favorite's full team name to every game by joining spread_df to
# team_df on team_favorite_id = Abbreviation.
# NOTE(review): this is a plain SQL JOIN (inner join), so games whose
# team_favorite_id has no matching Abbreviation in team_df are not returned.
bet_df = ps.sqldf("Select * FROM spread_df sdf join team_df tdf on sdf.team_favorite_id	 = tdf.Abbreviation")
bet_df

Unnamed: 0,index,schedule_season,schedule_week,team_home,team_away,score_home,score_away,team_favorite_id,spread_favorite,over_under_line,Total_Score,Over_Total_Points,Abbreviation,Team_Favorite
0,10542,2012,1,New York Giants,Dallas Cowboys,17.0,24.0,NYG,-4.0,46.0,41.0,No,NYG,New York Giants
1,10543,2012,1,Arizona Cardinals,Seattle Seahawks,20.0,16.0,SEA,-3.0,41.0,36.0,No,SEA,Seattle Seahawks
2,10544,2012,1,Chicago Bears,Indianapolis Colts,41.0,21.0,CHI,-10.0,42.5,62.0,Yes,CHI,Chicago Bears
3,10545,2012,1,Cleveland Browns,Philadelphia Eagles,16.0,17.0,PHI,-9.5,42.0,33.0,No,PHI,Philadelphia Eagles
4,10546,2012,1,Denver Broncos,Pittsburgh Steelers,31.0,19.0,DEN,-2.0,45.5,50.0,Yes,DEN,Denver Broncos
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2631,13321,2022,6,New York Giants,Baltimore Ravens,24.0,20.0,BAL,-5.5,45.5,44.0,No,BAL,Baltimore Ravens
2632,13322,2022,6,Philadelphia Eagles,Dallas Cowboys,26.0,17.0,PHI,-6.5,42.5,43.0,Yes,PHI,Philadelphia Eagles
2633,13323,2022,6,Pittsburgh Steelers,Tampa Bay Buccaneers,20.0,18.0,TB,-10.0,46.5,38.0,No,TB,Tampa Bay Buccaneers
2634,13324,2022,6,Seattle Seahawks,Arizona Cardinals,19.0,9.0,ARI,-2.5,50.5,28.0,No,ARI,Arizona Cardinals


In [44]:
# Rearrange the columns and keep only the ones needed for the spread analysis.
# .copy() makes nfl_df an independent DataFrame rather than a selection derived
# from bet_df, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
nfl_df = bet_df[[
    'schedule_season',
    'schedule_week',
    'team_home',
    'team_away',
    'Team_Favorite',
    'spread_favorite',
    'score_home',
    'score_away',
    'over_under_line',
    'Total_Score',
    'Over_Total_Points',
]].copy()
nfl_df

Unnamed: 0,schedule_season,schedule_week,team_home,team_away,Team_Favorite,spread_favorite,score_home,score_away,over_under_line,Total_Score,Over_Total_Points
0,2012,1,New York Giants,Dallas Cowboys,New York Giants,-4.0,17.0,24.0,46.0,41.0,No
1,2012,1,Arizona Cardinals,Seattle Seahawks,Seattle Seahawks,-3.0,20.0,16.0,41.0,36.0,No
2,2012,1,Chicago Bears,Indianapolis Colts,Chicago Bears,-10.0,41.0,21.0,42.5,62.0,Yes
3,2012,1,Cleveland Browns,Philadelphia Eagles,Philadelphia Eagles,-9.5,16.0,17.0,42.0,33.0,No
4,2012,1,Denver Broncos,Pittsburgh Steelers,Denver Broncos,-2.0,31.0,19.0,45.5,50.0,Yes
...,...,...,...,...,...,...,...,...,...,...,...
2631,2022,6,New York Giants,Baltimore Ravens,Baltimore Ravens,-5.5,24.0,20.0,45.5,44.0,No
2632,2022,6,Philadelphia Eagles,Dallas Cowboys,Philadelphia Eagles,-6.5,26.0,17.0,42.5,43.0,Yes
2633,2022,6,Pittsburgh Steelers,Tampa Bay Buccaneers,Tampa Bay Buccaneers,-10.0,20.0,18.0,46.5,38.0,No
2634,2022,6,Seattle Seahawks,Arizona Cardinals,Arizona Cardinals,-2.5,19.0,9.0,50.5,28.0,No


In [45]:
# Work on an independent copy so the column assignments below do not trigger
# pandas' SettingWithCopyWarning (nfl_df was built by selecting columns of bet_df).
nfl_df = nfl_df.copy()

# home_team_adjusted = home score with the spread applied:
#   * home team is the favorite -> score_home + spread_favorite
#   * otherwise                 -> score_home - spread_favorite
# (spread_favorite values in the displayed data are negative, so the favorite
# is handicapped and the other side is credited by the same amount.)
# Both branches are float arrays, so the result is already float and no
# string fallback / astype(float) round-trip is needed.
home_is_favorite = nfl_df['team_home'] == nfl_df['Team_Favorite']
nfl_df['home_team_adjusted'] = np.where(
    home_is_favorite,
    nfl_df['score_home'] + nfl_df['spread_favorite'],
    nfl_df['score_home'] - nfl_df['spread_favorite'],
)

# Spread_Winner = the team that wins once the spread is applied; any row where
# neither side is ahead after the adjustment is labelled 'Push'.
home_covers = nfl_df['home_team_adjusted'] > nfl_df['score_away']
away_covers = nfl_df['score_away'] > nfl_df['home_team_adjusted']
nfl_df['Spread_Winner'] = np.where(
    home_covers,
    nfl_df['team_home'],
    np.where(away_covers, nfl_df['team_away'], 'Push'),
)

nfl_df



                                        



Unnamed: 0,schedule_season,schedule_week,team_home,team_away,Team_Favorite,spread_favorite,score_home,score_away,over_under_line,Total_Score,Over_Total_Points,home_team_adjusted,Spread_Winner
0,2012,1,New York Giants,Dallas Cowboys,New York Giants,-4.0,17.0,24.0,46.0,41.0,No,13.0,Dallas Cowboys
1,2012,1,Arizona Cardinals,Seattle Seahawks,Seattle Seahawks,-3.0,20.0,16.0,41.0,36.0,No,23.0,Arizona Cardinals
2,2012,1,Chicago Bears,Indianapolis Colts,Chicago Bears,-10.0,41.0,21.0,42.5,62.0,Yes,31.0,Chicago Bears
3,2012,1,Cleveland Browns,Philadelphia Eagles,Philadelphia Eagles,-9.5,16.0,17.0,42.0,33.0,No,25.5,Cleveland Browns
4,2012,1,Denver Broncos,Pittsburgh Steelers,Denver Broncos,-2.0,31.0,19.0,45.5,50.0,Yes,29.0,Denver Broncos
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2631,2022,6,New York Giants,Baltimore Ravens,Baltimore Ravens,-5.5,24.0,20.0,45.5,44.0,No,29.5,New York Giants
2632,2022,6,Philadelphia Eagles,Dallas Cowboys,Philadelphia Eagles,-6.5,26.0,17.0,42.5,43.0,Yes,19.5,Philadelphia Eagles
2633,2022,6,Pittsburgh Steelers,Tampa Bay Buccaneers,Tampa Bay Buccaneers,-10.0,20.0,18.0,46.5,38.0,No,30.0,Pittsburgh Steelers
2634,2022,6,Seattle Seahawks,Arizona Cardinals,Arizona Cardinals,-2.5,19.0,9.0,50.5,28.0,No,21.5,Seattle Seahawks
