In [1]:
import pandas as pd

# Read the merged NFL games table from the working directory.
df_nfl = pd.read_csv(filepath_or_buffer='nfl_merged.csv')

# Show the first rows as a quick sanity check of the load.
df_nfl.head(5)

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,...,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,team_id_home,home_conference,home_division,team_id_away,away_conference,away_division
0,9/2/1966,1966,1,False,miami dolphins,14.0,23.0,oakland raiders,,,...,83.0,6.0,71.0,,MIA,AFC,AFC East,LVR,AFC,AFC West
1,9/3/1966,1966,1,False,houston oilers,45.0,7.0,denver broncos,,,...,81.0,7.0,70.0,,TEN,AFC,,DEN,AFC,AFC West
2,9/4/1966,1966,1,False,san diego chargers,27.0,7.0,buffalo bills,,,...,70.0,7.0,82.0,,LAC,AFC,AFC West,BUF,AFC,AFC East
3,9/9/1966,1966,2,False,miami dolphins,14.0,19.0,new york jets,,,...,82.0,11.0,78.0,,MIA,AFC,AFC East,NYJ,NFC,AFC East
4,9/10/1966,1966,1,False,green bay packers,24.0,3.0,baltimore colts,,,...,64.0,8.0,62.0,,GB,NFC,NFC North,IND,AFC,


In [2]:
# Clean the over/under column, then drop games with no betting lines.
#
# Order matters: `over_under_line` is stripped and coerced to a number
# *before* the null filter. Blank or otherwise non-numeric entries are
# non-null strings in the raw frame, so filtering first would let them through
# and the coercion below would then turn them into NaN that is never dropped.
over_under_numeric = pd.to_numeric(
    df_nfl['over_under_line'].astype(str).str.strip(),
    errors='coerce',  # anything unparseable becomes NaN and is removed below
)

df_nfl = (
    df_nfl
    .assign(over_under_line=over_under_numeric)
    .dropna(subset=['team_favorite_id', 'spread_favorite', 'over_under_line'])
    # Independent frame, so later column assignments never target a view/slice.
    .copy()
)

df_nfl.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,...,weather_temperature,weather_wind_mph,weather_humidity,weather_detail,team_id_home,home_conference,home_division,team_id_away,away_conference,away_division
350,1/14/1968,1967,Superbowl,True,green bay packers,33.0,14.0,oakland raiders,GB,-13.5,...,60.0,12.0,74.0,,GB,NFC,NFC North,LVR,AFC,AFC West
538,1/12/1969,1968,Superbowl,True,baltimore colts,7.0,16.0,new york jets,IND,-18.0,...,66.0,12.0,80.0,rain,IND,AFC,,NYJ,NFC,AFC East
727,1/11/1970,1969,Superbowl,True,kansas city chiefs,23.0,7.0,minnesota vikings,MIN,-12.0,...,55.0,14.0,84.0,rain,KC,AFC,AFC West,MIN,NFC,NFC North
916,1/17/1971,1970,Superbowl,True,baltimore colts,16.0,13.0,dallas cowboys,IND,-2.5,...,59.0,11.0,60.0,,IND,AFC,,DAL,NFC,NFC East
1105,1/16/1972,1971,Superbowl,True,dallas cowboys,24.0,3.0,miami dolphins,DAL,-6.0,...,34.0,18.0,40.0,,DAL,NFC,NFC East,MIA,AFC,AFC East


In [3]:
# Derive the outcome columns used for the win / spread-cover / over-under
# probabilities. Every flag below is built from a strict ">" comparison, so an
# exact tie (or push) is encoded as 0, and a comparison against a missing
# value also evaluates to False and is encoded as 0.

# --- Win outcome: 1 when the home side outscored the away side. ---
df_nfl['home_win'] = df_nfl['score_home'].gt(df_nfl['score_away']).astype(int)

# --- Spread cover outcome ---
# Home-minus-away scoring margin.
df_nfl['score_diff'] = df_nfl['score_home'].sub(df_nfl['score_away'])

# 1 when the favorite's id matches the home team's id, otherwise 0.
df_nfl['is_home_favorite'] = (
    df_nfl['team_favorite_id'].eq(df_nfl['team_id_home']).astype(int)
)

# Re-express the margin from the favorite's side: flip the sign on every row
# where the home team is not the favorite.
df_nfl['favorite_score_diff'] = df_nfl['score_diff'].mask(
    df_nfl['is_home_favorite'] != 1,
    -df_nfl['score_diff']
)

# The spread is added as-is (it is negative in the previewed rows), so a
# positive result means the favorite won by more than the spread.
df_nfl['cover_margin'] = df_nfl['favorite_score_diff'].add(df_nfl['spread_favorite'])

df_nfl['favorite_cover'] = df_nfl['cover_margin'].gt(0).astype(int)

# --- Over/under outcome ---
df_nfl['total_score'] = df_nfl['score_home'].add(df_nfl['score_away'])

# 1 when the combined score finished strictly above the posted total.
df_nfl['over_result'] = df_nfl['total_score'].gt(df_nfl['over_under_line']).astype(int)

# Signed distance of the combined score from the posted total.
df_nfl['over_under_margin'] = df_nfl['total_score'].sub(df_nfl['over_under_line'])

df_nfl.head()

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,...,away_division,home_win,score_diff,is_home_favorite,favorite_score_diff,cover_margin,favorite_cover,total_score,over_result,over_under_margin
350,1/14/1968,1967,Superbowl,True,green bay packers,33.0,14.0,oakland raiders,GB,-13.5,...,AFC West,1,19.0,1,19.0,5.5,1,47.0,1,4.0
538,1/12/1969,1968,Superbowl,True,baltimore colts,7.0,16.0,new york jets,IND,-18.0,...,AFC East,0,-9.0,1,-9.0,-27.0,0,23.0,0,-17.0
727,1/11/1970,1969,Superbowl,True,kansas city chiefs,23.0,7.0,minnesota vikings,MIN,-12.0,...,NFC North,1,16.0,0,-16.0,-28.0,0,30.0,0,-9.0
916,1/17/1971,1970,Superbowl,True,baltimore colts,16.0,13.0,dallas cowboys,IND,-2.5,...,NFC East,1,3.0,1,3.0,0.5,1,29.0,0,-7.0
1105,1/16/1972,1971,Superbowl,True,dallas cowboys,24.0,3.0,miami dolphins,DAL,-6.0,...,AFC East,1,21.0,1,21.0,15.0,1,27.0,0,-7.0


In [4]:
# Persist the enriched table; the DataFrame index is not written to the file.
df_nfl.to_csv(path_or_buf='nfl_final.csv', index=False)