In [1]:
import pandas as pd
import numpy as np
from clean import clean_games
from bs4 import BeautifulSoup
import requests
import re


This page reads in a csv from kaggle.com to take in game scores.

Download for yourself [here](https://www.kaggle.com/tobycrabtree/nfl-scores-and-betting-data) (no subscription needed, but you will need to create Kaggle account).

In [2]:
spread_df = pd.read_csv("data/spreadspoke_scores.csv")

mask = spread_df.schedule_season >= 1980
spread_df = spread_df[mask]
spread_df.sample(10)

Unnamed: 0,schedule_date,schedule_season,schedule_week,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite,over_under_line,stadium,stadium_neutral,weather_temperature,weather_wind_mph,weather_humidity,weather_detail
8003,11/10/02,2002,10,False,Baltimore Ravens,38.0,27.0,Cincinnati Bengals,BAL,-4.0,37.0,M&T Bank Stadium,False,58.0,8.0,84.0,
4930,9/16/90,1990,2,False,Dallas Cowboys,7.0,28.0,New York Giants,NYG,-7.5,37.0,Texas Stadium,False,84.0,5.0,58.0,
9436,12/23/07,2007,16,False,Cincinnati Bengals,19.0,14.0,Cleveland Browns,CLE,-3.0,47.5,Paul Brown Stadium,False,45.0,20.0,72.0,
8045,11/24/02,2002,12,False,Tampa Bay Buccaneers,21.0,7.0,Green Bay Packers,TB,-3.0,38.5,Raymond James Stadium,False,53.0,5.0,76.0,
6301,12/16/95,1995,16,False,New Orleans Saints,23.0,34.0,Green Bay Packers,GB,-5.0,46.0,Louisiana Superdome,False,72.0,0.0,,DOME
10496,12/18/11,2011,15,False,San Diego Chargers,34.0,14.0,Baltimore Ravens,BAL,-2.5,44.0,Qualcomm Stadium,False,58.0,7.0,67.0,
7254,11/25/99,1999,12,False,Detroit Lions,21.0,17.0,Chicago Bears,DET,-6.0,44.0,Pontiac Silverdome,False,72.0,0.0,,DOME
9753,9/13/09,2009,1,False,Seattle Seahawks,28.0,0.0,St. Louis Rams,SEA,-7.0,41.0,CenturyLink Field,False,74.0,8.0,63.0,
9371,11/25/07,2007,12,False,Carolina Panthers,6.0,31.0,New Orleans Saints,NO,-3.0,42.0,Bank of America Stadium,False,44.0,6.0,46.0,
10613,10/7/12,2012,5,False,New Orleans Saints,31.0,24.0,San Diego Chargers,NO,-3.5,52.5,Louisiana Superdome,False,72.0,0.0,,DOME


In [3]:
spread_df.drop(columns=['schedule_week', 'over_under_line', 'stadium', 'stadium_neutral', 'weather_temperature', 'weather_wind_mph', 'weather_humidity', 'weather_detail'], inplace=True)



In [4]:
spread_df.team_away.unique()

array(['Miami Dolphins', 'Tampa Bay Buccaneers', 'Chicago Bears',
       'Oakland Raiders', 'Detroit Lions', 'Atlanta Falcons',
       'Cleveland Browns', 'San Francisco 49ers', 'Baltimore Colts',
       'Denver Broncos', 'Houston Oilers', 'San Diego Chargers',
       'New York Giants', 'Dallas Cowboys', 'Los Angeles Rams',
       'Pittsburgh Steelers', 'New York Jets', 'New Orleans Saints',
       'Seattle Seahawks', 'Cincinnati Bengals', 'Philadelphia Eagles',
       'Washington Redskins', 'St. Louis Cardinals', 'Minnesota Vikings',
       'Kansas City Chiefs', 'Green Bay Packers', 'Buffalo Bills',
       'New England Patriots', 'Los Angeles Raiders',
       'Indianapolis Colts', 'Phoenix Cardinals', 'Arizona Cardinals',
       'Carolina Panthers', 'St. Louis Rams', 'Jacksonville Jaguars',
       'Baltimore Ravens', 'Tennessee Oilers', 'Tennessee Titans',
       'Houston Texans', 'Los Angeles Chargers', 'Las Vegas Raiders',
       'Washington Football Team'], dtype=object)

In [5]:
game_df = clean_games("data/games_scraped.pickle", start_year=1980)

In [6]:
game_df.team.unique()

array(['crd', 'atl', 'rav', 'buf', 'car', 'chi', 'cin', 'cle', 'dal',
       'den', 'det', 'gnb', 'htx', 'clt', 'jax', 'kan', 'mia', 'min',
       'nwe', 'nor', 'nyg', 'nyj', 'rai', 'phi', 'pit', 'sdg', 'sfo',
       'sea', 'ram', 'tam', 'oti', 'was'], dtype=object)

Let's change the team names in spread_df to match the abbrevs we're using in game_df.

In [7]:
team_map = {
    'Miami Dolphins': 'mia',
    'Tampa Bay Buccaneers': 'tam',
    'Chicago Bears': 'chi',
    'Oakland Raiders': 'rai',
    'Detroit Lions': 'det', 
    'Atlanta Falcons': 'atl',
    'Cleveland Browns': 'cle', 
    'San Francisco 49ers': 'sfo', 
    'Baltimore Colts': 'clt',
    'Denver Broncos': 'den', 
    'Houston Oilers': 'oti', 
    'San Diego Chargers': 'sdg',
    'New York Giants': 'nyg', 
    'Dallas Cowboys': 'dal', 
    'Los Angeles Rams': 'ram',
    'Pittsburgh Steelers': 'pit',
    'New York Jets': 'nyj',
    'New Orleans Saints': 'nor',
    'Seattle Seahawks': 'sea',
    'Cincinnati Bengals': 'cin',
    'Philadelphia Eagles': 'phi',
    'Washington Redskins': 'was',
    'St. Louis Cardinals': 'crd',
    'Minnesota Vikings': 'min',
    'Kansas City Chiefs': 'kan', 
    'Green Bay Packers': 'gnb', 
    'Buffalo Bills': 'buf',
    'New England Patriots': 'nwe',
    'Los Angeles Raiders': 'rai',
    'Indianapolis Colts': 'clt',
    'Phoenix Cardinals': 'crd',     
    'Arizona Cardinals': 'crd',
    'Carolina Panthers': 'car', 
    'St. Louis Rams': 'ram',
    'Jacksonville Jaguars': 'jax',
    'Baltimore Ravens': 'rav', 
    'Tennessee Oilers': 'oti', 
    'Tennessee Titans': 'oti',
    'Houston Texans': 'htx', 
    'Los Angeles Chargers': 'sdg',
    'Las Vegas Raiders': 'rai',
    'Washington Football Team': 'was',  
}

In [8]:
spread_df['team_home'] = spread_df['team_home'].map(team_map)

In [9]:
spread_df['team_away'] = spread_df['team_away'].map(team_map)

In [10]:
abbrev_map = {'MIA': 'mia',
 'TB': 'tam',
 'CHI': 'chi',
 'KC': 'kan',
 'LAR': 'ram',
 'MIN': 'min',
 'NE': 'nwe',
 'NO': 'nor',
 'NYJ':'nyj',
 'PHI': 'phi',
 'PIT': 'pit',
 'LAC': 'sdg',
 'ARI': 'crd',
 'WAS': 'was',
 'PICK': 'PICK',
 'DEN': 'den',
 'DET': 'det',
 'SF': 'sfo',
 'TEN': 'oti',
 'ATL': 'atl',
 'CLE': 'cle',
 'DAL': 'dal',
 'LVR': 'rai',
 'SEA': 'sea',
 'IND': 'clt',
 'BUF': 'buf',
 'CIN': 'cin',
 'GB': 'gnb',
 'NYG': 'nyg',
 'BAL': 'rav',
 'JAX': 'jax',
 'CAR': 'car',
 'HOU': 'htx',
}

In [11]:
spread_df['team_favorite_id'] = spread_df['team_favorite_id'].map(abbrev_map)

In [12]:
spread_df

Unnamed: 0,schedule_date,schedule_season,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite
2734,9/7/80,1980,False,buf,17.0,7.0,mia,mia,-3.0
2735,9/7/80,1980,False,cin,12.0,17.0,tam,tam,-1.0
2736,9/7/80,1980,False,gnb,12.0,6.0,chi,chi,-6.0
2737,9/7/80,1980,False,kan,14.0,27.0,rai,kan,-3.0
2738,9/7/80,1980,False,ram,20.0,41.0,det,ram,-9.0
...,...,...,...,...,...,...,...,...,...
12929,1/3/21,2020,False,nwe,,,nyj,,
12930,1/3/21,2020,False,nyg,,,dal,,
12931,1/3/21,2020,False,phi,,,was,,
12932,1/3/21,2020,False,sfo,,,sea,,


In [13]:
spread_df['schedule_date'] = pd.to_datetime(spread_df['schedule_date'])

In [14]:
spread_df.dropna(axis=0, subset=['spread_favorite'], inplace=True)

In [15]:
spread_df

Unnamed: 0,schedule_date,schedule_season,schedule_playoff,team_home,score_home,score_away,team_away,team_favorite_id,spread_favorite
2734,1980-09-07,1980,False,buf,17.0,7.0,mia,mia,-3.0
2735,1980-09-07,1980,False,cin,12.0,17.0,tam,tam,-1.0
2736,1980-09-07,1980,False,gnb,12.0,6.0,chi,chi,-6.0
2737,1980-09-07,1980,False,kan,14.0,27.0,rai,kan,-3.0
2738,1980-09-07,1980,False,ram,20.0,41.0,det,ram,-9.0
...,...,...,...,...,...,...,...,...,...
12792,2020-11-01,2020,False,kan,35.0,9.0,nyj,kan,-19.5
12793,2020-11-01,2020,False,mia,28.0,17.0,ram,ram,-3.5
12794,2020-11-01,2020,False,phi,23.0,9.0,dal,phi,-11.5
12795,2020-11-01,2020,False,sea,37.0,27.0,sfo,sea,-3.0


In [16]:
def apply_game_id(row):
    teams = []
    teams.append(str(row["team_home"]))
    teams.append(str(row["team_away"]))
    teams.sort()

    game_id = teams[0] + "-" + teams[1] + "-" + str(row["schedule_date"])[:-9]
    return game_id

spread_df["game_id"] = spread_df.apply(apply_game_id, axis=1)

In [17]:
spread_df.index = spread_df.game_id


In [18]:
spread_df.loc['sea-sfo-2020-11-01']['team_favorite_id']

'sea'

In [19]:
mask = game_df['team_year'] == 'chi-2019'
game_df[mask]

Unnamed: 0,team,year,team_year,date,opp,week_num,decade,game_day_of_week,game_outcome,team_record,...,ewma_yards_off_opp,ewma_pass_yds_off_opp,ewma_rush_yds_off_opp,ewma_to_off_opp,ewma_to2_off_opp,ewma_yards_def_opp,ewma_pass_yds_def_opp,ewma_rush_yds_def_opp,ewma_to_def_opp,ewma_to2_def_opp
2851,chi,2019,chi-2019,2019-09-29,min,4,201,Sun,W,3-1,...,362.997,167.719,195.278,1.326,5.306,325.524,223.343,102.181,1.917,4.336
2852,chi,2019,chi-2019,2019-10-06,rai,5,201,Sun,L,3-2,...,336.851,207.394,129.456,1.027,1.492,384.007,277.499,106.508,0.83,1.427
2853,chi,2019,chi-2019,2019-10-20,nor,7,201,Sun,L,3-3,...,342.859,238.584,104.275,0.777,0.777,323.756,227.263,96.493,1.15,2.191
2854,chi,2019,chi-2019,2019-10-27,sdg,8,201,Sun,L,3-4,...,359.904,294.337,65.567,1.734,4.149,332.227,216.46,115.766,1.17,1.699
2855,chi,2019,chi-2019,2019-11-03,phi,9,201,Sun,L,3-5,...,341.044,208.965,132.078,1.867,5.43,341.013,241.146,99.868,1.512,3.32
2856,chi,2019,chi-2019,2019-11-10,det,10,201,Sun,W,4-5,...,396.016,303.962,92.054,1.401,2.657,428.136,290.26,137.876,1.337,3.155
2857,chi,2019,chi-2019,2019-11-17,ram,11,201,Sun,L,4-6,...,372.553,279.344,93.209,1.782,5.692,329.327,243.18,86.147,1.539,3.323
2858,chi,2019,chi-2019,2019-11-24,nyg,12,201,Sun,W,5-6,...,306.915,221.808,85.107,2.397,6.775,370.294,249.501,120.793,1.215,2.895
2859,chi,2019,chi-2019,2019-11-28,det,13,201,Thu,W,6-6,...,374.532,264.678,109.855,1.566,4.185,383.42,269.406,114.014,1.231,2.602
2860,chi,2019,chi-2019,2019-12-05,dal,14,201,Thu,W,7-6,...,426.295,307.479,118.816,1.371,2.569,319.987,209.679,110.308,0.852,2.45


In [20]:
def apply_spread_cols(row):
    id_ = row['game_id']
    tm = row['team']
    opp = row['opp']
    
    try:
        fav = spread_df.loc[id_]['team_favorite_id']
        spread = spread_df.loc[id_]['spread_favorite']
    
        if fav == 'PICK':
            return 0

        if tm == fav:
            return spread * -1.0 # margin should be positive in game_df if 'team' is favored

        if opp == fav:
            return spread
    except:
        return np.NaN
        
game_df['vegas_pred_margin'] = game_df.apply(apply_spread_cols, axis=1)
    

In [21]:
mask = game_df['team_year'] == 'chi-2020'
game_df[mask]

Unnamed: 0,team,year,team_year,date,opp,week_num,decade,game_day_of_week,game_outcome,team_record,...,ewma_pass_yds_off_opp,ewma_rush_yds_off_opp,ewma_to_off_opp,ewma_to2_off_opp,ewma_yards_def_opp,ewma_pass_yds_def_opp,ewma_rush_yds_def_opp,ewma_to_def_opp,ewma_to2_def_opp,vegas_pred_margin
2864,chi,2020,chi-2020,2020-10-04,clt,4,202,Sun,L,3-1,...,259.709,120.541,0.917,1.502,226.255,132.138,94.117,2.122,6.367,-3.0
2865,chi,2020,chi-2020,2020-10-08,tam,5,202,Thu,W,4-1,...,281.761,98.29,1.379,3.076,311.249,249.382,61.867,2.055,5.97,-3.5
2866,chi,2020,chi-2020,2020-10-18,car,6,202,Sun,W,5-1,...,283.041,119.69,0.918,3.003,354.574,219.182,135.392,1.611,4.321,-1.5
2867,chi,2020,chi-2020,2020-10-26,ram,7,202,Mon,L,5-2,...,249.129,130.168,1.153,1.459,310.958,204.722,106.236,0.885,2.001,-6.0
2868,chi,2020,chi-2020,2020-11-01,nor,8,202,Sun,L,5-3,...,269.513,121.425,0.881,0.881,324.894,238.315,86.579,0.666,1.381,-4.0
2869,chi,2020,chi-2020,2020-11-08,oti,9,202,Sun,L,5-4,...,249.576,161.055,0.638,0.952,393.438,269.755,123.683,1.626,4.473,
2870,chi,2020,chi-2020,2020-11-16,min,10,202,Mon,L,5-5,...,223.273,167.981,1.329,3.577,410.104,288.414,121.689,1.295,2.41,
2871,chi,2020,chi-2020,2020-11-29,gnb,12,202,Sun,L,5-6,...,280.147,104.645,1.239,3.684,343.588,229.234,114.353,1.014,1.6,
2872,chi,2020,chi-2020,2020-12-06,det,13,202,Sun,L,5-7,...,250.316,92.887,1.362,3.145,398.552,272.557,125.995,1.029,1.802,
2873,chi,2020,chi-2020,2020-12-13,htx,14,202,Sun,W,6-7,...,284.715,84.131,0.964,1.48,403.686,263.05,140.636,0.761,1.831,


In [22]:
response = requests.get("https://www.oddsshark.com/nfl/2020-spreads-all-games")
soup = BeautifulSoup(response.text, "lxml")

h2s = str(soup.find_all('div')).split('<h2>')

i = 89

spreads = []

while i <= 105:
    
    text = h2s[i]
    soup = BeautifulSoup(text, 'lxml')
    week_num = int(re.search(r"NFL Week ([0-9]+) Odds", str(text))[1])
    

    soup = BeautifulSoup(text, 'lxml')
    weeks = soup.find_all('tr')
    
    for week in weeks:
        team_spread = {}
        pattern = r"<td>([ \w]+)</td><td>([\+-][0-9]+)<"
        try:
            result = re.search(pattern, str(week))
            team_name = result[1]
            spread = float(result[2])
            team_spread['week_num'] = week_num
            team_spread['team'] = team_name
            team_spread['spread'] = spread
            spreads.append(team_spread)
        except:
            continue
            
    
    i += 1
    
spreads

[{'week_num': 1, 'team': 'Miami Dolphins', 'spread': 6.0},
 {'week_num': 1, 'team': 'New England Patriots', 'spread': -6.0},
 {'week_num': 1, 'team': 'Seattle Seahawks', 'spread': -1.0},
 {'week_num': 1, 'team': 'Atlanta Falcons', 'spread': 1.0},
 {'week_num': 1, 'team': 'Chicago Bears', 'spread': 3.0},
 {'week_num': 1, 'team': 'Detroit Lions', 'spread': -3.0},
 {'week_num': 1, 'team': 'Los Angeles Chargers', 'spread': -3.0},
 {'week_num': 1, 'team': 'Cincinnati Bengals', 'spread': 3.0},
 {'week_num': 1, 'team': 'Arizona Cardinals', 'spread': 7.0},
 {'week_num': 1, 'team': 'San Francisco 49ers', 'spread': -7.0},
 {'week_num': 1, 'team': 'Pittsburgh Steelers', 'spread': -4.0},
 {'week_num': 1, 'team': 'New York Giants', 'spread': 4.0},
 {'week_num': 2, 'team': 'Cincinnati Bengals', 'spread': 8.0},
 {'week_num': 2, 'team': 'Cleveland Browns', 'spread': -8.0},
 {'week_num': 2, 'team': 'Los Angeles Rams', 'spread': 4.0},
 {'week_num': 2, 'team': 'Philadelphia Eagles', 'spread': -4.0},
 {'w

In [23]:
spread_df_2020 = pd.DataFrame(spreads)
spread_df_2020.head()

Unnamed: 0,week_num,team,spread
0,1,Miami Dolphins,6.0
1,1,New England Patriots,-6.0
2,1,Seattle Seahawks,-1.0
3,1,Atlanta Falcons,1.0
4,1,Chicago Bears,3.0


In [24]:
game_df.team.unique()

array(['crd', 'atl', 'rav', 'buf', 'car', 'chi', 'cin', 'cle', 'dal',
       'den', 'det', 'gnb', 'htx', 'clt', 'jax', 'kan', 'mia', 'min',
       'nwe', 'nor', 'nyg', 'nyj', 'rai', 'phi', 'pit', 'sdg', 'sfo',
       'sea', 'ram', 'tam', 'oti', 'was'], dtype=object)

In [25]:
list(spread_df_2020.team.unique())

['Miami Dolphins',
 'New England Patriots',
 'Seattle Seahawks',
 'Atlanta Falcons',
 'Chicago Bears',
 'Detroit Lions',
 'Los Angeles Chargers',
 'Cincinnati Bengals',
 'Arizona Cardinals',
 'San Francisco 49ers',
 'Pittsburgh Steelers',
 'New York Giants',
 'Cleveland Browns',
 'Los Angeles Rams',
 'Philadelphia Eagles',
 'Carolina Panthers',
 'Tampa Bay Buccaneers',
 'Denver Broncos',
 'Dallas Cowboys',
 'Buffalo Bills',
 'Jacksonville Jaguars',
 'Tennessee Titans',
 'Baltimore Ravens',
 'Houston Texans',
 'Kansas City Chiefs',
 'Washington Football Team',
 'Green Bay Packers',
 'New York Jets',
 'Indianapolis Colts',
 'Minnesota Vikings',
 'New Orleans Saints',
 'Las Vegas Raiders']

In [26]:
oddshark_map = {
'Miami Dolphins': 'mia',
'New England Patriots': 'nwe',
'Seattle Seahawks': 'sea',
'Atlanta Falcons': 'atl',
'Chicago Bears': 'chi',
'Detroit Lions': 'det',
'Los Angeles Chargers': 'sdg',
'Cincinnati Bengals': 'cin',
'Arizona Cardinals': 'crd',
'San Francisco 49ers': 'sfo',
'Pittsburgh Steelers': 'pit',
'New York Giants': 'nyg',
'Cleveland Browns': 'cle',
'Los Angeles Rams': 'ram',
'Philadelphia Eagles': 'phi',
'Carolina Panthers': 'car',
'Tampa Bay Buccaneers': 'tam',
'Denver Broncos': 'den',
'Dallas Cowboys': 'dal',
'Buffalo Bills': 'buf',
'Jacksonville Jaguars': 'jax',
'Tennessee Titans': 'oti',
'Baltimore Ravens': 'rav',
'Houston Texans': 'htx',
'Kansas City Chiefs': 'kan',
'Washington Football Team': 'was',
'Green Bay Packers': 'gnb',
'New York Jets': 'nyj',
'Indianapolis Colts': 'col',
'Minnesota Vikings': 'min',
'New Orleans Saints': 'nor',
'Las Vegas Raiders': 'rai',
}

In [27]:
spread_df_2020['team'] = spread_df_2020['team'].map(oddshark_map)
spread_df_2020

Unnamed: 0,week_num,team,spread
0,1,mia,6.0
1,1,nwe,-6.0
2,1,sea,-1.0
3,1,atl,1.0
4,1,chi,3.0
...,...,...,...
241,17,nyg,3.0
242,17,oti,1.0
243,17,htx,-1.0
244,17,crd,6.0


In [28]:
spread_df_2020['team_week'] = spread_df_2020['week_num'].astype(str) + '-' + spread_df_2020['team']
spread_df_2020.index = spread_df_2020['team_week']
spread_df_2020.head()

Unnamed: 0_level_0,week_num,team,spread,team_week
team_week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1-mia,1,mia,6.0,1-mia
1-nwe,1,nwe,-6.0,1-nwe
1-sea,1,sea,-1.0,1-sea
1-atl,1,atl,1.0,1-atl
1-chi,1,chi,3.0,1-chi


In [29]:
spreads

[{'week_num': 1, 'team': 'Miami Dolphins', 'spread': 6.0},
 {'week_num': 1, 'team': 'New England Patriots', 'spread': -6.0},
 {'week_num': 1, 'team': 'Seattle Seahawks', 'spread': -1.0},
 {'week_num': 1, 'team': 'Atlanta Falcons', 'spread': 1.0},
 {'week_num': 1, 'team': 'Chicago Bears', 'spread': 3.0},
 {'week_num': 1, 'team': 'Detroit Lions', 'spread': -3.0},
 {'week_num': 1, 'team': 'Los Angeles Chargers', 'spread': -3.0},
 {'week_num': 1, 'team': 'Cincinnati Bengals', 'spread': 3.0},
 {'week_num': 1, 'team': 'Arizona Cardinals', 'spread': 7.0},
 {'week_num': 1, 'team': 'San Francisco 49ers', 'spread': -7.0},
 {'week_num': 1, 'team': 'Pittsburgh Steelers', 'spread': -4.0},
 {'week_num': 1, 'team': 'New York Giants', 'spread': 4.0},
 {'week_num': 2, 'team': 'Cincinnati Bengals', 'spread': 8.0},
 {'week_num': 2, 'team': 'Cleveland Browns', 'spread': -8.0},
 {'week_num': 2, 'team': 'Los Angeles Rams', 'spread': 4.0},
 {'week_num': 2, 'team': 'Philadelphia Eagles', 'spread': -4.0},
 {'w

In [30]:
# for week in range(8, 18):
#     spread_mask = (spread_df_2020.week_num == week)
#     game_mask = (game_df.year == 2020) & (game_df.week_num == week)
#     game_df[game_mask]['vegas_pred_margin'] = (pd.concat([game_df.loc[game_mask, ['vegas_pred_margin'],
#                                                           spread_df_2020.loc[spread_mask]['spread']],
#                                                          join='inner', keys='team'))
                        
    

In [32]:
game_df['week_num'].unique()

array(['4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15',
       '16', '17', 'Wild Card', '18', 'Division', 'Conf', 'SuperBowl'],
      dtype=object)

In [35]:
def apply_2020_spreads(row):
    if row['vegas_pred_margin'] == np.NaN:

        team = row['team']
        week = row['week_num']

        team_week = week.astype(str) + '-' + team
        return team_week
    else:
        return row['vegas_pred_margin'] 


week_nums = ['8', '9', '10', '11', '12', '13', '14', '15', '16', '17', 'Wild Card', '18', 'Division', 'Conf', 'SuperBowl']
game_df['oddshark_spread'] = game_df[['team', 'week_num', 'year', 'vegas_pred_margin']].apply(apply_2020_spreads, axis=1)
    
    
    
    

In [36]:
game_df

Unnamed: 0,team,year,team_year,date,opp,week_num,decade,game_day_of_week,game_outcome,team_record,...,ewma_rush_yds_off_opp,ewma_to_off_opp,ewma_to2_off_opp,ewma_yards_def_opp,ewma_pass_yds_def_opp,ewma_rush_yds_def_opp,ewma_to_def_opp,ewma_to2_def_opp,vegas_pred_margin,oddshark_spread
0,crd,1980,crd-1980,1980-09-28,phi,4,198,Sun,W,1-3,...,178.273,2.044,4.884,231.519,162.505,69.014,1.668,3.005,-6.0,-6.0
1,crd,1980,crd-1980,1980-10-05,nor,5,198,Sun,W,2-3,...,73.541,1.361,3.618,383.964,229.436,154.528,2.664,9.575,1.0,1.0
2,crd,1980,crd-1980,1980-10-12,ram,6,198,Sun,L,2-4,...,151.027,1.798,5.249,300.837,188.258,112.580,2.740,9.534,-3.0,-3.0
3,crd,1980,crd-1980,1980-10-19,was,7,198,Sun,L,2-5,...,121.439,1.520,3.602,309.560,137.145,172.416,2.423,6.386,-4.0,-4.0
4,crd,1980,crd-1980,1980-10-26,clt,8,198,Sun,W,3-5,...,142.112,1.847,4.007,355.428,232.525,122.904,1.711,5.573,-8.0,-8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16721,was,2020,was-2020,2020-12-13,sfo,14,202,Sun,W,6-7,...,100.743,2.127,5.910,334.037,229.319,104.718,1.557,4.661,,
16722,was,2020,was-2020,2020-12-20,sea,15,202,Sun,L,6-8,...,123.392,1.347,3.444,347.298,250.500,96.798,1.169,1.948,,
16723,was,2020,was-2020,2020-12-27,car,16,202,Sun,L,6-9,...,108.542,0.926,1.816,355.226,235.043,120.182,1.191,2.350,,
16724,was,2020,was-2020,2020-01-03,phi,17,202,Sun,W,7-9,...,134.576,1.585,4.008,395.080,268.268,126.812,1.238,2.822,,


In [38]:
mask = game_df.team_year == 'was-2019'
game_df[mask]

Unnamed: 0,team,year,team_year,date,opp,week_num,decade,game_day_of_week,game_outcome,team_record,...,ewma_rush_yds_off_opp,ewma_to_off_opp,ewma_to2_off_opp,ewma_yards_def_opp,ewma_pass_yds_def_opp,ewma_rush_yds_def_opp,ewma_to_def_opp,ewma_to2_def_opp,vegas_pred_margin,oddshark_spread
16699,was,2019,was-2019,2019-09-29,nyg,4,201,Sun,L,0-4,...,114.016,2.0,4.0,460.73,330.501,130.229,0.376,0.376,-3.0,-3.0
16700,was,2019,was-2019,2019-10-06,nwe,5,201,Sun,L,0-5,...,89.176,1.058,1.585,245.674,179.529,66.146,2.594,8.969,-16.5,-16.5
16701,was,2019,was-2019,2019-10-13,mia,6,201,Sun,W,1-5,...,54.559,2.108,6.13,462.022,293.276,168.746,0.496,0.496,5.5,5.5
16702,was,2019,was-2019,2019-10-20,sfo,7,201,Sun,L,1-6,...,179.506,1.971,6.721,227.058,138.281,88.777,2.326,7.237,-10.0,-10.0
16703,was,2019,was-2019,2019-10-24,min,8,201,Thu,L,1-7,...,157.307,1.221,3.147,334.635,245.861,88.775,1.515,3.369,-16.5,-16.5
16704,was,2019,was-2019,2019-11-03,buf,9,201,Sun,L,1-8,...,126.206,1.537,4.585,311.617,193.588,118.029,1.495,3.533,-10.5,-10.5
16705,was,2019,was-2019,2019-11-17,nyj,11,201,Sun,L,1-9,...,68.094,1.881,6.84,337.717,261.862,75.855,1.189,2.469,2.0,2.0
16706,was,2019,was-2019,2019-11-24,det,12,201,Sun,W,2-9,...,97.689,1.112,1.978,412.071,292.826,119.246,1.087,2.341,-4.0,-4.0
16707,was,2019,was-2019,2019-12-01,car,13,201,Sun,W,3-9,...,123.695,1.877,5.023,379.365,255.04,124.326,1.681,6.556,-10.5,-10.5
16708,was,2019,was-2019,2019-12-08,gnb,14,201,Sun,L,3-10,...,102.943,0.546,1.172,377.936,258.194,119.742,1.491,3.989,-13.0,-13.0
