## Initialize notebook

In [1]:
import requests
from bs4 import BeautifulSoup
import datetime
import pandas as pd

## Define custom functions

In [2]:
def is_float(s: str) -> bool:
    """Return True if ``s`` can be converted with ``float()``, otherwise False.

    Anything ``float()`` accepts counts as a float, so strings such as
    ``" 3.5 "``, ``"nan"`` and ``"inf"`` also return True.

    Args:
        s: Text to test. Non-string values that ``float()`` rejects
            (for example ``None``) return False instead of raising.

    Returns:
        True when ``float(s)`` succeeds, False otherwise.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: text is not numeric; TypeError: not a string/number at all.
        return False
    return True

## Define constants

In [3]:
# Maps a lowercase team label (key) to a short team code (value).
# Consumed via ``Series.map`` on the favorite / underdog / home columns, whose
# values have been lowercased and had the "at " marker removed; a label that is
# not a key here becomes NaN in the mapped column.
TEAM_MAPPING = {
    "arizona": "ari",
    "atlanta": "atl",
    "baltimore": "bal",
    "buffalo": "buf",
    "carolina": "car",
    "chicago": "chi",
    "cincinnati": "cin",
    "cleveland": "cle",
    "dallas": "dal",
    "denver": "den",
    "detroit": "det",
    "green bay": "gb",
    "houston": "hou",
    "indianapolis": "ind",
    "jacksonville": "jac",
    "kansas city": "kan",
    "la chargers": "lac",
    "la rams": "lar",
    "las vegas": "lv",
    "miami": "mia",
    "minnesota": "min",
    "new england": "ne",
    "new orleans": "no",
    "ny giants": "nyg",
    "ny jets": "nyj",
    "pittsburgh": "pit",
    "philadelphia": "phi",
    "san francisco": "sfo",
    "seattle": "sea",
    "tampa bay": "tam",
    "tennessee": "ten",
    "washington": "was",
}

## Get data

In [4]:
# Download the lines page. The timeout keeps a stalled connection from hanging
# the kernel (requests waits indefinitely when no timeout is given).
r = requests.get("http://www.footballlocks.com/nfl_lines.shtml", timeout=30)
# Stop here on a 4xx/5xx response instead of parsing an error page downstream.
r.raise_for_status()
r.status_code

200

## Filter and parse data

In [5]:
# Parse the downloaded page.
# NOTE(review): no parser is named, so bs4 picks the best one installed and the
# parse tree can differ between environments; pin one if that matters.
soup = BeautifulSoup(r.text)

# Locate the heading for the week of interest; when several spans contain the
# text, the last one is used.
week_heading = "NFL Lines For Week 7"
spans = [s for s in soup.body.find_all("span") if week_heading in s.text]
if not spans:
    raise ValueError(f"no <span> containing {week_heading!r} was found on the page")
line_span = spans[-1]

# Only the first 20 table rows after the heading are inspected.
# NOTE(review): confirm 20 rows is enough for weeks with a full slate of games.
trs = line_span.find_all_next("tr", limit=20)

# A game row has exactly five cells, the last of which is a numeric over/under.
keep = []
for tr in trs:
    tds = tr.find_all("td")
    if len(tds) == 5 and is_float(tds[4].text):
        keep.append(tr)

print("number of games found:", len(keep))

number of games found: 13


In [6]:
headers = ["datetime", "favorite", "line", "underdog", "over_under", "home"]
# The page lists month/day only, so the year is supplied here.
# NOTE(review): games played in January would need the following year.
season_year = 2020

parsed = []
for tr in keep:
    # Cells: 0 = kickoff, 1 = favorite, 2 = line, 3 = underdog, 4 = over/under.
    tds = tr.find_all("td")

    # Kickoff: use the first two whitespace-separated tokens (date, time) rather
    # than a fixed-width slice, which broke for single-digit months and
    # truncated two-digit hours. Assumes the cell reads like "10/25 1:00 ET".
    # NOTE(review): the time is read as a 24-hour value, so "1:00" becomes
    # 01:00; confirm whether the page's times are afternoon kickoffs.
    date_part, time_part = tds[0].text.split()[:2]
    game_dt = datetime.datetime.strptime(
        f"{season_year}/{date_part} {time_part}", "%Y/%m/%d %H:%M"
    )

    # Teams: the home side is the one carrying the "at " marker. If the
    # underdog is not marked, the favorite is treated as the home team.
    favorite = tds[1].text.lower()
    underdog = tds[3].text.lower()
    underdog_is_home = "at " in underdog
    favorite = favorite.replace("at ", "")
    underdog = underdog.replace("at ", "")
    home_team = underdog if underdog_is_home else favorite

    # Line and over/under: a non-numeric line (presumably a pick'em marker;
    # verify against the page) is stored as 0.0.
    line_text = tds[2].text
    line = float(line_text) if is_float(line_text) else 0.0
    over_under = float(tds[4].text)

    row = [game_dt, favorite, line, underdog, over_under, home_team]
    parsed.append(dict(zip(headers, row)))

# Passing the column names keeps the frame usable even when no games were found.
df_data = pd.DataFrame(parsed, columns=headers)

# Replace team labels with their short codes; unknown labels become NaN.
for team_col in ("favorite", "underdog", "home"):
    df_data[team_col] = df_data[team_col].map(TEAM_MAPPING)
df_data.head()

Unnamed: 0,datetime,favorite,line,underdog,over_under,home
0,2020-10-25 01:00:00,cle,-3.5,cin,50.5,cin
1,2020-10-25 01:00:00,was,0.0,dal,46.0,was
2,2020-10-25 01:00:00,atl,-2.5,det,55.0,atl
3,2020-10-25 01:00:00,no,-7.5,car,51.5,no
4,2020-10-25 01:00:00,buf,-12.0,nyj,45.0,nyj


## Save data

In [7]:
# Write the parsed schedule and odds to CSV without the index column.
# NOTE(review): "%y" writes a two-digit year (e.g. "20-10-25T01:00:00");
# confirm that consumers of this file expect that rather than "%Y".
df_data.to_csv(
    "schedule_and_odds_week7.csv",
    index=False,
    date_format="%y-%m-%dT%H:%M:%S",
)