<a href="https://colab.research.google.com/github/alexcontarino/personal-projects/blob/main/Sports_Betting/NFL_Moneyline_Scrape.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Scrapes DraftKings website for moneylines, spreads, over-unders, and implied win probabilities in NFL games for upcoming season. Results saved to data frame.

## Set-Up

In [33]:
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd
from google.colab import drive

In [34]:
drive.mount('/content/drive')

Mounted at /content/drive


## Webscrape and parsing

In [35]:
# Step 1: Send an HTTP request to the website
url = 'https://sportsbook.draftkings.com/leagues/football/nfl'
response = requests.get(url)

In [36]:
# Check if the request was successful
data_text = {}
i = 0
if response.status_code == 200:
    # Step 2: Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extracting all <div> elements with class "content"
    content_divs = soup.find_all('div', class_='parlay-card-10-a')
    for div in content_divs:
      i += 1
      data_text[i] = div.text.strip() # .strip() to remove leading/trailing whitespace
else:
    print(f'Failed to retrieve the webpage. Status code: {response.status_code}')

In [37]:
# Step 2: Parse response to find each game
pattern = r'[A-Z]{4,}'
drop_sequence = "SpreadTotalMoneyline"

master_list = []
for gameday in data_text:
    test_text = data_text[gameday]

    position = test_text.find(drop_sequence)

    if position != -1:
        text_after_sequence = test_text[position + len(drop_sequence):]
    else:
        text_after_sequence = test_text

    parsed_list = re.split(pattern, text_after_sequence)[1:]
    parsed_list = [segment.strip() for segment in parsed_list if segment.strip()]

    master_list += parsed_list

In [38]:
master_list

['Cowboys+7−110O\xa046.5−110+26012:20',
 'Eagles-7−110U\xa046.5−110−325',
 'Chiefs-3−105O\xa044.5−110−14212:00',
 'Chargers+3−115U\xa044.5−110+120',
 'Bengals-5.5−110O\xa044.5−115−2305:00',
 'Browns+5.5−110U\xa044.5−105+1905:00',
 'Giants+7−115O\xa045.5−105+2305:00',
 'Commanders-7−105U\xa045.5−115−2855:00',
 'Steelers-3−102O\xa039.5−110−1555:00',
 'Jets+3−118U\xa039.5−110+1305:00',
 'Cardinals-5.5−110O\xa042.5−110−2305:00',
 'Saints+5.5−110U\xa042.5−110+1905:00',
 'Raiders+3−105O\xa042.5−110+1365:00',
 'Patriots-3−115U\xa042.5−110−1625:00',
 'Panthers+2.5−108O\xa045.5−115+1145:00',
 'Jaguars-2.5−112U\xa045.5−105−1355:00',
 'Buccaneers-2.5−115O\xa048.5−110−1425:00',
 'Falcons+2.5−105U\xa048.5−110+1205:00',
 'Dolphins+1.5−112O\xa045.5−110+1005:00',
 'Colts-1.5−108U\xa045.5−110−1208:05',
 'Titans+7.5−105O\xa041.5−110+2958:05',
 'Broncos-7.5−115U\xa041.5−110−3758:05',
 '49ers-1.5−108O\xa045.5−110−1208:05',
 'Seahawks+1.5−112U\xa045.5−110+1008:25',
 'Texans+3−110O\xa045.5−110+1308:25',
 'R

In [39]:
# Step 3: For each game, identify teams, sprad and moneyline
pattern_team = r'[a-zA-Z]{3,12}'
pattern_spread = r'([\+-]\d+(?:\.\d+)?)'
pattern_line = r'([\+−].{3})(?!.*[\+−])'
pattern_total = r"\xa0(.*?)−"

no_total = 0
team_dict = {}
team_line_list = []
for team_game_line in master_list:
    team = re.findall(pattern_team, team_game_line)[0]
    spread = re.findall(pattern_spread, team_game_line)[0]
    moneyline = re.findall(pattern_line, team_game_line)[0]
    total = re.findall(pattern_total, team_game_line)
    if total:
        total = total[0]
    else:
        no_total += 1
        total = None

    try:
        team_dict[team].append((spread, moneyline, total))
    except:
        team_dict[team] = [(spread, moneyline, total)]

    if team == "ers":
        team_line_list.append(("49ers",spread, moneyline, total))
    else:
        team_line_list.append((team,spread,moneyline,total))

team_dict['49ers'] = team_dict.pop('ers')

print(f"No Total Offered: {no_total}")

No Total Offered: 28


## Convert response to dataframe and check for errors

In [40]:
# Step 4: Organize data into table format
team_line_list

game_list = [None] * (16*17)
team_counter = 0
game_counter = -1
for team, spread, line, total in team_line_list:
    if team_counter == 2:
        # reset game counter if iterated thru two teams
        team_counter = 0
    if team_counter == 0:
        # initiate new game instance and increment game counter
        game_counter += 1
        game_list[game_counter] = {"away": None, "home": None, "home_spread": None, "away_line": None, "home_line": None, "total": None}
        # input away team data
        game_list[game_counter]["away"] = team
        game_list[game_counter]["away_line"] = line
        game_list[game_counter]["total"] = total
    if team_counter == 1:
        game_list[game_counter]["home"] = team
        game_list[game_counter]["home_spread"] = spread
        game_list[game_counter]["home_line"] = line
    # increment team counter
    team_counter += 1

In [41]:
game_dict = pd.DataFrame(game_list)
game_dict["away_points"] = (game_dict.total.astype("float64") - game_dict.home_spread.astype("float64")) / 2 + game_dict.home_spread.astype("float64")
game_dict["home_points"] = (game_dict.total.astype("float64") + game_dict.home_spread.astype("float64")) / 2 - game_dict.home_spread.astype("float64")
game_dict.head()

Unnamed: 0,away,home,home_spread,away_line,home_line,total,away_points,home_points
0,Cowboys,Eagles,-7.0,+260,−325,46.5,19.75,26.75
1,Chiefs,Chargers,3.0,−142,+120,44.5,23.75,20.75
2,Bengals,Browns,5.5,−230,+190,44.5,25.0,19.5
3,Giants,Commanders,-7.0,+230,−285,45.5,19.25,26.25
4,Steelers,Jets,3.0,−155,+130,39.5,21.25,18.25


In [42]:
# check for errors in parsing

team_count_check = 32

if len(game_dict.away.unique()) != team_count_check or len(game_dict.home.unique()) != team_count_check:
  print("Error: Incorrect number of teams")
else:
  print("Success: 32 unique teams identified")

print("\n Home Teams:", sorted(list(game_dict.home.unique())))

print("\n Away Teams:", sorted(list(game_dict.away.unique())))

Success: 32 unique teams identified

 Home Teams: ['49ers', 'Bears', 'Bengals', 'Bills', 'Broncos', 'Browns', 'Buccaneers', 'Cardinals', 'Chargers', 'Chiefs', 'Colts', 'Commanders', 'Cowboys', 'Dolphins', 'Eagles', 'Falcons', 'Giants', 'Jaguars', 'Jets', 'Lions', 'Packers', 'Panthers', 'Patriots', 'Raiders', 'Rams', 'Ravens', 'Saints', 'Seahawks', 'Steelers', 'Texans', 'Titans', 'Vikings']

 Away Teams: ['49ers', 'Bears', 'Bengals', 'Bills', 'Broncos', 'Browns', 'Buccaneers', 'Cardinals', 'Chargers', 'Chiefs', 'Colts', 'Commanders', 'Cowboys', 'Dolphins', 'Eagles', 'Falcons', 'Giants', 'Jaguars', 'Jets', 'Lions', 'Packers', 'Panthers', 'Patriots', 'Raiders', 'Rams', 'Ravens', 'Saints', 'Seahawks', 'Steelers', 'Texans', 'Titans', 'Vikings']


## Estimate true win probabilities from game moneylines

In [43]:
def odds_to_prob(moneyline):
    # Input:
    ## moneyline: text

    # Output:
    ## implied_prob

    if moneyline[0] == "+":
        num_moneyline = float(moneyline[1:])
        implied_prob = 100 / (100 + num_moneyline)
    else:
        num_moneyline = -1 * float(moneyline[1:])
        implied_prob = num_moneyline / (-100 + num_moneyline)

    return implied_prob

In [44]:
def prob_vig_adjust(implied_prob_1, implied_prob_2):

    # Input: implied probabilities
    ## away_prob:
    ## home_prob

    # Output: estimated true probabilities, adjusted for the overage/vig charged by the sportsbook
    ## [prob_1, prob_2]

    vig = implied_prob_1 + implied_prob_2

    prob_1 = (implied_prob_1 / vig).reshape(-1,1)
    prob_2 = (implied_prob_2 / vig).reshape(-1,1)

    return np.concatenate((prob_1,prob_2), axis=-1)


In [45]:
def compute_prob(away_line, home_line):

    implied_probs = np.zeros((len(away_line), 2))

    for i, (away, home) in enumerate(zip(away_line, home_line)):
        implied_probs[i,0] = odds_to_prob(away)
        implied_probs[i,1] = odds_to_prob(home)

    probs = prob_vig_adjust(implied_probs[:,0], implied_probs[:,1])

    return probs

In [46]:
# Convert moneylines into true win probabilities
probs = compute_prob(game_dict.away_line, game_dict.home_line)
game_dict["away_prob"] = probs[:,0]
game_dict["home_prob"] = probs[:,1]

In [47]:
game_dict.head()

Unnamed: 0,away,home,home_spread,away_line,home_line,total,away_points,home_points,away_prob,home_prob
0,Cowboys,Eagles,-7.0,+260,−325,46.5,19.75,26.75,0.266458,0.733542
1,Chiefs,Chargers,3.0,−142,+120,44.5,23.75,20.75,0.563492,0.436508
2,Bengals,Browns,5.5,−230,+190,44.5,25.0,19.5,0.669007,0.330993
3,Giants,Commanders,-7.0,+230,−285,45.5,19.25,26.25,0.290456,0.709544
4,Steelers,Jets,3.0,−155,+130,39.5,21.25,18.25,0.582993,0.417007


## Save results

In [48]:
# Save results
game_dict.to_csv("drive/MyDrive/2025_gamelines.csv", index=False)