<a href="https://colab.research.google.com/github/alexcontarino/personal-projects/blob/main/Sports_Betting/NFL_Moneyline_Scrape.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Scrapes the DraftKings website for moneylines, spreads, and over-unders for NFL games in the upcoming season, and estimates the implied win probabilities. Results are collected in a data frame and saved to a CSV file.

## Set-Up

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd
from google.colab import drive

In [2]:
# Mount Google Drive at /content/drive.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Web scraping and parsing

In [3]:
# Step 1: Send an HTTP request to the website
url = 'https://sportsbook.draftkings.com/leagues/football/nfl'
# requests applies no timeout by default, so an unresponsive server would hang
# this cell indefinitely; fail with a Timeout exception after 30 seconds instead.
response = requests.get(url, timeout=30)

In [4]:
# Check if the request was successful, then collect the text of every matching
# card. `data_text` maps a 1-based card index to that card's stripped text and
# stays empty when the request failed or nothing matched.
data_text = {}
i = 0
if response.status_code == 200:
    # Step 2: Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Extract all <div> elements with class "parlay-card-10-a"
    content_divs = soup.find_all('div', class_='parlay-card-10-a')
    for i, div in enumerate(content_divs, start=1):
        data_text[i] = div.text.strip()  # .strip() to remove leading/trailing whitespace

    if not data_text:
        # A 200 response with no matching cards would otherwise pass silently
        # and only surface as a confusing error in a later cell.
        print('Warning: request succeeded but no "parlay-card-10-a" elements were found.')
else:
    print(f'Failed to retrieve the webpage. Status code: {response.status_code}')

In [5]:
# Step 2: Parse response to find each game
# Each card's text is cut after the column-header sequence (when present) and
# then split on runs of four or more capital letters; every non-empty piece
# after the first split point becomes one entry of `master_list`.
pattern = r'[A-Z]{4,}'
drop_sequence = "SpreadTotalMoneyline"

master_list = []
for gameday, test_text in data_text.items():
    # partition() splits around the first occurrence of the header sequence;
    # `header` is empty when the sequence is absent, in which case the whole
    # text is kept.
    _, header, remainder = test_text.partition(drop_sequence)
    text_after_sequence = remainder if header else test_text

    # The chunk before the first capital-letter run is discarded ([1:]).
    parsed_list = []
    for segment in re.split(pattern, text_after_sequence)[1:]:
        cleaned = segment.strip()
        if cleaned:
            parsed_list.append(cleaned)

    master_list.extend(parsed_list)

In [6]:
# Display the first parsed entry as a quick sanity check of the split.
master_list[0]

'Cowboys+7−110O\xa046.5−110+25012:20'

In [7]:
# Step 3: For each game, identify teams, spread and moneyline
# The team name is the first run of 3-12 letters. Digits are not letters, so
# "49ers" is captured as "ers" and renamed right after extraction.
pattern_team = r'[a-zA-Z]{3,12}'
# A sign (ASCII "+", ASCII "-" or the Unicode minus), up to three digits, and an
# optional decimal part so half-point spreads such as "+7.5" are kept whole.
# The three-digit cap stops the match before the digits that directly follow
# the moneyline in the scraped text (e.g. "+25012:20").
# NOTE(review): a four-digit moneyline would be truncated by that cap - confirm
# whether such lines can appear on this page.
pattern_spread_lines = r'[+\-−]\d{1,3}(?:\.\d+)?'
# The total is the text between the non-breaking space and the next Unicode minus.
pattern_total = r"\xa0(.*?)−"

no_total = 0          # number of team lines without an over/under
team_dict = {}        # team -> list of (spread, spread_line, total, total_line, moneyline)
team_line_list = []   # flat list of (team, spread, spread_line, total, total_line, moneyline)
for team_game_line in master_list:
    team = re.findall(pattern_team, team_game_line)[0]
    if team == "ers":
        # Normalise once so team_dict and team_line_list use the same name.
        team = "49ers"

    spread_lines = re.findall(pattern_spread_lines, team_game_line)
    if len(spread_lines) == 4:
        # spread, spread odds, total odds, moneyline
        spread, spread_line, total_line, moneyline = spread_lines
        total = re.findall(pattern_total, team_game_line)[0]
    elif len(spread_lines) < 3:
        raise ValueError(
            f"Expected at least 3 signed values, found {len(spread_lines)} in: {team_game_line!r}"
        )
    else:
        # No over/under: spread, spread odds, moneyline
        spread, spread_line, moneyline = spread_lines[:3]
        total_line = None
        total = None
        no_total += 1

    line_record = (spread, spread_line, total, total_line, moneyline)
    team_dict.setdefault(team, []).append(line_record)
    team_line_list.append((team,) + line_record)

print(f"No Total Offered: {no_total}")

No Total Offered: 26


## Convert response to dataframe and check for errors

In [8]:
# Step 4: Organize data into table format
# Entries in team_line_list alternate between the two teams of a game: the first
# entry of each pair is stored as the away team, the second as the home team.

game_list = [None] * int(len(team_line_list)/2)
for entry_idx, (team, spread, spread_line, total, total_line, moneyline) in enumerate(team_line_list):
    # game_counter: which game this entry belongs to; team_counter: 0 = away, 1 = home
    game_counter, team_counter = divmod(entry_idx, 2)

    if team_counter == 0:
        # start a new game record, filled with the away team's data
        game_list[game_counter] = {
            "away": team, "home": None,
            "home_spread": None, "away_spread_line": spread_line, "home_spread_line": None,
            "total": total, "over_line": total_line, "under_line": None,
            "away_moneyline": moneyline, "home_moneyline": None,
        }
    else:
        # complete the current game record with the home team's data
        current_game = game_list[game_counter]
        current_game["home"] = team
        current_game["home_spread"] = spread
        current_game["home_spread_line"] = spread_line
        current_game["home_moneyline"] = moneyline
        current_game["under_line"] = total_line

In [9]:
# Build the games table, then derive each side's implied score from the total
# and the home spread (away ends up at (total + home_spread) / 2 and home at
# (total - home_spread) / 2).
game_dict = pd.DataFrame(game_list)

total_points = game_dict.total.astype("float64")
home_spread_points = game_dict.home_spread.astype("float64")

game_dict["away_points"] = (total_points - home_spread_points) / 2 + home_spread_points
game_dict["home_points"] = (total_points + home_spread_points) / 2 - home_spread_points
game_dict.head()

Unnamed: 0,away,home,home_spread,away_spread_line,home_spread_line,total,over_line,under_line,away_moneyline,home_moneyline,away_points,home_points
0,Cowboys,Eagles,-7,−110,−110,46.5,−110,−110,+250,−310,19.75,26.75
1,Chiefs,Chargers,3,−105,−115,45.5,−110,−110,−148,+124,24.25,21.25
2,Giants,Commanders,-7,−115,−105,45.5,−105,−115,+220,−270,19.25,26.25
3,Bengals,Browns,5,−110,−110,44.5,−115,−105,−230,+190,24.75,19.75
4,Steelers,Jets,3,−102,−118,39.5,−110,−110,−155,+130,21.25,18.25


In [10]:
# check for errors in parsing
# Both the home and the away column are expected to contain exactly
# `team_count_check` distinct team names.

team_count_check = 32

home_teams = game_dict.home.unique()
away_teams = game_dict.away.unique()

if len(away_teams) == team_count_check and len(home_teams) == team_count_check:
    print("Success: 32 unique teams identified")
else:
    print("Error: Incorrect number of teams")

print("\n Home Teams:", sorted(home_teams))

print("\n Away Teams:", sorted(away_teams))

Success: 32 unique teams identified

 Home Teams: ['49ers', 'Bears', 'Bengals', 'Bills', 'Broncos', 'Browns', 'Buccaneers', 'Cardinals', 'Chargers', 'Chiefs', 'Colts', 'Commanders', 'Cowboys', 'Dolphins', 'Eagles', 'Falcons', 'Giants', 'Jaguars', 'Jets', 'Lions', 'Packers', 'Panthers', 'Patriots', 'Raiders', 'Rams', 'Ravens', 'Saints', 'Seahawks', 'Steelers', 'Texans', 'Titans', 'Vikings']

 Away Teams: ['49ers', 'Bears', 'Bengals', 'Bills', 'Broncos', 'Browns', 'Buccaneers', 'Cardinals', 'Chargers', 'Chiefs', 'Colts', 'Commanders', 'Cowboys', 'Dolphins', 'Eagles', 'Falcons', 'Giants', 'Jaguars', 'Jets', 'Lions', 'Packers', 'Panthers', 'Patriots', 'Raiders', 'Rams', 'Ravens', 'Saints', 'Seahawks', 'Steelers', 'Texans', 'Titans', 'Vikings']


## Estimate true win probabilities from game moneylines

In [11]:
def odds_to_prob(moneyline):
    """Convert an American moneyline into the implied probability.

    Parameters
    ----------
    moneyline : str, int, float or None
        American odds such as "+250", "-110" or "−310" (the Unicode minus is
        accepted as well). A value without a sign is read as positive odds.
        Numeric input is accepted and converted through its string form.

    Returns
    -------
    float or None
        ``100 / (100 + odds)`` for positive odds, ``odds / (100 + odds)`` for
        negative odds (using the absolute value), or None when `moneyline`
        is None.

    Raises
    ------
    ValueError
        If the value cannot be parsed as a number.
    """
    if moneyline is None:
        return None

    text = str(moneyline).strip()

    # Only strip a leading character when it really is a sign; previously any
    # first character other than "+" was dropped and the odds read as negative.
    if text[:1] in ("+", "-", "−"):
        sign, digits = text[0], text[1:]
    else:
        sign, digits = "+", text

    magnitude = float(digits)

    if sign == "+":
        return 100 / (100 + magnitude)
    return magnitude / (100 + magnitude)

In [12]:
def prob_vig_adjust(implied_prob_1, implied_prob_2):
    """Normalise two implied probabilities so that each pair sums to one.

    Parameters
    ----------
    implied_prob_1, implied_prob_2 : array-like or float
        Implied probabilities for the two sides of each bet. Lists, pandas
        Series, NumPy arrays and plain scalars are accepted; inputs are
        flattened to one dimension.

    Returns
    -------
    numpy.ndarray of shape (n, 2)
        Column 0 holds the adjusted probability for side 1 and column 1 the
        adjusted probability for side 2.
    """
    # np.asarray lets callers pass anything array-like, not only ndarrays
    # (which were required before because of the direct .reshape call).
    implied_prob_1 = np.asarray(implied_prob_1, dtype=float).reshape(-1)
    implied_prob_2 = np.asarray(implied_prob_2, dtype=float).reshape(-1)

    # Sum of both implied probabilities; dividing by it removes the
    # sportsbook's overage so the two sides add up to exactly one.
    vig = implied_prob_1 + implied_prob_2

    return np.stack((implied_prob_1 / vig, implied_prob_2 / vig), axis=-1)


In [13]:
def compute_prob(away_line, home_line):
    """Turn paired moneylines into vig-adjusted probabilities.

    Each away/home moneyline is converted with `odds_to_prob`, and the two
    resulting columns are then passed to `prob_vig_adjust`.

    Parameters
    ----------
    away_line, home_line : sequences of moneylines, paired by position.

    Returns
    -------
    Whatever `prob_vig_adjust` returns for the two implied-probability columns.
    """
    n_rows = len(away_line)
    raw_probs = np.zeros((n_rows, 2))

    for row, lines in enumerate(zip(away_line, home_line)):
        raw_probs[row, 0] = odds_to_prob(lines[0])
        raw_probs[row, 1] = odds_to_prob(lines[1])

    return prob_vig_adjust(raw_probs[:, 0], raw_probs[:, 1])

In [14]:
# Convert moneylines into true win probabilities
# Each compute_prob result has one column per side; transposing lets both
# columns be unpacked into the data frame in a single assignment.
W_probs = compute_prob(game_dict.away_moneyline, game_dict.home_moneyline)
game_dict["away_W_prob"], game_dict["home_W_prob"] = W_probs.T

cover_probs = compute_prob(game_dict.away_spread_line, game_dict.home_spread_line)
game_dict["away_cover_prob"], game_dict["home_cover_prob"] = cover_probs.T

over_under_probs = compute_prob(game_dict.over_line, game_dict.under_line)
game_dict["over_prob"], game_dict["under_prob"] = over_under_probs.T

In [15]:
# Display the first 16 rows, including the probability columns.
game_dict.head(16)

Unnamed: 0,away,home,home_spread,away_spread_line,home_spread_line,total,over_line,under_line,away_moneyline,home_moneyline,away_points,home_points,away_W_prob,home_W_prob,away_cover_prob,home_cover_prob,over_prob,under_prob
0,Cowboys,Eagles,-7,−110,−110,46.5,−110,−110,+250,−310,19.75,26.75,0.274247,0.725753,0.5,0.5,0.5,0.5
1,Chiefs,Chargers,3,−105,−115,45.5,−110,−110,−148,+124,24.25,21.25,0.57206,0.42794,0.489166,0.510834,0.5,0.5
2,Giants,Commanders,-7,−115,−105,45.5,−105,−115,+220,−270,19.25,26.25,0.299838,0.700162,0.510834,0.489166,0.489166,0.510834
3,Bengals,Browns,5,−110,−110,44.5,−115,−105,−230,+190,24.75,19.75,0.669007,0.330993,0.5,0.5,0.510834,0.489166
4,Steelers,Jets,3,−102,−118,39.5,−110,−110,−155,+130,21.25,18.25,0.582993,0.417007,0.482636,0.517364,0.5,0.5
5,Buccaneers,Falcons,2,−115,−105,48.5,−110,−110,−135,+114,25.25,23.25,0.551441,0.448559,0.510834,0.489166,0.5,0.5
6,Raiders,Patriots,-3,−105,−115,42.5,−110,−110,+136,−162,19.75,22.75,0.40663,0.59337,0.489166,0.510834,0.5,0.5
7,Cardinals,Saints,5,−110,−110,42.5,−110,−110,−230,+190,23.75,18.75,0.669007,0.330993,0.5,0.5,0.5,0.5
8,Dolphins,Colts,-1,−112,−108,45.5,−110,−110,−102,−118,22.25,23.25,0.482636,0.517364,0.50433,0.49567,0.5,0.5
9,Panthers,Jaguars,-3,−112,−108,45.5,−115,−105,+130,−155,21.25,24.25,0.417007,0.582993,0.50433,0.49567,0.510834,0.489166


## Save results

In [16]:
# Save results
# Use the absolute path under the /content/drive mount point so the write does
# not depend on the kernel's current working directory.
game_dict.to_csv("/content/drive/MyDrive/2025_gamelines.csv", index=False)