<a href="https://colab.research.google.com/github/alexcontarino/personal-projects/blob/main/Sports_Betting/NFL_Moneyline_Scrape.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Scrapes the DraftKings website for moneylines on NFL games in the upcoming season, stores the parsed response in a DataFrame, and then computes each team's win probability for every game.

## Set-Up

In [1]:
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd
from google.colab import drive

In [2]:
# Mount Google Drive at /content/drive (Colab only) so results can be written to Drive.
drive.mount('/content/drive')

Mounted at /content/drive


## Web scraping and parsing

In [6]:
# Step 1: Send an HTTP request to the website
url = 'https://sportsbook.draftkings.com/leagues/football/nfl'
# Pass an explicit timeout (seconds): without one, requests waits indefinitely
# and an unresponsive server would hang the notebook on this cell.
response = requests.get(url, timeout=30)

In [7]:
# Collect the stripped text of every matching card in the response, keyed 1, 2, 3, ...
data_text = {}
i = 0
if response.status_code != 200:
    # Anything other than 200 is reported and data_text stays empty.
    print(f'Failed to retrieve the webpage. Status code: {response.status_code}')
else:
    # Parse the HTML content of the response body
    soup = BeautifulSoup(response.content, 'html.parser')

    # Every <div> element with class "parlay-card-10-a" contributes one entry
    content_divs = soup.find_all('div', class_='parlay-card-10-a')
    for i, div in enumerate(content_divs, start=1):
        # .strip() removes leading/trailing whitespace from the card text
        data_text[i] = div.text.strip()

In [8]:
# Step 2: Parse response to find each game
pattern = r'[A-Z]{4,}'
drop_sequence = "SpreadTotalMoneyline"

master_list = []
for card_text in data_text.values():
    # Keep only what follows the first occurrence of the header sequence;
    # when the sequence is absent, keep the whole card text.
    _, found, remainder = card_text.partition(drop_sequence)
    body = remainder if found else card_text

    # Split on runs of four or more capital letters, discard the piece before
    # the first run, and keep the remaining non-blank pieces, stripped.
    pieces = (piece.strip() for piece in re.split(pattern, body)[1:])
    master_list.extend(piece for piece in pieces if piece)

In [14]:
# Step 3: For each game, identify teams, spread and moneyline
# Team name: the first run of 3-12 ASCII letters in the segment.
pattern_team = r'[a-zA-Z]{3,12}'
# Spread: a "+" or ASCII "-" followed by an integer or decimal number.
pattern_spread = r'([\+-]\d+(?:\.\d+)?)'
# Moneyline: the last "+" or Unicode minus "−" in the segment plus the three
# characters after it.
# NOTE(review): exactly three characters are captured, so a four-digit
# moneyline would be truncated — confirm against the page text before widening.
pattern_line = r'([\+−].{3})(?!.*[\+−])'

# team_dict maps team name -> list of (spread, moneyline) tuples for that team.
# team_line_list keeps (team, spread, moneyline) tuples in page order.
team_dict = {}
team_line_list = []
for team_game_line in master_list:
    # [0] takes the first match; a segment with no match raises IndexError.
    team = re.findall(pattern_team, team_game_line)[0]
    spread = re.findall(pattern_spread, team_game_line)[0]
    moneyline = re.findall(pattern_line, team_game_line)[0]

    # pattern_team matches letters only, so "49ers" is captured as "ers".
    # Restore the full name before storing it, so no rename is needed after the
    # loop (the old unconditional pop('ers') failed when no 49ers row was parsed).
    if team == "ers":
        team = "49ers"

    # setdefault replaces the bare try/except, which hid unrelated errors.
    team_dict.setdefault(team, []).append((spread, moneyline))
    team_line_list.append((team, spread, moneyline))

## Convert response to dataframe and check for errors

In [16]:
# Step 4: Organize data into table format
# Consecutive entries of team_line_list are paired into one game record: the
# first entry of each pair is stored as the away side, the second as the home
# side. The list grows as games are found instead of being preallocated to a
# fixed size, so any number of parsed games works and no empty None
# placeholders are left at the end.
game_list = []
for entry_index, (team, spread, line) in enumerate(team_line_list):
    if entry_index % 2 == 0:
        # first entry of a pair: open a new game with the away side filled in
        game_list.append({"away": team, "home": None, "home_spread": None, "away_line": line, "home_line": None})
    else:
        # second entry of a pair: complete the most recent game with the home side
        game = game_list[-1]
        game["home"] = team
        game["home_spread"] = spread
        game["home_line"] = line

In [18]:
# Build a DataFrame with one row per entry of game_list
# (despite its name, game_dict is a DataFrame, not a dict).
game_dict = pd.DataFrame(game_list)
# Preview the first rows.
game_dict.head()

Unnamed: 0,away,home,home_spread,away_line,home_line
0,Cowboys,Eagles,-7.0,+260,−325
1,Chiefs,Chargers,3.0,−148,+124
2,Giants,Commanders,-7.0,+235,−290
3,Bengals,Browns,5.5,−230,+190
4,Raiders,Patriots,-3.5,+136,−162


In [19]:
# check for errors in parsing

team_count_check = 32

# Distinct team names seen in each column; both columns must contain exactly
# team_count_check distinct values for the check to pass.
away_teams = game_dict.away.unique()
home_teams = game_dict.home.unique()

if len(away_teams) == team_count_check and len(home_teams) == team_count_check:
    print("Success: 32 unique teams identified")
else:
    print("Error: Incorrect number of teams")

print("\n Home Teams:", sorted(home_teams))

print("\n Away Teams:", sorted(away_teams))

Success: 32 unique teams identified

 Home Teams: ['49ers', 'Bears', 'Bengals', 'Bills', 'Broncos', 'Browns', 'Buccaneers', 'Cardinals', 'Chargers', 'Chiefs', 'Colts', 'Commanders', 'Cowboys', 'Dolphins', 'Eagles', 'Falcons', 'Giants', 'Jaguars', 'Jets', 'Lions', 'Packers', 'Panthers', 'Patriots', 'Raiders', 'Rams', 'Ravens', 'Saints', 'Seahawks', 'Steelers', 'Texans', 'Titans', 'Vikings']

 Away Teams: ['49ers', 'Bears', 'Bengals', 'Bills', 'Broncos', 'Browns', 'Buccaneers', 'Cardinals', 'Chargers', 'Chiefs', 'Colts', 'Commanders', 'Cowboys', 'Dolphins', 'Eagles', 'Falcons', 'Giants', 'Jaguars', 'Jets', 'Lions', 'Packers', 'Panthers', 'Patriots', 'Raiders', 'Rams', 'Ravens', 'Saints', 'Seahawks', 'Steelers', 'Texans', 'Titans', 'Vikings']


## Estimate true win probabilities from game moneylines

In [20]:
def odds_to_prob(moneyline):
    """Convert an American moneyline string into its implied win probability.

    Input:
        moneyline: text such as "+260" or "−325". After surrounding whitespace
            is removed, the first character must be a sign: "+", ASCII "-",
            or the Unicode minus "−". The rest must be a number.

    Output:
        implied_prob: float. With odds being the unsigned number after the sign,
            "+" lines give 100 / (100 + odds) and minus lines give
            odds / (100 + odds).

    Raises:
        ValueError: if the text is empty, does not start with a recognised
            sign, or the part after the sign is not a number.
    """
    text = moneyline.strip()
    sign, magnitude_text = text[:1], text[1:]

    # Reject unsigned or empty input explicitly. Previously anything not
    # starting with "+" was treated as negative and its first character was
    # dropped, so "150" was silently read as -50.
    if sign not in ("+", "-", "−"):
        raise ValueError(f"moneyline must start with '+' or a minus sign, got {moneyline!r}")

    magnitude = float(magnitude_text)

    if sign == "+":
        return 100 / (100 + magnitude)
    return magnitude / (100 + magnitude)

In [21]:
def prob_vig_adjust(implied_prob_1, implied_prob_2):
    """Rescale paired implied probabilities so that each pair sums to 1.

    Input: implied probabilities for the two sides of each game
        implied_prob_1: scalar or array-like of implied probabilities (side 1)
        implied_prob_2: scalar or array-like of implied probabilities (side 2),
            broadcast-compatible with implied_prob_1

    Output: estimated true probabilities, adjusted for the overage/vig charged
        by the sportsbook
        numpy array with two columns [prob_1, prob_2]; the inputs are flattened,
        so there is one row per input element
    """
    # np.asarray lets plain floats, lists and pandas Series through as well as
    # ndarrays (a float has no .reshape, and list + list would concatenate).
    raw_1 = np.asarray(implied_prob_1)
    raw_2 = np.asarray(implied_prob_2)

    # Sum of both sides' implied probabilities; any excess over 1 is the vig.
    total = raw_1 + raw_2

    # Re-wrap with asarray because dividing 0-d arrays yields a numpy scalar.
    prob_1 = np.asarray(raw_1 / total).reshape(-1, 1)
    prob_2 = np.asarray(raw_2 / total).reshape(-1, 1)

    return np.concatenate((prob_1, prob_2), axis=-1)


In [22]:
def compute_prob(away_line, home_line):
    """Turn paired away/home moneylines into vig-adjusted probabilities.

    Each moneyline is converted with odds_to_prob into a two-column array
    (column 0 = away, column 1 = home) with one row per element of away_line.
    The two columns are then passed to prob_vig_adjust, whose result is returned.
    """
    raw = np.zeros((len(away_line), 2))

    row = 0
    for away, home in zip(away_line, home_line):
        # away is converted first, then home, and both land in the same row
        raw[row] = (odds_to_prob(away), odds_to_prob(home))
        row += 1

    away_implied, home_implied = raw[:, 0], raw[:, 1]
    return prob_vig_adjust(away_implied, home_implied)

In [23]:
# Convert moneylines into true win probabilities
# compute_prob returns a two-column array: column 0 is the away side,
# column 1 is the home side.
probs = compute_prob(game_dict.away_line, game_dict.home_line)
game_dict["away_prob"] = probs[:,0]
game_dict["home_prob"] = probs[:,1]

In [24]:
# Preview the first rows, now including the away_prob and home_prob columns.
game_dict.head()

Unnamed: 0,away,home,home_spread,away_line,home_line,away_prob,home_prob
0,Cowboys,Eagles,-7.0,+260,−325,0.266458,0.733542
1,Chiefs,Chargers,3.0,−148,+124,0.57206,0.42794
2,Giants,Commanders,-7.0,+235,−290,0.286449,0.713551
3,Bengals,Browns,5.5,−230,+190,0.669007,0.330993
4,Raiders,Patriots,-3.5,+136,−162,0.40663,0.59337


## Save results

In [29]:
# Save results
# index=False leaves the DataFrame's row index out of the CSV.
# NOTE(review): the path is relative, so the file only lands in the mounted
# Drive when the working directory is /content — confirm before relying on it.
game_dict.to_csv("drive/MyDrive/2025_gamelines.csv", index=False)