# NFL Betting Odds

**Author:** Jakob Malasig
**Last modified:** 11-17-2024 (created: 11-17-2024)

**Description:** I will be downloading data for NFL season (2018 to current) from Fixture Downloads and integrating into one. Will then export it as a csv file for further anaylsis.

<br>

Import libraries:

In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import time
import random

<br>

Function to help with web scraping:

In [3]:
# function to scroll from the top to the bottom of the web page
def random_scroll(browser, total_wait_time = 5):
    # get the total height of the page
    total_height = browser.execute_script("return document.body.scrollHeight")
    
    # number of steps to scroll (you can adjust this number)
    scroll_steps = random.randint(3, 10) # randomize how many scroll steps we will use
    
    # calculate the height to scroll on each step
    scroll_increment = total_height // scroll_steps

    # calculate the total time available for scrolling each step
    time_per_step = total_wait_time / scroll_steps
    
    # random scrolling across time
    for step in range(scroll_steps):
        # scroll by the increment (dividing total height by number of steps)
        browser.execute_script(f"window.scrollBy(0, {scroll_increment});")
        
        # random wait time between scrolls to simulate varying speed
        time.sleep(random.uniform(0.5 * time_per_step, 1.5 * time_per_step))  # randomize the wait within a range
        
    # final scroll to make sure you are at the very bottom (in case it didn't exactly match)
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

<br>

Setting up for scraping:

In [5]:
# setup chrome
chrome_options = Options()
services = Service(ChromeDriverManager().install())

# initialize webdriver
browser = webdriver.Chrome(service = Service())

<br>

Scraping data:

In [7]:
# create an empty lists to store the extracted data
dates = []
times = []
favorites = []
underdogs = []
scores = []
spreads = []
over_unders = []

# loop through years 2018-2024 (current)
# URL pattern: https://www.sportsoddshistory.com/nfl-game-season/?y=2018

# loop through 2019-2024 after testing
# loop through years 2018-2024 (current)
for i in range(18, 19):  # Make sure this range matches your desired years
    url = f"https://www.sportsoddshistory.com/nfl-game-season/?y=20{i}"
    print(url)
    
    # navigate to the web page using the URL
    browser.get(url)
    browser.maximize_window()

    # add a random delay before scraping
    total_wait_time = random.uniform(2, 20)
    random_scroll(browser, total_wait_time)

    # Find all the game rows
    games = browser.find_elements(By.XPATH, '//table[@class="soh1"]/tbody/tr')

    # loop through the games and scrape data
    for game in games[2:]:  # Skip the first two rows (header rows)
        try:
            # date
            date_element = game.find_element(By.XPATH, '//table[@class="soh1"]/tbody/tr//td[2]')
            date = date_element.text.strip()
            dates.append(date)

            # time
            time_element = game.find_element(By.XPATH, '//table[@class="soh1"]/tbody/tr//td[3]')
            time = time_element.text.strip()
            times.append(time)

            # favorite
            favorite_element = game.find_element(By.XPATH, '//table[@class="soh1"]/tbody/tr//td[5]//a')
            favorite = favorite_element.text.strip()
            favorites.append(favorite)

            # underdog
            underdog_element = game.find_element(By.XPATH, '//table[@class="soh1"]/tbody/tr//td[9]//a')
            underdog = underdog_element.text.strip()
            underdogs.append(underdog)

            # score
            score_element = game.find_element(By.XPATH, '//table[@class="soh1"]/tbody/tr//td[6]')
            score = score_element.text.strip()
            scores.append(score)
        
            # spread
            spread_element = game.find_element(By.XPATH, '//table[@class="soh1"]/tbody/tr//td[7]')
            spread = spread_element.text.strip()
            spreads.append(spread)

            # over/under
            over_under_element = game.find_element(By.XPATH, '//table[@class="soh1"]/tbody/tr//td[10]')
            over_under = over_under_element.text.strip()
            over_unders.append(over_under)
            
        except Exception as e:
            print(f"Error scraping game data: {e}")

    # counter to know bot moves to next page
    print(f"20{i} season done")

# close the automated browser
browser.close()

https://www.sportsoddshistory.com/nfl-game-season/?y=2018
2018 season done


In [9]:
print(len(dates))
print(len(times))
print(len(favorites))
print(len(underdogs))
print(len(scores))
print(len(spreads))
print(len(over_unders))

337
337
337
337
337
337
337


In [11]:
# create a dataframe from data
nfl_odds = pd.DataFrame({
    "date": dates,
    "time": times,
    "favorite": favorites,
    "underdog": underdogs,
    "score": scores,
    "spread": spreads,
    "over_under": over_unders 
})

# display the data
display(nfl_odds)

Unnamed: 0,date,time,favorite,underdog,score,spread,over_under
0,10-5 (66.7%),8-7-1 (53.3%),Philadelphia Eagles,Atlanta Falcons,7-4 (63.6%),5-5-1 (50.0%),9-7-0 (56.3%)
1,10-5 (66.7%),8-7-1 (53.3%),Philadelphia Eagles,Atlanta Falcons,7-4 (63.6%),5-5-1 (50.0%),9-7-0 (56.3%)
2,10-5 (66.7%),8-7-1 (53.3%),Philadelphia Eagles,Atlanta Falcons,7-4 (63.6%),5-5-1 (50.0%),9-7-0 (56.3%)
3,10-5 (66.7%),8-7-1 (53.3%),Philadelphia Eagles,Atlanta Falcons,7-4 (63.6%),5-5-1 (50.0%),9-7-0 (56.3%)
4,10-5 (66.7%),8-7-1 (53.3%),Philadelphia Eagles,Atlanta Falcons,7-4 (63.6%),5-5-1 (50.0%),9-7-0 (56.3%)
...,...,...,...,...,...,...,...
332,10-5 (66.7%),8-7-1 (53.3%),Philadelphia Eagles,Atlanta Falcons,7-4 (63.6%),5-5-1 (50.0%),9-7-0 (56.3%)
333,10-5 (66.7%),8-7-1 (53.3%),Philadelphia Eagles,Atlanta Falcons,7-4 (63.6%),5-5-1 (50.0%),9-7-0 (56.3%)
334,10-5 (66.7%),8-7-1 (53.3%),Philadelphia Eagles,Atlanta Falcons,7-4 (63.6%),5-5-1 (50.0%),9-7-0 (56.3%)
335,10-5 (66.7%),8-7-1 (53.3%),Philadelphia Eagles,Atlanta Falcons,7-4 (63.6%),5-5-1 (50.0%),9-7-0 (56.3%)


In [17]:
nfl_odds["date"].unique()

array(['10-5 (66.7%)'], dtype=object)