In [22]:
# Libraries
from bs4 import BeautifulSoup, Comment
import numpy as np
import pandas as pd
import urllib.request
import requests
import random
import datetime
import math

## Game Objects

In [9]:
class GameInfo:
    """Pre-game betting information for a single matchup.

    Holds both team identifiers, the point spread as seen from each side,
    and the over/under total (``None`` when no total is supplied).
    """

    def __init__(self, home_team, away_team, home_spread, away_spread, over_under=None):
        self.home_team, self.away_team = home_team, away_team
        self.home_spread, self.away_spread = home_spread, away_spread
        self.over_under = over_under

    def __str__(self):
        # First line: "<away> (<away spread>) @ <home>"; second line: the over/under.
        matchup = ''.join((self.away_team, ' (', str(self.away_spread), ') @ ', self.home_team))
        total_line = 'O/U: ' + str(self.over_under)
        return matchup + '\n' + total_line

In [10]:
class GameResults:
    """Final score of one game, plus the combined total and the winner."""

    def __init__(self, home_team, away_team, home_score, away_score):
        self.home_team = home_team
        self.away_team = away_team
        self.home_score = home_score
        self.away_score = away_score
        self.total = home_score + away_score
        # winner is the identifier of the higher-scoring team, or the
        # literal string 'tie' when the scores are equal
        if home_score > away_score:
            self.winner = home_team
        elif away_score > home_score:
            self.winner = away_team
        else:
            self.winner = 'tie'

    def __str__(self):
        # Three lines: home score, away score, combined total.
        home_line = self.home_team + ' (home): ' + str(self.home_score)
        away_line = self.away_team + ' (away): ' + str(self.away_score)
        total_line = 'Total: ' + str(self.total)
        return '\n'.join((home_line, away_line, total_line))

## Game Info scrape

In [6]:
def get_game_info(score_box):
    """Build a GameInfo object from one scoreboard box.

    :param score_box: parsed element for a single game; it is searched for
        ``td`` cells with class ``in-progress-odds`` and class ``team``
    :return: GameInfo whose away spread is the negated home line
    """
    odds_cells = score_box.find_all('td', {'class': 'in-progress-odds'})
    # The first odds cell carries the over/under behind a one-character
    # prefix (presumably an "o" marker -- confirm against the page markup).
    total_text = odds_cells[0].text.strip()
    over_under = float(total_text[1:])
    # The second odds cell is parsed as the home team's line.
    home_line = float(odds_cells[1].text.strip())

    team_cells = score_box.find_all('td', {'class': 'team'})
    # The team identifier is the sixth '/'-separated piece of each team link;
    # the first team cell is treated as the away side, the second as home.
    away_team, home_team = (
        team_cells[idx].find('a')['href'].split('/')[5] for idx in (0, 1)
    )
    return GameInfo(home_team, away_team, home_line, home_line * -1, over_under)

In [11]:
def get_game_info_list(week_num):
    """Scrape the 2021 regular-season scoreboard page for one week.

    :param week_num: week number inserted into the scoreboard URL
    :return: list with one GameInfo per ``div.live-update`` box on the page
    """
    scoreboard_url = ''.join((
        'https://www.cbssports.com/nfl/scoreboard/all/2021/regular/',
        str(week_num),
        '/',
    ))
    with urllib.request.urlopen(scoreboard_url) as response:
        html = response.read()
    parsed_page = BeautifulSoup(html, "html.parser")
    # every 'live-update' div is handed to get_game_info as one game box
    return [
        get_game_info(box)
        for box in parsed_page.find_all('div', {'class': 'live-update'})
    ]

In [12]:
# Scrape the week-3 betting lines and print the first game as a sanity check.
# game_info_list is read again by a later cell when collecting team names.
week_num = 3
game_info_list = get_game_info_list(week_num)
print(game_info_list[0])

CAR (-4.0) @ HOU
O/U: 45.0


## Game Result scrape

In [13]:
def get_game_result(t):
    """Build a GameResults object from one scoreboard box.

    :param t: parsed element for a single game; it is searched for ``a``
        tags with class ``team`` and ``td`` cells with class ``total-score``
    :return: GameResults for the game
    """
    team_links = t.find_all('a', {'class': 'team'})
    # The team identifier is the sixth '/'-separated piece of each link;
    # the first link is treated as the away side, the second as home.
    away_team = team_links[0]['href'].split('/')[5]
    home_team = team_links[1]['href'].split('/')[5]

    score_cells = t.find_all('td', {'class': 'total-score'})
    # Score cells follow the same away-then-home order as the team links.
    home_score = int(score_cells[1].text)
    away_score = int(score_cells[0].text)
    return GameResults(home_team, away_team, home_score, away_score)

In [14]:
def get_game_result_list(week_num):
    """Scrape the 2021 regular-season scoreboard page for one week's results.

    :param week_num: week number inserted into the scoreboard URL
    :return: list with one GameResults per ``div.live-update`` box on the page
    """
    scoreboard_url = ''.join((
        'https://www.cbssports.com/nfl/scoreboard/all/2021/regular/',
        str(week_num),
        '/',
    ))
    with urllib.request.urlopen(scoreboard_url) as response:
        html = response.read()
    parsed_page = BeautifulSoup(html, "html.parser")
    # every 'live-update' div is handed to get_game_result as one game box
    return [
        get_game_result(box)
        for box in parsed_page.find_all('div', {'class': 'live-update'})
    ]

In [15]:
# Run the results scraper for week 1 and print the first game as a sanity check.
# NOTE: this rebinds week_num, which the earlier game-info cell set to 3, so
# game_info_list (week 3) and game_result_list (week 1) cover different weeks.
week_num = 1
game_result_list = get_game_result_list(week_num)
print(game_result_list[0])

LV (home): 33
BAL (away): 27
Total: 60


## Generate Datasets

In [16]:
# Flatten the scraped games into one list of team identifiers,
# home team first and then away team for each game.
teams = []
for game in game_info_list:
    teams.extend((game.home_team, game.away_team))
print(teams)

['HOU', 'CAR', 'JAC', 'ARI', 'NYG', 'ATL', 'DET', 'BAL', 'CLE', 'CHI', 'PIT', 'CIN', 'TEN', 'IND', 'KC', 'LAC', 'NE', 'NO', 'BUF', 'WAS', 'LV', 'MIA', 'DEN', 'NYJ', 'MIN', 'SEA', 'LAR', 'TB', 'SF', 'GB', 'DAL', 'PHI']


In [17]:
def get_fake_data():
    """Generate one fake 16-game week in which each of the 32 teams plays once.

    Spreads are drawn from -10 to 9.5 and over/unders from 33 to 52.5 (both in
    half-point steps); each side's score is drawn from 14 to 41. Scores are
    drawn independently of the spread and over/under.

    :return: tuple ``(fake_games_list, fake_results_list)`` -- a list of
        GameInfo and a list of GameResults, aligned by index
    """
    # pool of teams still without a game this week; shrinks as games are drawn
    available_teams = ['HOU', 'CAR', 'JAC', 'ARI', 'NYG', 'ATL', 'DET', 'BAL', 'CLE', 'CHI', 'PIT', 'CIN', 'TEN', 'IND', 'KC', 'LAC', 'NE', 'NO', 'BUF', 'WAS', 'LV', 'MIA', 'DEN', 'NYJ', 'MIN', 'SEA', 'LAR', 'TB', 'SF', 'GB', 'DAL', 'PHI']
    spread_options = list(np.arange(-10, 10, 0.5))
    total_options = list(np.arange(33, 53, 0.5))
    score_options = range(14, 42)

    fake_games_list, fake_results_list = [], []
    for _ in range(16):
        # Draw order is fixed: spread, home team, away team, over/under,
        # home score, away score.
        spread = random.choice(spread_options)
        home_team = random.choice(available_teams)
        available_teams.remove(home_team)
        away_team = random.choice(available_teams)
        available_teams.remove(away_team)
        ou = random.choice(total_options)
        home_score = random.choice(score_options)
        away_score = random.choice(score_options)

        fake_games_list.append(GameInfo(home_team, away_team, spread, -1 * spread, ou))
        fake_results_list.append(GameResults(home_team, away_team, home_score, away_score))
    return fake_games_list, fake_results_list

In [18]:
fake_games, fake_results = get_fake_data()
# Print every fake matchup followed by the result stored at the same index.
for i, fake_game in enumerate(fake_games):
    print(fake_game)
    print(fake_results[i])

LV (-2.0) @ MIA
O/U: 52.5
MIA (home): 16
LV (away): 22
Total: 38
TB (4.0) @ LAR
O/U: 50.5
LAR (home): 37
TB (away): 15
Total: 52
IND (-4.0) @ NO
O/U: 33.0
NO (home): 16
IND (away): 33
Total: 49
TEN (-1.5) @ HOU
O/U: 39.0
HOU (home): 34
TEN (away): 40
Total: 74
GB (4.0) @ BUF
O/U: 39.5
BUF (home): 18
GB (away): 22
Total: 40
DAL (-8.5) @ PHI
O/U: 37.0
PHI (home): 31
DAL (away): 17
Total: 48
CIN (-9.0) @ ARI
O/U: 40.0
ARI (home): 18
CIN (away): 16
Total: 34
DET (4.0) @ BAL
O/U: 42.5
BAL (home): 35
DET (away): 30
Total: 65
KC (-1.0) @ CLE
O/U: 51.0
CLE (home): 27
KC (away): 21
Total: 48
PIT (-9.0) @ LAC
O/U: 33.0
LAC (home): 22
PIT (away): 24
Total: 46
SF (1.0) @ WAS
O/U: 37.0
WAS (home): 24
SF (away): 24
Total: 48
SEA (-7.5) @ CAR
O/U: 51.0
CAR (home): 26
SEA (away): 35
Total: 61
MIN (-3.5) @ JAC
O/U: 48.0
JAC (home): 22
MIN (away): 35
Total: 57
NYG (-5.5) @ NYJ
O/U: 40.0
NYJ (home): 28
NYG (away): 20
Total: 48
ATL (-2.0) @ NE
O/U: 35.5
NE (home): 29
ATL (away): 33
Total: 62
CHI (-2.0) @ 

## Helper functions

In [55]:
# function for returning the NFL season and week number for a given date
# (the day-of-year offsets below are hard-coded for the 2021 season)
# :param dt: date or datetime object (anything with .timetuple() and .year)
# :return: tuple of (season year, week number)
def get_week_season(dt):
    day_of_year = dt.timetuple().tm_yday
    season_year = dt.year
    # The 2021 season started on day 252 of the year, so subtracting 251
    # places opening day in week 1.
    # Dates earlier in the calendar year than that offset are treated as the
    # tail of the season that began the previous year (e.g. January 2022 games
    # belong to the 2021 season): continue the day count past December 31 and
    # report the season's starting year rather than the calendar year.
    if day_of_year < 251:
        day_of_year += 365
        season_year -= 1
    week_num = math.ceil((day_of_year - 251) / 7)
    return (season_year, week_num)

In [56]:
# Spot-check the helper with a late-December 2021 date; the bare call on the
# last line is displayed as the cell's output.
d = datetime.date(2021, 12, 23)
get_week_season(d)

(2021, 16)