In [1]:
!pip install requests

In [1]:
import numpy as np
import pandas as pd
import requests
import re
from bs4 import BeautifulSoup

In [None]:
# goals
# - line history
# - % bets
# - % money
# NFL side and total

In [2]:
# map variations to standard team name
def map_team_name(team):
    """Translate a team-name variant into its three-letter abbreviation.

    Two spellings are recognised per franchise: the spaced full name
    (e.g. 'Dallas Cowboys') and a run-together variant (e.g. 'DallasCowboys',
    'L.A. RamsRams') -- presumably the form produced by the page scraper.

    Parameters
    ----------
    team : str
        Team name to look up; matching is exact and case-sensitive.

    Returns
    -------
    str
        The abbreviation when `team` is a known spelling. Otherwise a
        'No team match' message is printed and `team` is returned unchanged.
    """
    # (abbreviation, spaced full name, run-together variant)
    known_teams = (
        ('ARI', 'Arizona Cardinals', 'ArizonaCardinals'),
        ('ATL', 'Atlanta Falcons', 'AtlantaFalcons'),
        ('BAL', 'Baltimore Ravens', 'BaltimoreRavens'),
        ('BUF', 'Buffalo Bills', 'BuffaloBills'),
        ('CAR', 'Carolina Panthers', 'CarolinaPanthers'),
        ('CHI', 'Chicago Bears', 'ChicagoBears'),
        ('CIN', 'Cincinnati Bengals', 'CincinnatiBengals'),
        ('CLE', 'Cleveland Browns', 'ClevelandBrowns'),
        ('DAL', 'Dallas Cowboys', 'DallasCowboys'),
        ('DEN', 'Denver Broncos', 'DenverBroncos'),
        ('DET', 'Detroit Lions', 'DetroitLions'),
        ('GBP', 'Green Bay Packers', 'Green BayPackers'),
        ('HOU', 'Houston Texans', 'HoustonTexans'),
        ('IND', 'Indianapolis Colts', 'IndianapolisColts'),
        ('JAC', 'Jacksonville Jaguars', 'JacksonvilleJaguars'),
        ('KCC', 'Kansas City Chiefs', 'Kansas CityChiefs'),
        ('LAC', 'Los Angeles Chargers', 'L.A. ChargersChargers'),
        ('LAR', 'Los Angeles Rams', 'L.A. RamsRams'),
        ('LVR', 'Las Vegas Raiders', 'Las VegasRaiders'),
        ('MIA', 'Miami Dolphins', 'MiamiDolphins'),
        ('MIN', 'Minnesota Vikings', 'MinnesotaVikings'),
        ('NYG', 'New York Giants', 'N.Y. GiantsGiants'),
        ('NYJ', 'New York Jets', 'N.Y. JetsJets'),
        ('NEP', 'New England Patriots', 'New EnglandPatriots'),
        ('NOS', 'New Orleans Saints', 'New OrleansSaints'),
        ('PHI', 'Philadelphia Eagles', 'PhiladelphiaEagles'),
        ('PIT', 'Pittsburgh Steelers', 'PittsburghSteelers'),
        ('SFO', 'San Francisco 49ers', 'San Francisco49ers'),
        ('SEA', 'Seattle Seahawks', 'SeattleSeahawks'),
        ('TBB', 'Tampa Bay Buccaneers', 'Tampa BayBuccaneers'),
        ('TEN', 'Tennessee Titans', 'TennesseeTitans'),
        ('WAS', 'Washington Commanders', 'WashingtonCommanders'),
    )

    # Expand the table so that either spelling resolves to the abbreviation.
    lookup = {}
    for abbreviation, full_name, fused_name in known_teams:
        lookup[full_name] = abbreviation
        lookup[fused_name] = abbreviation

    if team in lookup:
        return lookup[team]

    # Unknown spelling: report it and hand the input back untouched.
    print(f'No team match for {team}')
    return team

In [3]:
# scraping function
def scrape_for_soup(base_url, url, timeout=30):
    """Fetch ``base_url + url`` and parse the response body as HTML.

    Parameters
    ----------
    base_url : str
        Leading part of the address, e.g. 'https://www.sportsbookreview.com'.
    url : str
        Remainder of the address; concatenated to `base_url` verbatim.
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    bs4.BeautifulSoup or str
        The parsed document when the status code is 200; otherwise the string
        ``"Error: <status code>"``. Callers must check for the string case
        before treating the result as a soup object.
    """
    # send GET request
    response = requests.get(base_url + url, timeout=timeout)

    # status check: failures are reported as a string rather than raised
    if response.status_code != 200:
        return f"Error: {response.status_code}"

    # parse HTML
    return BeautifulSoup(response.text, 'html.parser')

In [6]:
# Debug aid: show the full URL of a single game page.
# NOTE(review): in the visible notebook `base_url` is only assigned inside
# pull_odds_data, and `game_url` only there and in the Graveyard cell, so this
# cell presumably relies on leftover kernel state and would raise NameError
# on a fresh Restart & Run All -- confirm.
print(base_url+game_url)

https://www.sportsbookreview.com/scores/nfl-football/matchup/304917/


In [4]:
def pull_odds_data(week_num):
    """Scrape spread/total lines and bet percentages for every game of a week.

    Loads the sportsbookreview.com NFL odds page for ``Week{week_num}``,
    collects the per-game links whose href contains 'scores', then fetches and
    parses each game page.

    Parameters
    ----------
    week_num : int or str
        Week number; interpolated into the URL and stored as ``int`` in the
        'week' column.

    Returns
    -------
    pandas.DataFrame
        One row per game with columns week, home_team, away_team, home_spread
        (the text shown on the page, not converted to a number), home_percent,
        away_percent, total, over_percent and under_percent. The columns are
        present even when no game was found.

    Raises
    ------
    RuntimeError
        If the odds page or a game page does not return HTTP 200
        (scrape_for_soup signals that with an "Error: ..." string).
    """
    base_url = 'https://www.sportsbookreview.com'
    url = f'/betting-odds/nfl-football/?week=Week{week_num}'
    soup = scrape_for_soup(base_url, url)
    if isinstance(soup, str):
        # Fail with the HTTP status instead of an AttributeError on a str.
        raise RuntimeError(f'Could not load {base_url + url}: {soup}')

    # Collect the per-game page links. A set removes repeats of the same href.
    main_links = set()
    for game in soup.find_all('div', id='tbody-nfl'):
        for a_tag in game.find_all('a', href=True):
            link = a_tag['href']
            if 'scores' in link:
                main_links.add(link)

    game_list = []
    # Sorted so the row order is reproducible; set iteration order is not.
    for game_url in sorted(main_links):
        game_soup = scrape_for_soup(base_url, game_url)
        if isinstance(game_soup, str):
            raise RuntimeError(f'Could not load {base_url + game_url}: {game_soup}')
        game_list.append(_parse_game_odds(game_soup, week_num))

    columns = [
        'week', 'home_team', 'away_team', 'home_spread', 'home_percent',
        'away_percent', 'total', 'over_percent', 'under_percent',
    ]
    # Explicit columns keep the frame mergeable even when no games were found.
    return pd.DataFrame(game_list, columns=columns)


def _parse_game_odds(game_soup, week_num):
    """Extract one game's teams, spread, total and bet percentages as a dict."""
    # over/under bets: the percentages are the siblings either side of 'Total'
    total_bets = game_soup.find('div', string=re.compile('Total'))
    # NOTE(review): three trailing characters are dropped here but only one
    # for the spread percentages below -- presumably the markup differs; confirm.
    over_perc = float(total_bets.previous_sibling.get_text()[:-3])
    under_perc = float(total_bets.next_sibling.get_text()[:-3])

    # pull total
    # NOTE(review): the '__cILb0' suffix looks build-generated and may change
    # when the site is redeployed -- verify if this lookup starts returning None.
    tot_div = game_soup.find('div', class_='row GameMatchup_mainColumn__cILb0')
    # The span next to the 'Over' heading holds the total first; the second
    # element is the odds, if ever needed.
    over_heading = tot_div.find('h4', string='Over')
    total_span = over_heading.find_parent('div').find_next_sibling('div').find('span')
    total = float(total_span.contents[0].strip())

    # teams: the first TeamName div is the away side, the next one the home side
    team_name_class = re.compile('TeamName')
    away_div = game_soup.find('div', class_=team_name_class)
    home_div = away_div.find_next('div', class_=team_name_class)
    # The div text is the team name followed by the spread ('+5.5', '-3', 'PK');
    # keep only what precedes the first '+', '-' or 'PK'.
    name_before_spread = re.compile(r'.+?(?=\+|-|PK)')
    home_team = map_team_name(name_before_spread.findall(home_div.get_text())[0])
    away_team = map_team_name(name_before_spread.findall(away_div.get_text())[0])

    # spread bets: home share follows the 'Spread' label, away share precedes it
    spread = game_soup.find('div', string=re.compile('Spread'))
    home_perc = float(spread.next_sibling.get_text()[:-1])
    away_perc = float(spread.previous_sibling.get_text()[:-1])
    home_spread = home_div.find(class_='h5 mb-2 text-secondary opacity-75').text

    return {
        'week': int(week_num),
        'home_team': home_team,
        'away_team': away_team,
        'home_spread': home_spread,
        'home_percent': home_perc,
        'away_percent': away_perc,
        'total': total,
        'over_percent': over_perc,
        'under_percent': under_perc,
    }

def pull_result_data(week_num, year, season_type='2', timeout=30):
    """Fetch final scores for one week from ESPN's NFL scoreboard endpoint.

    Parameters
    ----------
    week_num : int or str
        Week number, sent as the ``week`` query parameter.
    year : int or str
        Sent as the ``dates`` query parameter.
    season_type : str, optional
        Sent as the ``seasontype`` query parameter. Defaults to '2', the value
        this function previously hard-coded (presumably the regular season --
        confirm against the ESPN API).
    timeout : float or tuple, optional
        Seconds to wait for the server, forwarded to ``requests.get``.

    Returns
    -------
    pandas.DataFrame
        One row per event with columns home_team, home_score, away_team and
        away_score. Team names are passed through map_team_name and scores are
        converted to ``int``. The columns are present even when the week has
        no events.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status.
    """
    url = f'https://site.api.espn.com/apis/site/v2/sports/football/nfl/scoreboard?dates={year}&seasontype={season_type}&week={week_num}'
    response = requests.get(url, timeout=timeout)
    # Surface HTTP failures here rather than as a KeyError on 'events' below.
    response.raise_for_status()
    scoreboard = response.json()

    result_list = []
    for event in scoreboard['events']:
        game = event['competitions'][0]
        # A fresh dict per event: a missing competitor leaves a gap instead of
        # silently reusing the previous event's team and score.
        row = {}
        for competitor in game['competitors']:
            # Any competitor not flagged 'home' is treated as the away side.
            side = 'home' if competitor['homeAway'] == 'home' else 'away'
            row[f'{side}_team'] = map_team_name(competitor['team']['displayName'])
            row[f'{side}_score'] = int(competitor['score'])
        result_list.append(row)

    # Explicit columns fix the column order and keep an empty week mergeable.
    columns = ['home_team', 'home_score', 'away_team', 'away_score']
    return pd.DataFrame(result_list, columns=columns)

def spread_outcome(x):
    """Return which side covered the spread for one game row.

    Parameters
    ----------
    x : mapping (e.g. a DataFrame row)
        Must provide 'home_score', 'away_score', 'home_spread', 'home_team'
        and 'away_team'. 'home_spread' may be a number, a numeric string such
        as '-3.5', or 'PK' (pick'em, treated as a spread of 0).

    Returns
    -------
    str
        The home team if the home margin plus the home spread is positive, the
        away team if it is negative, 'push' if it is exactly zero, and 'error'
        when the value cannot be compared (e.g. NaN inputs).
    """
    spread = x['home_spread']
    # The scraper's team-name pattern anticipates 'PK' as a spread label, and
    # float('PK') would raise ValueError, so map it to zero first.
    if isinstance(spread, str) and spread.strip().upper() == 'PK':
        spread = 0.0

    adjusted_margin = x['home_score'] - x['away_score'] + float(spread)
    if adjusted_margin > 0:
        return x['home_team']
    if adjusted_margin < 0:
        return x['away_team']
    if adjusted_margin == 0:
        return 'push'
    # Only reachable when every comparison is False, i.e. the margin is NaN.
    return 'error'
    
def total_outcome(x):
    """Classify one game row against its over/under line.

    Parameters
    ----------
    x : mapping (e.g. a DataFrame row)
        Must provide 'home_score', 'away_score' and 'total'.

    Returns
    -------
    str
        'over' when the combined score exceeds the total, 'under' when it is
        below, 'push' when equal, and 'error' when none of the comparisons
        holds (e.g. NaN inputs).
    """
    combined_score = x['home_score'] + x['away_score']
    line = x['total']

    if combined_score > line:
        return 'over'
    if combined_score < line:
        return 'under'
    return 'push' if combined_score == line else 'error'

    
def pull_weekly_data(week_num, year):
    """Combine one week's betting odds with its game results.

    Odds and results are inner-joined on ('home_team', 'away_team'), so a game
    missing from either source is dropped. Two columns are then derived per
    row: 'spread_result' (via spread_outcome) and 'total_result' (via
    total_outcome).

    NOTE(review): `year` is forwarded to pull_result_data only; pull_odds_data
    receives just the week -- confirm both sources refer to the same season.

    Parameters
    ----------
    week_num : int or str
        Week to pull.
    year : int or str
        Season passed to the results lookup.

    Returns
    -------
    pandas.DataFrame
        The merged frame with the two outcome columns appended.
    """
    odds = pull_odds_data(week_num)
    results = pull_result_data(week_num, year)

    merged = odds.merge(results, how='inner', on=['home_team', 'away_team'])
    # Row-wise classification of each matched game.
    merged['spread_result'] = merged.apply(spread_outcome, axis=1)
    merged['total_result'] = merged.apply(total_outcome, axis=1)
    return merged

# iterate through multiple weeks of given year
def pull_yearly_data(year, start_week, end_week):
    """Pull weekly odds/results for a range of weeks and stack them.

    Parameters
    ----------
    year : int or str
        Season, forwarded to pull_weekly_data as a string.
    start_week, end_week : int or str
        First and last week to pull (both inclusive); converted with ``int``.

    Returns
    -------
    pandas.DataFrame
        The weekly frames concatenated row-wise. Each week's row labels are
        kept as-is, so index values repeat across weeks. An empty DataFrame is
        returned when the range contains no weeks.
    """
    weekly_frames = [
        pull_weekly_data(str(week), str(year))
        for week in range(int(start_week), int(end_week) + 1)
    ]

    # pd.concat rejects an empty list; keep returning an empty frame instead.
    if not weekly_frames:
        return pd.DataFrame()

    # One concat at the end avoids re-copying the accumulated rows on every
    # iteration and avoids concatenating onto an empty placeholder frame.
    return pd.concat(weekly_frames, axis=0)

In [23]:
# Pull odds and results for weeks 1-2 of 2024 (issues live HTTP requests).
df = pull_yearly_data(year='2024', start_week=1,end_week=2)

In [24]:
# Display the combined odds/results frame.
df

Unnamed: 0,week,home_team,away_team,home_spread,home_percent,away_percent,total,over_percent,under_percent,home_score,away_score,spread_result,total_result
0,1,LAC,LVR,-3.0,61.0,39.0,40.0,58.0,42.0,22,10,LAC,under
1,1,KCC,BAL,-3.0,47.0,53.0,45.5,43.0,57.0,27,20,KCC,over
2,1,SFO,NYJ,-3.5,58.0,42.0,43.0,46.0,54.0,32,19,SFO,over
3,1,ATL,PIT,-4.0,56.0,44.0,42.0,34.0,66.0,10,18,PIT,under
4,1,MIA,JAC,-3.5,61.0,39.0,47.5,64.0,36.0,20,17,JAC,under
5,1,CIN,NEP,-7.5,68.0,32.0,37.5,42.0,58.0,10,16,NEP,under
6,1,CHI,TEN,-4.0,52.0,48.0,40.5,38.0,62.0,24,17,CHI,over
7,1,CLE,DAL,-1.5,44.0,56.0,40.5,57.0,43.0,17,33,DAL,over
8,1,NYG,MIN,1.0,45.0,55.0,41.5,39.0,61.0,6,28,MIN,under
9,1,PHI,GBP,-1.5,60.0,40.0,48.5,51.0,49.0,34,29,PHI,over


In [None]:
# next: add win cols for favorite/dog, home/away

In [None]:
# next: pull total history/movement if possible
# NOTE(review): in the visible notebook `line_links` and `base_url` are only
# assigned inside pull_odds_data, so this cell presumably raises NameError
# unless they are defined at notebook level elsewhere -- confirm.
line_url = list(line_links)[0]
line_soup = scrape_for_soup(base_url, line_url)

In [15]:
# Debug aid: dump the main-column markup of a game page to inspect its structure.
# NOTE(review): `game_soup` is only assigned inside pull_odds_data and in the
# Graveyard cell further down, so this cell presumably depends on leftover
# kernel state -- confirm.
div = game_soup.find('div', class_='row GameMatchup_mainColumn__cILb0')
print(div)

<div class="row GameMatchup_mainColumn__cILb0"><aside class="col-12 col-lg-3"><div class="mt-2"><h2 class="d-flex align-items-center justify-content-between mb-0 py-1 px-3 bg-dark h4" data-bs-theme="dark">Betting Consensus</h2><div class="d-flex justify-content-center pt-2"><div class="col-4 d-flex flex-column justify-content-center text-center text-primary fx-bold pb-3 GameMatchup_TeamName__hOY6B"><p class="h4">Dallas<br/>Cowboys</p><span class="h5 mb-2 text-secondary opacity-75">+5.5</span></div><div class="d-flex justify-content-center align-items-center mx-2 mt-4 rounded-circle fs-7 text-white bg-dark text-center GameMatchup_TagCircle__FABto">vs</div><div class="col-4 d-flex flex-column justify-content-center text-center text-primary fx-bold pb-3 GameMatchup_TeamName__hOY6B"><p class="h4">San Francisco<br/>49ers</p><span class="h5 mb-2 text-secondary opacity-75">-5.5</span></div></div><div class="d-flex flex-column w-100 border-top undefined Rows_Rows__wF9TM"><div class="d-flex ali

#### Graveyard

In [None]:
# Exploratory single-game version of the parsing done in pull_odds_data.
# NOTE(review): `main_links` and `base_url` are only assigned inside
# pull_odds_data in the visible notebook, so this cell presumably relied on
# leftover kernel state -- confirm before running.
game_url = list(main_links)[0]
# send request
game_soup = scrape_for_soup(base_url, game_url)

# spread bets: home share follows the 'Spread' label, away share precedes it
spread = game_soup.find('div', string=re.compile('Spread'))
home_perc = spread.next_sibling.get_text()[:-1]
away_perc = spread.previous_sibling.get_text()[:-1]

# over/under bets
total_bets = game_soup.find('div', string=re.compile('Total'))
over_perc = total_bets.previous_sibling.get_text()[:-3]
under_perc = total_bets.next_sibling.get_text()[:-3]
# pull total
tot_div = game_soup.find('div', class_='row GameMatchup_mainColumn__cILb0')
# find the span that contains the best total for the over and strip the first element (second element is odds if needed)
total = float(tot_div.find('h4', string='Over').find_parent('div').find_next_sibling('div').find('span').contents[0].strip())

# teams and spread
# Raw strings keep the regex backslashes literal instead of relying on
# Python passing unknown escape sequences through (which emits a warning).
team_name_class = re.compile('TeamName')
away_div = game_soup.find('div', class_=team_name_class)
home_div = away_div.find_next('div', class_=team_name_class)
home_team = re.findall(r'.+?(?=\+|-|PK)', home_div.get_text())[0]
away_team = re.findall(r'.+?(?=\+|-|PK)', away_div.get_text())[0]
away_spread = away_div.find(string=re.compile(r'\+?\-?\d+\.?\d*'))
home_spread = home_div.find(string=re.compile(r'\+?\-?\d+\.?\d*'))
print(f'The spread is {home_spread}')
print(f'% bets {home_team} {home_spread}: {home_perc}')
print(f'% bets {away_team} {away_spread} % bets: {away_perc} \n')
print(f'The total is {str(total)}')
print(f'% bets over {str(total)}: {over_perc}')
print(f'% bets under {str(total)}:{under_perc}')