In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import logging

In [2]:
# Configure the root logger to write to myapp.log at INFO level.
# At this level, DEBUG records (e.g. the per-game logging.debug call in
# _extract_single_game) are filtered out.
logging.basicConfig(filename='myapp.log', level=logging.INFO)

In [32]:
# Page that is scraped for the NFL money-line odds table.
NFL_ODDS_URL = 'https://www.sportsline.com/nfl/odds/money-line/'

In [48]:
# Fetch the odds page. Despite its name, `html` is the requests Response
# object; the markup itself is read later through `html.text`.
html = requests.get(NFL_ODDS_URL)
# Display the HTTP status code to confirm the request succeeded.
html.status_code

200

In [49]:
# Parse the response body using the lxml parser.
soup = BeautifulSoup(html.text, 'lxml')

In [50]:
# Take the first <table> on the page and collect every <tbody> inside it.
table = soup.find("table")
games_tables = table.find_all('tbody')
# Display how many <tbody> elements were found.
len(games_tables)

14

In [51]:
# The <thead> carries the column labels; display it for inspection.
table_head = table.find('thead')
table_head

<thead class="OddsTableHeader__TableHeader-sc-1le6ao1-0 bLEkYV"><tr><th class="matchup">Matchup</th><th class="projected-score">Proj Score</th><th>consensus</th><th><span><img alt="Caesars" src="/assets/img/logo/caesars-sportsbook.svg"/></span></th><th>draftkings</th><th>fanduel</th><th>westgate</th></tr></thead>

In [52]:
# One <th> element per column label.
headers = table_head.find_all('th')
len(headers)

7

In [53]:
# Build the DataFrame column names from the header cells.
cols = []
for header in headers:
    label = header.text
    # The Caesars header holds an <img> instead of text, so its label is blank;
    # it is reported under the name "caesers".
    name = label if label != '' else "caesers"
    cols.append(name)
    # Each of these headers is followed by a matching "<name>_open" column.
    if label in ('consensus', '', 'draftkings', 'fanduel', 'westgate'):
        cols.append(name + '_open')
# Extra columns that have no header cell of their own.
cols.insert(1, 'record')
cols.append('date')
cols

['Matchup',
 'record',
 'Proj Score',
 'consensus',
 'consensus_open',
 'caesers',
 'caesers_open',
 'draftkings',
 'draftkings_open',
 'fanduel',
 'fanduel_open',
 'westgate',
 'westgate_open',
 'date']

In [54]:
# Number of <tbody> elements collected from the odds table.
len(games_tables)

14

In [55]:
# Inspect the <tr> rows of a single <tbody> (the second one).
rows = games_tables[1].find_all('tr')
len(rows)

5

In [56]:
# Full text of the third cell in the second row: the current odds followed
# directly by the "Open: ..." text.
rows[1].find_all('td')[2].text

'-316Open: -322'

In [57]:
# First <span> inside that same cell.
rows[1].find_all('td')[2].find('span')

<span class="primary">-316</span>

In [58]:
# Text of the last <div> inside that same cell.
rows[1].find_all('td')[2].find_all('div')[-1].text

'Open: -322'

In [59]:
def static_vars(**kwargs):
    """Create a decorator that stores each keyword argument on the decorated
    function as an attribute of the same name.

    The decorator returns the very same function object (no wrapper is
    created), so the attributes can be read and updated through the
    function's own name.
    """
    def _attach(func):
        for attr_name, attr_value in kwargs.items():
            setattr(func, attr_name, attr_value)
        return func
    return _attach

In [102]:
def _parse_open_odds(cell):
    """Return the value of a cell's trailing ``Open: <value>`` div, or None if it is missing."""
    divs = cell.find_all('div')
    if not divs:
        return None
    # partition() never raises, unlike indexing the result of split(': ').
    label, sep, value = divs[-1].text.partition(': ')
    if not sep or label.lower().strip() != 'open':
        return None
    return value


@static_vars(counter=0)
def _extract_single_game(game):
    """Extract the odds data of a single game from its ``<tbody>`` element.

    The ``counter`` function attribute is incremented on every call; it is
    only used to identify the game in log messages.

    Args:
        game: Parsed ``<tbody>`` element of one game. Its ``<tr>`` rows are
            expected to be: [0] empty, [1] away team, [2] home team,
            [3] date, [4] empty.

    Returns:
        list: ``[(away_name, home_name), (away_record, home_record), '',
        (away_odds, home_odds), (away_open, home_open), ..., date]``.
        The ``''`` entry is the projected-score placeholder. Every column
        after the second contributes two tuples (current and open odds);
        missing values are reported as ``('', '')``. ``date`` is the text
        before ``' on '`` in the last ``<div>`` of the date row, or ``''``
        when that row has no ``<div>``.

    Raises:
        IndexError: If the game has fewer than four rows.
        AttributeError: If the first cell of a team row lacks the ``<h4>``
            (name) or ``<span>`` (record) element.
    """
    _extract_single_game.counter += 1
    game_no = _extract_single_game.counter

    table_rows = game.find_all('tr')
    # table_rows[0] empty
    # table_rows[1] away team
    # table_rows[2] home team
    # table_rows[3] date
    # table_rows[4] empty
    if len(table_rows) != 5:
        logging.warning(f'Found an unexpected number of rows in {game_no}th game')
        logging.warning(f'found {len(table_rows)} rows')

    # Game data will hold the odds data for a single game
    game_data = []

    away = table_rows[1].find_all('td')
    home = table_rows[2].find_all('td')

    if len(home) != len(away):
        logging.warning('The length of the home data is not equal to the length of the away data')
        logging.warning(f'Home: {len(home)} Away: {len(away)}')

    if len(home) != 7:
        logging.warning(f'Home array on {game_no}th game is {len(home)} should be 7')

    if len(away) != 7:
        logging.warning(f'Away array on {game_no}th game is {len(away)} should be 7')

    # zip() stops at the shorter of the two rows if their lengths differ.
    for i, (away_data, home_data) in enumerate(zip(away, home)):
        # Name and Record are found in the same td entry
        if i == 0:
            # Extract Names
            name_a = away_data.find('h4').text
            name_h = home_data.find('h4').text

            # Extract Records
            rec_a = away_data.find('span').text
            rec_h = home_data.find('span').text

            game_data.append((name_a, name_h))
            game_data.append((rec_a, rec_h))
        # Projected Score case: a "locked" field, stored as an empty placeholder
        elif i == 1:
            game_data.append('')
        # Every other column holds odds and has an "open" field
        else:
            away_odds_html = away_data.find(class_='current-value')
            home_odds_html = home_data.find(class_='current-value')

            if away_odds_html is None or home_odds_html is None:
                logging.info(f'No odds found for game {game_no} at column {i}')
                # No odds
                game_data.append(('', ''))
                # No Open odds
                game_data.append(('', ''))
                continue

            game_data.append((away_odds_html.text, home_odds_html.text))

            open_a = _parse_open_odds(away_data)
            open_h = _parse_open_odds(home_data)

            if open_a is None or open_h is None:
                logging.warning(f'Did not find "Open" field for game {game_no} at column {i}')
                game_data.append(('', ''))
                continue

            game_data.append((open_a, open_h))

    # Date and Channel: the last div of the date row reads "<date> on <channel>".
    date_divs = table_rows[3].find_all('div')
    if date_divs:
        # partition() tolerates a missing or repeated ' on ' separator.
        date = date_divs[-1].text.partition(' on ')[0]
    else:
        logging.warning(f'No date found for game {game_no}')
        date = ''
    game_data.append(date)

    logging.debug(f'Data for game {game_no}: {game_data}')

    return game_data

In [103]:
# Parse every game's <tbody> into one list of values per game.
data = [_extract_single_game(game_tbody) for game_tbody in games_tables]
data

[[('Colts', 'Ravens'),
  ('1-3', '3-1'),
  '',
  ('+266', '-338'),
  ('+246', '-308'),
  ('+280', '-350'),
  ('+230', '-280'),
  ('+260', '-335'),
  ('+250', '-320'),
  ('+270', '-335'),
  ('+265', '-330'),
  ('+280', '-340'),
  ('+260', '-310'),
  'Oct 12, 12:15AM UTC'],
 [('Buccaneers', 'Eagles'),
  ('4-1', '2-3'),
  '',
  ('-316', '+244'),
  ('-322', '+251'),
  ('-320', '+250'),
  ('-300', '+240'),
  ('-335', '+260'),
  ('-320', '+250'),
  ('-320', '+260'),
  ('-270', '+220'),
  ('-310', '+260'),
  ('-310', '+260'),
  'Oct 15, 12:20AM UTC'],
 [('Dolphins', 'Jaguars'),
  ('1-4', '0-5'),
  '',
  ('-186', '+157'),
  ('-165', '+140'),
  ('-200', '+170'),
  ('-160', '+140'),
  ('-195', '+165'),
  ('-180', '+155'),
  ('-190', '+160'),
  ('-162', '+136'),
  ('-190', '+170'),
  ('-185', '+165'),
  'Oct 17, 1:30PM UTC'],
 [('Chargers', 'Ravens'),
  ('4-1', '3-1'),
  '',
  ('+150', '-178'),
  ('+151', '-179'),
  ('+150', '-175'),
  ('+150', '-175'),
  ('+160', '-190'),
  ('+165', '-195'),
  (

In [107]:
# Assemble the parsed games into a DataFrame, one row per game, labelled with
# the column names in `cols`.
df = pd.DataFrame(data, columns=cols)
# The drop below is disabled, so the 'Proj Score' column is kept in this view.
#df.drop(['Proj Score'], axis=1, inplace=True)
df

Unnamed: 0,Matchup,record,Proj Score,consensus,consensus_open,caesers,caesers_open,draftkings,draftkings_open,fanduel,fanduel_open,westgate,westgate_open,date
0,"(Colts, Ravens)","(1-3, 3-1)",,"(+266, -338)","(+246, -308)","(+280, -350)","(+230, -280)","(+260, -335)","(+250, -320)","(+270, -335)","(+265, -330)","(+280, -340)","(+260, -310)","Oct 12, 12:15AM UTC"
1,"(Buccaneers, Eagles)","(4-1, 2-3)",,"(-316, +244)","(-322, +251)","(-320, +250)","(-300, +240)","(-335, +260)","(-320, +250)","(-320, +260)","(-270, +220)","(-310, +260)","(-310, +260)","Oct 15, 12:20AM UTC"
2,"(Dolphins, Jaguars)","(1-4, 0-5)",,"(-186, +157)","(-165, +140)","(-200, +170)","(-160, +140)","(-195, +165)","(-180, +155)","(-190, +160)","(-162, +136)","(-190, +170)","(-185, +165)","Oct 17, 1:30PM UTC"
3,"(Chargers, Ravens)","(4-1, 3-1)",,"(+150, -178)","(+151, -179)","(+150, -175)","(+150, -175)","(+160, -190)","(+165, -195)","(+146, -174)","(+148, -176)","(+160, -180)","(+160, -180)","Oct 17, 5:00PM UTC"
4,"(Rams, Giants)","(4-1, 1-4)",,"(-546, +402)","(-558, +411)","(-650, +450)","(-600, +425)","(-490, +360)","(-525, +385)","(-550, +410)","(-490, +380)","(-650, +475)","(-600, +450)","Oct 17, 5:00PM UTC"
5,"(Vikings, Panthers)","(2-3, 3-2)",,"(-100, -120)","(-103, -116)","(+100, -120)","(-105, -115)","(+100, -120)","(+100, -120)","(+102, -120)","(-110, -106)","(+100, -120)","(+100, -120)","Oct 17, 5:00PM UTC"
6,"(Chiefs, Football Team)","(2-3, 2-3)",,"(-291, +233)","(-293, +240)","(-300, +240)","(-300, +240)","(, )","(, )","(-275, +225)","(-245, +200)","(-300, +250)","(-300, +250)","Oct 17, 5:00PM UTC"
7,"(Texans, Colts)","(1-4, 1-3)",,"(+410, -563)","(+410, -563)","(+425, -600)","(+425, -600)","(+360, -490)","(+360, -490)","(+410, -550)","(+360, -460)","(+425, -550)","(+400, -500)","Oct 17, 5:00PM UTC"
8,"(Packers, Bears)","(4-1, 3-2)",,"(-212, +174)","(-215, +175)","(-220, +180)","(-220, +180)","(-210, +175)","(-210, +175)","(-240, +198)","(-230, +190)","(-215, +185)","(-215, +185)","Oct 17, 5:00PM UTC"
9,"(Bengals, Lions)","(3-2, 0-5)",,"(-182, +154)","(-182, +154)","(-190, +160)","(-190, +160)","(-190, +160)","(-190, +160)","(-196, +164)","(-196, +164)","(-185, +165)","(-185, +165)","Oct 17, 5:00PM UTC"


In [108]:
NFL_ODDS_URL = 'https://www.sportsline.com/nfl/odds/money-line/'
def retrieve_game_lines_table(url=NFL_ODDS_URL, timeout=30):
    """Download the NFL money-line odds page and return it as a DataFrame.

    Args:
        url: Page to scrape. Defaults to ``NFL_ODDS_URL``.
        timeout: Seconds to wait for the server before giving up; passed to
            ``requests.get``.

    Returns:
        pandas.DataFrame: One row per game. Columns are ``Matchup``,
        ``record``, then ``<book>`` / ``<book>_open`` for every odds column of
        the page, then ``date``. Matchup, record and odds cells hold
        ``(away, home)`` tuples. The ``Proj Score`` column is dropped.

    Raises:
        RuntimeError: If the response status is not 200, or the page has no
            ``<table>`` / ``<thead>`` to parse.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    html = requests.get(url, timeout=timeout)

    if html.status_code != 200:
        message = f'URL: {url} returned status {html.status_code}'
        logging.error(message)
        raise RuntimeError(message)

    soup = BeautifulSoup(html.text, 'lxml')

    table = soup.find("table")
    # the header of the table containing all of the column labels
    table_head = table.find('thead') if table is not None else None
    if table_head is None:
        message = f'URL: {url} did not contain an odds table with a header'
        logging.error(message)
        raise RuntimeError(message)

    headers = table_head.find_all('th')

    # retrieve the columns from headers
    cols = []
    for position, header in enumerate(headers):
        # The Caesars header is an image, so it has no text
        name = header.text if header.text != '' else "caesers"
        cols.append(name)

        # Headers 0 and 1 are the matchup and the projected score. Every later
        # header is an odds column, for which _extract_single_game emits both
        # a current and an open value, so each one needs an "_open" column.
        if position >= 2:
            cols.append(name + '_open')

    cols.insert(1, 'record')
    cols.append('date')

    logging.info(f'Columns: {cols}')

    # games_tables will be a list where each element represents a single game's html
    games_tables = table.find_all('tbody')
    logging.info(f'Found {len(games_tables)} games')

    logging.info('Begin Parsing Game Data')
    # Restart the per-game counter so the game numbers in the log messages
    # refer to this run rather than accumulating across calls.
    _extract_single_game.counter = 0

    # Data will hold the data for every game
    # Format: [[(`away`, `home`), (`record_away`, `record_home`), '',
    #           (`line1_away`, `line1_home`), (`line1_away_open`, `line1_home_open`),..., date],
    #            ...]
    data = [_extract_single_game(game) for game in games_tables]

    logging.info('Finished Parsing Game Data')

    df = pd.DataFrame(data, columns=cols)
    return df.drop(columns=['Proj Score'])

In [109]:
# Run the full scrape and display the resulting DataFrame.
retrieve_game_lines_table()

Unnamed: 0,Matchup,record,consensus,consensus_open,caesers,caesers_open,draftkings,draftkings_open,fanduel,fanduel_open,westgate,westgate_open,date
0,"(Colts, Ravens)","(1-3, 3-1)","(+267, -339)","(+246, -308)","(+285, -360)","(+230, -280)","(+260, -335)","(+250, -320)","(+270, -335)","(+265, -330)","(+290, -350)","(+260, -310)","Oct 12, 12:15AM UTC"
1,"(Buccaneers, Eagles)","(4-1, 2-3)","(-316, +244)","(-322, +251)","(-320, +250)","(-300, +240)","(-335, +260)","(-320, +250)","(-320, +260)","(-270, +220)","(-310, +260)","(-310, +260)","Oct 15, 12:20AM UTC"
2,"(Dolphins, Jaguars)","(1-4, 0-5)","(-186, +157)","(-165, +140)","(-200, +170)","(-160, +140)","(-195, +165)","(-180, +155)","(-190, +160)","(-162, +136)","(-190, +170)","(-185, +165)","Oct 17, 1:30PM UTC"
3,"(Texans, Colts)","(1-4, 1-3)","(+410, -563)","(+410, -563)","(+425, -600)","(+425, -600)","(+360, -490)","(+360, -490)","(+410, -550)","(+360, -460)","(+425, -550)","(+400, -500)","Oct 17, 5:00PM UTC"
4,"(Packers, Bears)","(4-1, 3-2)","(-212, +174)","(-215, +175)","(-220, +180)","(-220, +180)","(-210, +175)","(-210, +175)","(-240, +198)","(-230, +190)","(-215, +185)","(-215, +185)","Oct 17, 5:00PM UTC"
5,"(Bengals, Lions)","(3-2, 0-5)","(-182, +154)","(-182, +154)","(-190, +160)","(-190, +160)","(-190, +160)","(-190, +160)","(-196, +164)","(-196, +164)","(-185, +165)","(-185, +165)","Oct 17, 5:00PM UTC"
6,"(Chiefs, Football Team)","(2-3, 2-3)","(-291, +233)","(-293, +240)","(-300, +240)","(-300, +240)","(, )","(, )","(-275, +225)","(-245, +200)","(-300, +250)","(-300, +250)","Oct 17, 5:00PM UTC"
7,"(Chargers, Ravens)","(4-1, 3-1)","(+150, -178)","(+151, -179)","(+150, -175)","(+150, -175)","(+160, -190)","(+165, -195)","(+146, -174)","(+148, -176)","(+160, -180)","(+160, -180)","Oct 17, 5:00PM UTC"
8,"(Rams, Giants)","(4-1, 1-4)","(-546, +402)","(-558, +411)","(-650, +450)","(-600, +425)","(-490, +360)","(-525, +385)","(-550, +410)","(-490, +380)","(-650, +475)","(-600, +450)","Oct 17, 5:00PM UTC"
9,"(Vikings, Panthers)","(2-3, 3-2)","(-100, -120)","(-103, -116)","(+100, -120)","(-105, -115)","(+100, -120)","(+100, -120)","(+102, -120)","(-110, -106)","(+100, -120)","(+100, -120)","Oct 17, 5:00PM UTC"
