In [1]:
import time
import requests
import pandas as pd
import numpy as np
from datetime import datetime
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options

In [2]:
def clean_name(name):
    """Drop a single parenthesised prefix from a participant name.

    When ``name`` contains exactly one '(' and exactly one ')', everything up
    to and including the ')' is discarded and the remainder is returned
    unchanged (surrounding whitespace is not stripped). Any other input is
    returned as-is.
    """
    has_single_pair = name.count('(') == 1 and name.count(')') == 1
    if not has_single_pair:
        return name
    # With exactly one ')', the text after it is the last piece of the split.
    _, _, remainder = name.rpartition(')')
    return remainder

In [3]:
# Scrape money-line, point-spread and totals odds for one day of ATP matches
# from sportsbookreview.com and collect them into `odds_data`, one row per
# (match, bookmaker).

ODDS_COLUMNS = ['DATE', 'WINNER', 'LOSER', 'BOOKIE', 'W_MONEYLINE', 'L_MONEYLINE', 'W_POINTSPREAD', 'TOTAL']

# Placeholder pair used when a bookmaker shows no (or incomplete) odds for a game.
MISSING_ODDS = ('-', '-')

# CSS class string of the icon that marks the winning participant of a game.
WINNER_ICON_CLASS = 'actionIcon-3v50- sbr-icon-caret-left winnerIcon-1QZdE smallFont-W5dE8'

date = '1/10/21'
# The site's `date` query parameter is built as YYYYMMDD.
formatted_date = datetime.strptime(date, '%m/%d/%y').strftime('%Y%m%d')

bet_types = ['money-line', 'pointspread', 'totals']

# Class of the <span> holding the odds value on each bet-type page.
# Money-line spans are looked up with an empty class value.
bet_type_to_class = {
    'money-line': '',
    'pointspread': 'adjust-1uDgI',
    'totals': 'adjust-1uDgI'
}

# Bookmaker name -> value of the `data-vertical-sbid` attribute on its odds section.
bookie_to_dvs = {
    'Bookmaker': '93',
    'Pinnacle': '238'
}


def _read_odds(game, bookie_id, odds_class):
    """Return the first two odds strings one bookmaker lists for ``game``.

    '½' is rewritten as '.5'. If the bookmaker's section is absent, or it
    holds fewer than two odds spans, the '-' placeholder pair is returned
    instead of raising.
    """
    bookie_info = game.find(
        'section',
        {'class': 'numbersContainer-29L5c', 'data-vertical-sbid': bookie_id}
    )
    if bookie_info is None:
        return list(MISSING_ODDS)

    odds = bookie_info.find_all('span', {'class': odds_class})
    if len(odds) < 2:
        return list(MISSING_ODDS)
    return [odds[0].text.replace('½', '.5'), odds[1].text.replace('½', '.5')]


# Rows are accumulated as plain dicts and turned into a DataFrame once at the
# end; growing a DataFrame row by row is quadratic and `DataFrame.append` no
# longer exists in pandas >= 2.0.
records = []
# (winner, loser, bookie) -> the record(s) created for it on the money-line
# page, so the later bet-type pages can fill in their columns.
records_by_match = {}

driver = ChromeDriverManager().install()
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--window-size=5000,5000")

# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# convention; newer Selenium releases expect a Service object -- confirm the
# installed version before upgrading.
browser = webdriver.Chrome(driver, options=chrome_options)

try:
    for bet_type in bet_types:
        print('------------------', bet_type, '------------------')
        URL = 'https://www.sportsbookreview.com/betting-odds/atp-tennis/{}/?date={}'.format(bet_type, formatted_date)

        browser.get(URL)
        # Fixed pause before reading the page source.
        time.sleep(3.000)
        soup = BeautifulSoup(browser.page_source, 'html.parser')

        # Game rows come in two variants (with and without the dark-background class).
        light_games = soup.find_all('div', class_='eventMarketGridContainer-3QipG neverWrap-lD_Yj compact-2-t2Y')
        dark_games = soup.find_all('div', class_='eventMarketGridContainer-3QipG neverWrap-lD_Yj compact-2-t2Y darkBackground-LTnfM')

        for game in light_games + dark_games:
            players = game.find_all('span', class_='participantBox-3ar9Y')
            winner_icon = game.find('i', class_=WINNER_ICON_CLASS)
            if winner_icon is None or len(players) < 2:
                # No winner marker (or an incomplete participant list): the row
                # cannot be labelled WINNER/LOSER, so skip it instead of crashing.
                continue

            winner = winner_icon.parent.parent.text
            if winner == players[0].text:
                winner_index, loser_index = 0, 1
            else:
                winner_index, loser_index = 1, 0
            loser = players[loser_index].text

            winner_name = clean_name(winner)
            loser_name = clean_name(loser)

            for bookie, bookie_id in bookie_to_dvs.items():
                match_key = (winner_name, loser_name, bookie)

                if bet_type == 'money-line':
                    # Rows are only ever created on the money-line page; the
                    # other pages update rows that already exist.
                    record = dict.fromkeys(ODDS_COLUMNS, np.nan)
                    record.update(DATE=date, WINNER=winner_name, LOSER=loser_name, BOOKIE=bookie)
                    records.append(record)
                    records_by_match.setdefault(match_key, []).append(record)

                odds = _read_odds(game, bookie_id, bet_type_to_class[bet_type])

                if bet_type == 'money-line':
                    updates = {
                        'W_MONEYLINE': odds[winner_index],
                        'L_MONEYLINE': odds[loser_index],
                    }
                elif bet_type == 'pointspread':
                    updates = {'W_POINTSPREAD': odds[winner_index]}
                else:
                    # Totals: the first listed value is stored.
                    updates = {'TOTAL': odds[0]}

                # Matches absent from the money-line page have no record and are ignored.
                for record in records_by_match.get(match_key, []):
                    record.update(updates)
finally:
    # Always shut the headless Chrome session down, even if scraping fails,
    # so repeated runs do not leak browser processes.
    browser.quit()

odds_data = pd.DataFrame(records, columns=ODDS_COLUMNS)

[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389
[WDM] - Driver [/Users/brandonshimiaie/.wdm/drivers/chromedriver/mac64/89.0.4389.23/chromedriver] found in cache




------------------ money-line ------------------
------------------ pointspread ------------------
------------------ totals ------------------


In [4]:
# Remove pandas' cap on displayed rows (this changes the setting for the rest
# of the session), then render the scraped odds table in full.
pd.options.display.max_rows = None
display(odds_data)

Unnamed: 0,DATE,WINNER,LOSER,BOOKIE,W_MONEYLINE,L_MONEYLINE,W_POINTSPREAD,TOTAL
0,1/10/21,L. Giustino,D. Istomin,Bookmaker,-149,121,-2,22
1,1/10/21,L. Giustino,D. Istomin,Pinnacle,-157,129,-2,22
2,1/10/21,S. Diez,A. Vatutin,Bookmaker,-200,161,-3.5,21.5
3,1/10/21,S. Diez,A. Vatutin,Pinnacle,-190,155,-3,21.5
4,1/10/21,E. Ymer,B. Mott,Bookmaker,-161,131,-2.5,22
5,1/10/21,E. Ymer,B. Mott,Pinnacle,-155,127,-2.5,22
6,1/10/21,T. Daniel,R. Cid Subervi,Bookmaker,-714,490,-5,19.5
7,1/10/21,T. Daniel,R. Cid Subervi,Pinnacle,-821,540,-5.5,20
8,1/10/21,J. Chardy,F. Fognini,Bookmaker,125,-153,+2,22.5
9,1/10/21,J. Chardy,F. Fognini,Pinnacle,125,-138,+2,22.5


In [18]:
# '-' is the placeholder the scraping cell stores when a bookmaker lists no
# odds. float() cannot parse it, so the error is caught and shown here rather
# than left to abort a full notebook run.
try:
    placeholder_as_float = float('-')
    print(placeholder_as_float)
except ValueError as conversion_error:
    placeholder_as_float = None
    print('ValueError:', conversion_error)

ValueError: invalid literal for int() with base 10: '-155.5'