In [1]:
import pandas as pd
import sys

In [2]:
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

In [3]:
def draftkings_parse(input_dict, driver, key):
    """Build three-way odds rows from an already-loaded DraftKings page.

    Parameters
    ----------
    input_dict : dict
        Must contain 'outcomes_class' (class name of the outcome labels) and
        'odds_class' (class attribute value of the odds ``<span>`` elements).
    driver : selenium WebDriver
        Driver that has already navigated to the page to scrape.
    key : str
        Source label copied into every returned row.

    Returns
    -------
    list of tuple
        One 7-tuple per match:
        (first label, third label, odds 0, odds 1, odds 2, key, timestamp),
        where labels and odds are consumed in consecutive groups of three.
        The second label of each group is skipped (presumably the draw
        label -- confirm against the live page). All rows share a single
        timestamp taken after scraping. Rows are truncated to the shortest
        of the label/odds sequences.
    """
    outcomes = [x.text for x in driver.find_elements_by_class_name(input_dict['outcomes_class'])]

    # Odds are read from the page source rather than through the driver,
    # presumably because 'odds_class' is a space-separated compound class
    # that a single class-name lookup cannot express -- TODO confirm.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    # attrs must be a dict ({'class': value}); the previous set literal
    # ({'class', value}) also matched any span whose class was "class".
    odds = [x.text for x in soup.find_all('span', {'class': input_dict['odds_class']})]

    scraped_at = datetime.now()
    return [
        (home, away, home_odds, tie_odds, away_odds, key, scraped_at)
        for home, away, home_odds, tie_odds, away_odds in zip(
            outcomes[::3], outcomes[2::3], odds[::3], odds[1::3], odds[2::3]
        )
    ]

def fanduel_parse(input_dict, driver, key):
    """Build three-way odds rows from an already-loaded FanDuel page.

    Parameters
    ----------
    input_dict : dict
        Must contain 'outcomes_class' and 'odds_class', the class names used
        to locate team labels and odds elements.
    driver : selenium WebDriver
        Driver that has already navigated to the page to scrape.
    key : str
        Source label copied into every returned row.

    Returns
    -------
    list of tuple
        One 7-tuple per match:
        (label 0, label 1, odds 0, odds 1, odds 2, key, timestamp),
        with labels consumed in pairs and odds in groups of three. All rows
        share a single timestamp taken after scraping. Rows are truncated to
        the shortest of the label/odds sequences.
    """
    outcomes = [x.text for x in driver.find_elements_by_class_name(input_dict['outcomes_class'])]
    odds = [x.text for x in driver.find_elements_by_class_name(input_dict['odds_class'])]

    # The constant columns (key, timestamp) are attached per produced row.
    # Sizing them with len(outcomes[::3]) while teams are paired with [::2]
    # made zip() stop early and silently drop roughly a third of the matches.
    scraped_at = datetime.now()
    return [
        (home, away, home_odds, tie_odds, away_odds, key, scraped_at)
        for home, away, home_odds, tie_odds, away_odds in zip(
            outcomes[::2], outcomes[1::2], odds[::3], odds[1::3], odds[2::3]
        )
    ]

def bovada_parse(input_dict, driver, key):
    """Build three-way odds rows from an already-loaded Bovada page.

    Parameters
    ----------
    input_dict : dict
        Must contain 'outcomes_class' and 'odds_class', the class names used
        to locate team labels and odds elements.
    driver : selenium WebDriver
        Driver that has already navigated to the page to scrape.
    key : str
        Source label copied into every returned row.

    Returns
    -------
    list of tuple
        One 7-tuple per match:
        (label 0, label 1, odds[2], odds[4], odds[3], key, timestamp),
        with labels consumed in pairs and odds in groups of seven. Only
        offsets 2, 4 and 3 of each odds group are used, in that order; the
        other four entries are ignored (presumably other markets -- confirm
        against the live page). All rows share a single timestamp taken
        after scraping. Rows are truncated to the shortest sequence.
    """
    outcomes = [x.text for x in driver.find_elements_by_class_name(input_dict['outcomes_class'])]
    odds = [x.text for x in driver.find_elements_by_class_name(input_dict['odds_class'])]

    # The constant columns (key, timestamp) are attached per produced row.
    # Sizing them with len(outcomes[::3]) while teams are paired with [::2]
    # made zip() stop early and silently drop roughly a third of the matches.
    scraped_at = datetime.now()
    return [
        (home, away, home_odds, tie_odds, away_odds, key, scraped_at)
        for home, away, home_odds, tie_odds, away_odds in zip(
            outcomes[::2], outcomes[1::2], odds[2::7], odds[4::7], odds[3::7]
        )
    ]

In [4]:
# Show the interpreter's platform identifier; the scraping cell further down
# picks its chromedriver path by comparing against this value.
sys.platform

'darwin'

In [5]:
# Registry of sportsbooks to scrape. Each entry holds the page URL, the parser
# function to call on the loaded page, and the class names that parser receives
# as its first argument. Entries are visited in the order written here.
prem_dir = {
    'draftkings': {
        'url': 'https://sportsbook.draftkings.com/leagues/soccer/53591936',
        'parser': draftkings_parse,
        'args': {
            'outcomes_class': 'sportsbook-outcome-cell__label',
            'odds_class': 'sportsbook-odds american default-color',
        },
    },
    'fanduel': {
        'url': 'https://sportsbook.fanduel.com/sports/navigation/730.1/7567.1',
        'parser': fanduel_parse,
        'args': {
            'outcomes_class': 'name',
            'odds_class': 'selectionprice',
        },
    },
    'bovada': {
        'url': 'https://www.bovada.lv/sports/soccer/england-premier-league',
        'parser': bovada_parse,
        'args': {
            'outcomes_class': 'name',
            'odds_class': 'bet-price',
        },
    },
}
           

In [6]:
# Headless Chrome settings shared by every scrape below.
options = Options()
options.headless = True
options.add_argument("--window-size=1920,1200")

# Choose the bundled chromedriver binary for the current operating system.
if sys.platform == 'darwin':
    DRIVER_PATH = 'driver/mac/chromedriver'
elif sys.platform.startswith('linux'):  # covers both 'linux' and legacy 'linux2'
    DRIVER_PATH = 'driver/linux/chromedriver'
elif sys.platform == 'win32':
    DRIVER_PATH = 'driver/windows/chromedriver'
else:
    # Fail here with a clear message instead of a NameError on DRIVER_PATH later.
    raise RuntimeError('No chromedriver configured for platform %r' % sys.platform)

# Column order matches the tuples returned by the *_parse functions.
column_names = ['home_team','away_team', 'home_odds','tie_odds', 'away_odds', 'source', 'datetime']

# One fresh browser per sportsbook: load the page, parse it, preview the rows.
for key, site in prem_dir.items():
    driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
    try:
        driver.get(site['url'])
        # Let element lookups poll for up to 3 seconds before giving up.
        driver.implicitly_wait(3)

        data = site['parser'](site['args'], driver, key)

        ### Needs to add data persistence

        print(pd.DataFrame(data, columns=column_names).head())
    finally:
        # Always shut the browser down, even when loading or parsing fails,
        # so a failed site does not leave a headless Chrome process behind.
        driver.quit()

      home_team       away_team home_odds tie_odds away_odds      source  \
0      West Ham         Watford      +143     +175      +265  draftkings   
1  Norwich City         Burnley      +240     +230      +123  draftkings   
2   Bournemouth     Southampton      +140     +270      +180  draftkings   
3     Tottenham  Leicester City      +135     +245      +200  draftkings   
4      Brighton       Newcastle      -112     +240      +340  draftkings   

                    datetime  
0 2020-07-16 21:52:34.734008  
1 2020-07-16 21:52:34.734008  
2 2020-07-16 21:52:34.734008  
3 2020-07-16 21:52:34.734008  
4 2020-07-16 21:52:34.734008  
     home_team    away_team home_odds tie_odds away_odds   source  \
0     West Ham      Watford      +140     +180      +260  fanduel   
1      Norwich      Burnley      +240     +230      +120  fanduel   
2  Bournemouth  Southampton      +135     +260      +185  fanduel   
3    Tottenham    Leicester      +135     +230      +200  fanduel   
4    Sheff U

In [7]:
# Read the "spi_matches_latest" club-soccer CSV from the FiveThirtyEight
# projects site into a DataFrame. This is fetched over the network on each run.
matches_538 = pd.read_csv('https://projects.fivethirtyeight.com/soccer-api/club/spi_matches_latest.csv')

In [8]:
# Keep only the rows whose 'league' value is the Premier League.
prem_matches = matches_538.loc[matches_538['league'] == 'Barclays Premier League']

In [9]:
# Columns to display, in output order; related home/away fields are grouped per line.
columns = [
    'date', 'league', 'team1', 'team2',
    'spi1', 'spi2',
    'prob1', 'prob2', 'probtie',
    'proj_score1', 'proj_score2',
    'importance1', 'importance2',
    'score1', 'score2',
    'xg1', 'xg2',
    'nsxg1', 'nsxg2',
    'adj_score1', 'adj_score2',
]

In [10]:
# Show the selected columns with the latest match dates first.
prem_matches.loc[:, columns].sort_values(by='date', ascending=False)

Unnamed: 0,date,league,team1,team2,spi1,spi2,prob1,prob2,probtie,proj_score1,...,importance1,importance2,score1,score2,xg1,xg2,nsxg1,nsxg2,adj_score1,adj_score2
9092,2020-07-26,Barclays Premier League,Arsenal,Watford,78.57,70.86,0.4952,0.2507,0.2541,1.62,...,,,,,,,,,,
9091,2020-07-26,Barclays Premier League,West Ham United,Aston Villa,70.01,64.07,0.4834,0.2752,0.2414,1.75,...,,,,,,,,,,
9090,2020-07-26,Barclays Premier League,Newcastle,Liverpool,61.75,91.46,0.0958,0.7430,0.1612,0.78,...,,,,,,,,,,
9089,2020-07-26,Barclays Premier League,Manchester City,Norwich City,95.31,54.93,0.9275,0.0136,0.0588,3.41,...,,,,,,,,,,
9088,2020-07-26,Barclays Premier League,Southampton,Sheffield United,73.87,72.16,0.4321,0.2890,0.2789,1.36,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,2019-08-10,Barclays Premier League,AFC Bournemouth,Sheffield United,69.61,61.84,0.5512,0.2114,0.2374,1.80,...,29.3,38.2,1.0,1.0,1.46,1.28,1.02,0.75,1.05,1.05
414,2019-08-10,Barclays Premier League,West Ham United,Manchester City,69.97,94.24,0.0997,0.7393,0.1610,0.82,...,23.4,76.1,0.0,5.0,1.27,2.99,0.51,1.84,0.00,4.20
453,2019-08-10,Barclays Premier League,Watford,Brighton and Hove Albion,67.64,62.74,0.5133,0.2371,0.2496,1.67,...,31.3,38.8,0.0,3.0,0.73,0.74,1.68,1.01,0.00,2.99
469,2019-08-10,Barclays Premier League,Tottenham Hotspur,Aston Villa,85.07,64.64,0.7290,0.0906,0.1804,2.18,...,61.8,32.3,3.0,1.0,3.09,0.69,2.41,0.46,2.64,1.05


In [12]:
# Show the Premier League rows whose 'date' equals '2020-07-16'.
prem_matches.loc[prem_matches['date'] == '2020-07-16']

Unnamed: 0,season,date,league_id,league,team1,team2,spi1,spi2,prob1,prob2,...,importance1,importance2,score1,score2,xg1,xg2,nsxg1,nsxg2,adj_score1,adj_score2
8758,2019,2020-07-16,2411,Barclays Premier League,Everton,Aston Villa,74.56,63.46,0.4572,0.2793,...,0.0,59.5,1.0,1.0,1.08,1.26,1.54,1.02,1.05,1.05
8760,2019,2020-07-16,2411,Barclays Premier League,Leicester City,Sheffield United,80.58,73.52,0.5859,0.1605,...,100.0,2.0,2.0,0.0,3.05,0.84,1.48,0.29,1.86,0.0
8775,2019,2020-07-16,2411,Barclays Premier League,Southampton,Brighton and Hove Albion,73.64,69.29,0.4521,0.2844,...,0.0,1.8,1.0,1.0,2.39,0.89,1.28,1.02,1.05,1.05
8776,2019,2020-07-16,2411,Barclays Premier League,Crystal Palace,Manchester United,68.3,88.1,0.1098,0.6879,...,0.0,100.0,0.0,2.0,0.6,1.94,0.96,2.14,0.0,1.91
