In [1]:
# These are the Python libraries we will use.
# pandas is our Python version of Excel.
# requests_html is our Python version of a web browser (like Chrome, Edge, or Firefox);
# we use it to open the websites we want to get data from.
import pandas as pd
import numpy as np
from requests_html import HTMLSession
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from dateutil import parser

In [2]:
def parse_odds(odds):
    """Convert fractional odds text into an implied probability.

    Parameters
    ----------
    odds : object
        Fractional odds as text, e.g. ``'5/2'``. A bare whole number such as
        ``'4'`` is read as ``4/1``, and ``'EVS'`` / ``'evens'`` (any case) is
        read as ``1/1``. Surrounding whitespace is ignored.

    Returns
    -------
    float
        ``b / (a + b)`` for odds ``a/b``. ``np.nan`` is returned when `odds`
        is not a string, cannot be parsed as whole numbers, or would require
        dividing by zero.
    """
    # isinstance (rather than an exact type comparison) also accepts str subclasses.
    if not isinstance(odds, str):
        return np.nan

    text = odds.strip()
    if text.lower() in ('evs', 'evens', 'even'):
        text = '1/1'

    if '/' in text:
        numerator, _, denominator = text.partition('/')
    else:
        numerator, denominator = text, '1'

    try:
        a = int(numerator)
        b = int(denominator)
    except ValueError:
        # Empty cells, placeholders such as 'SP', or malformed text like '1/2/3'.
        return np.nan

    if a + b == 0:
        return np.nan
    return b / (a + b)

In [3]:
# Page listing the English Premier League fixtures and their odds.
# (Plain string: there are no placeholders, so an f-string prefix is not needed.)
url = 'https://www.oddschecker.com/football/english/premier-league'

In [4]:
# Fetch the fixtures page.
# The timeout stops this cell from hanging indefinitely on a stalled connection, and
# raise_for_status() reports an HTTP error (e.g. 403/404) here instead of letting it
# show up later as a confusing parsing failure.
session = HTMLSession()
request = session.get(url, timeout=30)  # note: this holds the response, despite its name
request.raise_for_status()

In [5]:
# Parse the downloaded page text with Python's built-in HTML parser.
soup = BeautifulSoup(markup=request.text, features='html.parser')

In [6]:
# Locate the first <div> whose id attribute is "fixtures" (None if there is no such element).
table = soup.find('div', id='fixtures')

In [7]:
# Fail with a clear message if the fixtures container was not found, instead of an
# opaque "'NoneType' object has no attribute ..." error.
if table is None:
    raise RuntimeError("Could not find <div id='fixtures'> in the downloaded page")
# Collect every <tr> inside the fixtures container (find_all is the current name of findAll).
rows = table.find_all('tr')

In [8]:
# Turn the scraped <tr> rows into one record per fixture and save them as a CSV.
# Rows whose class list contains 'hda-header' supply the date used for the fixture
# rows that follow them.
processed_rows = []
# Reset on every run so that a stale `date` left in the kernel by an earlier execution
# is never attached to fixtures appearing before the first header row.
date = None
for row in rows:
    # .get() tolerates <tr> elements without a class attribute
    # (row['class'] would raise KeyError for those).
    if 'hda-header' in row.get('class', []):
        date = parser.parse(row.find('td').text)
        continue

    odds = [parse_odds(x.text) for x in row.find_all('p', {'class': 'participant-name'})]

    team_names = []
    for raw_team_name in row.find_all('p', {'class': 'fixtures-bet-name beta-footnote'}):
        if raw_team_name.find('span') is None:
            team_name = raw_team_name.text
        else:
            # When the element contains a <span>, the last five characters are dropped.
            # NOTE(review): presumably this strips the span's text from the name - confirm.
            team_name = raw_team_name.text[:-5]
        team_names.append(team_name)

    # A row carrying neither odds nor team names is not a fixture; skip it.
    if not odds and not team_names:
        continue
    # Anything else must have exactly home/draw/away odds and two team names.
    if len(odds) != 3 or len(team_names) != 2:
        raise ValueError(
            f'Unexpected fixture row layout: found {len(odds)} odds '
            f'and {len(team_names)} team names'
        )

    home, draw, away = odds
    home_team, away_team = team_names
    prow = pd.Series([date, home_team, away_team, home, draw, away],
                     index=['Date', 'Home Team', 'Away Team', 'Home', 'Draw', 'Away'])
    processed_rows.append(prow)

stats = pd.DataFrame(processed_rows)
# The output file name carries today's date in YYYYMMDD form.
stats.to_csv(f'game_odds_{datetime.today().strftime("%Y%m%d")}.csv')