In [1]:
# These are the Python libraries we will use.
# pandas is our Python version of Excel.
# requests_html is our Python version of a web browser (like Chrome, Edge, or Firefox) that we're going to
# use to open the websites we want to get data from.
import pandas as pd
import numpy as np
from requests_html import HTMLSession
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from dateutil import parser

In [2]:
def parse_odds(odds):
    """Convert a fractional-odds string into its implied probability.

    Odds written as ``'a/b'`` (win ``a`` for every ``b`` staked) map to
    ``b / (a + b)``. A bare integer ``'a'`` is read as ``a/1``, and the
    even-money shorthand (``'EVS'``, ``'evens'``, ``'even'``, any case) as ``1/1``.

    Parameters
    ----------
    odds : object
        The scraped odds text. Surrounding whitespace is ignored.

    Returns
    -------
    float
        The implied probability, or ``np.nan`` when ``odds`` is not a string,
        is blank, or has ``a + b == 0`` (no meaningful probability).

    Raises
    ------
    ValueError
        If a non-blank string is not in one of the recognised formats
        (non-integer parts, more than one ``/``, arbitrary text).
    """
    if not isinstance(odds, str):
        return np.nan

    text = odds.strip()
    if not text:
        # Blank cells mean "no price available", not "malformed price".
        return np.nan

    if text.lower() in ('evs', 'evens', 'even'):
        a, b = 1, 1
    elif '/' in text:
        a, b = text.split('/')
        a = int(a)
        b = int(b)
    else:
        a = int(text)
        b = 1

    if a + b == 0:
        # Avoid ZeroDivisionError on degenerate input such as '0/0'.
        return np.nan
    return b / (a + b)

In [3]:
# Page listing the Premier League fixtures and their odds.
# (Plain string: there is nothing to interpolate, so no f-prefix is needed.)
url = 'https://www.oddschecker.com/football/english/premier-league'

In [4]:
# Download the fixtures page.
# The timeout stops this cell hanging indefinitely on a stalled connection, and
# raise_for_status() turns an HTTP error response into an immediate, readable
# failure instead of letting an error page be parsed as if it were the fixtures.
session = HTMLSession()
request = session.get(url, timeout=30)  # note: despite its name, this is the response object
request.raise_for_status()

In [5]:
# Build a searchable tree from the downloaded page text using the 'html.parser' backend.
soup = BeautifulSoup(markup=request.text, features='html.parser')

In [6]:
# Locate the <div id="fixtures"> element that the fixture rows are read from.
table = soup.find('div', {'id':'fixtures'})
if table is None:
    # find() returns None when nothing matches; fail here with a clear message
    # rather than with "'NoneType' object has no attribute ..." further down.
    raise RuntimeError(
        f"No <div id='fixtures'> found in the page fetched from {url}; "
        "the page layout may have changed or the request may have been blocked."
    )

In [7]:
# Every <tr> inside the fixtures container.
# find_all is the current name of the deprecated findAll alias.
rows = table.find_all('tr')

In [25]:
# Turn the scraped <tr> elements into one record per fixture.
# Rows carrying the 'hda-header' class hold the date that applies to the rows
# after them; every other row is expected to hold two team names and three prices.
processed_rows = []
date = None  # no date header seen yet
for row in rows:
    # .get() so that a <tr> with no class attribute is not a KeyError.
    if 'hda-header' in row.get('class', []):
        date = parser.parse(row.find('td').text)
        continue

    team_names = []
    for raw_team_name in row.find_all('p', {'class':'fixtures-bet-name beta-footnote'}):
        if raw_team_name.find('span') is None:
            team_name = raw_team_name.text
        else:
            # NOTE(review): drops the last 5 characters when the element contains
            # a <span> - presumably the span's own text; confirm against the markup.
            team_name = raw_team_name.text[:-5]
        team_names.append(team_name)

    if len(team_names) != 2:
        # Not a row with exactly two team names: skip it instead of letting the
        # unpacking below abort the whole scrape.
        continue
    home_team, away_team = team_names

    try:
        home, draw, away = [parse_odds(x.text) for x in row.find_all('p', {'class':'participant-name'})]
    except ValueError:
        # Prices missing, not exactly three of them, or unparseable. Record NaN so
        # this fixture never inherits the odds left over from the previous row.
        home = draw = away = np.nan

    processed_rows.append({
        'Date': date,
        'Home Team': home_team,
        'Away Team': away_team,
        'Home': home,
        'Draw': draw,
        'Away': away,
    })

# Passing columns= keeps the expected columns present even if nothing was scraped.
stats = pd.DataFrame(processed_rows,
                     columns=['Date', 'Home Team', 'Away Team', 'Home', 'Draw', 'Away'])
# Keep a timestamped snapshot of the scraped odds on disk.
stats.to_csv(f'game_odds_{datetime.today().strftime("%Y%m%dT%H%M%S")}.csv')

In [9]:
# Index the odds by fixture so that only the Home/Draw/Away columns remain as data.
# Guarded so that re-running this cell is a no-op instead of a KeyError: once the
# columns have moved into the index they can no longer be selected by set_index.
fixture_keys = ['Date', 'Home Team', 'Away Team']
if list(stats.index.names) != fixture_keys:
    stats = stats.set_index(fixture_keys)

In [10]:
# Rescale each fixture's three implied probabilities so they sum to exactly 1.
norm_stats = stats.divide(stats.sum(axis='columns'), axis='index')

In [11]:
# Show the per-fixture Home/Draw/Away values after each row was divided by its row sum.
norm_stats

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Home,Draw,Away
Date,Home Team,Away Team,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-08-23,Aston Villa,Everton,0.293211,0.273348,0.433442
2019-08-24,Norwich,Chelsea,0.231884,0.246377,0.521739
2019-08-24,Brighton,Southampton,0.395304,0.292326,0.312371
2019-08-24,Man Utd,Crystal Palace,0.718919,0.187387,0.093694
2019-08-24,Sheffield Utd,Leicester,0.273698,0.288385,0.437917
2019-08-24,Watford,West Ham,0.45,0.275,0.275
2019-08-24,Liverpool,Arsenal,0.648271,0.198803,0.152926
2019-08-25,Bournemouth,Man City,0.065589,0.129452,0.804958
2019-08-25,Tottenham,Newcastle,0.79558,0.138122,0.066298
2019-08-25,Wolves,Burnley,0.527473,0.274725,0.197802


In [12]:
# Total amount staked on each fixture: the bet-sizing cell scales every row of
# stakes so that it sums to this value (displayed in £ further down).
bankroll = 100

In [13]:
# Sum of the three implied probabilities for each fixture, smallest first.
# A total below 1 is what the bet-sizing cell further down looks for.
stats.sum(axis='columns').sort_values()

Date        Home Team       Away Team     
2019-08-31  West Ham        Norwich           1.002707
2019-08-23  Aston Villa     Everton           1.003094
2019-08-25  Tottenham       Newcastle         1.005556
2019-08-24  Liverpool       Arsenal           1.006020
            Brighton        Southampton       1.006131
2019-08-31  Chelsea         Sheffield Utd     1.008658
            Burnley         Liverpool         1.008741
2019-08-24  Watford         West Ham          1.010101
2019-08-25  Wolves          Burnley           1.011111
2019-08-31  Southampton     Man Utd           1.012821
2019-09-01  Arsenal         Tottenham         1.013228
2019-08-24  Norwich         Chelsea           1.014706
2019-08-31  Leicester       Bournemouth       1.014736
2019-08-24  Sheffield Utd   Leicester         1.014905
2019-08-25  Bournemouth     Man City          1.016427
2019-08-24  Man Utd         Crystal Palace    1.016484
2019-08-31  Man City        Brighton          1.017904
2019-09-01  Everton   

In [14]:
# Stake sizing, written as a single chain:
#   1. weight each normalised probability by (1 - sum of implied probabilities),
#      which is positive only when the probabilities sum to less than 1;
#   2. blank out (NaN) every non-positive entry;
#   3. rescale each row so its stakes add up to `bankroll`;
#   4. drop every fixture that still contains a NaN.
bet_amounts = (
    norm_stats
    .mul(1 - stats.sum(axis=1), axis=0)
    .pipe(lambda stakes: stakes.where(stakes > 0))
    .pipe(lambda stakes: stakes.mul(bankroll / stakes.sum(axis=1), axis=0))
    .dropna()
)

In [15]:
# Stakes per outcome plus a per-fixture 'Total' column, formatted as pounds.
bet_amounts.assign(Total=bet_amounts.sum(axis=1)).applymap(lambda amount: f'£{amount:0.2f}')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Home,Draw,Away,Total
Date,Home Team,Away Team,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [16]:
# Each stake divided by the matching implied probability (i.e. stake x decimal odds),
# formatted as pounds.
(bet_amounts / stats).dropna().applymap(lambda payout: f'£{payout:0.2f}')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Home,Draw,Away
Date,Home Team,Away Team,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [17]:
# Stake / implied probability for each outcome, minus the fixture's total stake,
# formatted as pounds.
(
    (bet_amounts / stats)
    .sub(bet_amounts.sum(axis='columns'), axis='index')
    .dropna()
    .applymap(lambda profit: f'£{profit:0.2f}')
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Home,Draw,Away
Date,Home Team,Away Team,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [18]:
# Reciprocal of every implied probability, i.e. the odds in decimal form.
stats.rdiv(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Home,Draw,Away
Date,Home Team,Away Team,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-08-23,Aston Villa,Everton,3.4,3.647059,2.3
2019-08-24,Norwich,Chelsea,4.25,4.0,1.888889
2019-08-24,Brighton,Southampton,2.514286,3.4,3.181818
2019-08-24,Man Utd,Crystal Palace,1.368421,5.25,10.5
2019-08-24,Sheffield Utd,Leicester,3.6,3.416667,2.25
2019-08-24,Watford,West Ham,2.2,3.6,3.6
2019-08-24,Liverpool,Arsenal,1.533333,5.0,6.5
2019-08-25,Bournemouth,Man City,15.0,7.6,1.222222
2019-08-25,Tottenham,Newcastle,1.25,7.2,15.0
2019-08-25,Wolves,Burnley,1.875,3.6,5.0
