# Value Betting

Source data: https://www.football-data.co.uk/englandm.php

And: https://fixturedownload.com/results/epl-2020

In [2]:
from collections import Counter

import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn

from sklearn.linear_model import Ridge, PoissonRegressor, Perceptron
from sklearn.svm import SVC
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.metrics import accuracy_score, plot_precision_recall_curve, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [4]:
# Load fixture
converter = {
    "Man Utd": "Man United",
    "Sheffield Utd": "Sheffield United",
    "Spurs": "Tottenham",
}

def convert_team(team):
    if team in converter.keys():
        return converter[team]
    return team


fix = pd.read_csv("data/epl-2020-GMTStandardTime.csv")

fix["Home Team"] = fix["Home Team"].apply(convert_team)
fix["Away Team"] = fix["Away Team"].apply(convert_team)

# Load historic results and fixtures
csv_files = {
    "https://www.football-data.co.uk/mmz4281/2021/E0.csv": "2020-21",  # Refresh with latest results
    #    "data/E2020_21.csv": "2020-21",
    "data/E2019_20.csv": "2019-20",
    "data/E2018_19.csv": "2018-19",
    "data/E2017_18.csv": "2017-18",
    "data/E2016_17.csv": "2016-17",
    "data/E2015_16.csv": "2015-16",
    "data/E2014_15.csv": "2014-15",
}

df = pd.DataFrame()

for file, season in csv_files.items():
    _data = pd.read_csv(file)
    _data["Season"] = season
    df = df.append(_data)
    
df.shape

(2359, 131)

# Select data

In [5]:
cols = [
    "Season",
    "Date",
    "HomeTeam",
    "AwayTeam",
    "FTHG",
    "FTAG",
#    "FTR",
    "B365H",
    "B365D",
    "B365A",
]

df = df[cols]
df["Date"] = pd.to_datetime(df["Date"])
df["TG"] = df["FTHG"] + df["FTAG"]

In [6]:
df.dropna(inplace=True)
#df.info()

In [7]:
df = pd.concat([df, pd.get_dummies(df['HomeTeam'], prefix='homeDummy'), pd.get_dummies(df['AwayTeam'], prefix='awayDummy')], axis=1)

In [8]:
df_data = df.drop(labels=[
    'Season',
    'Date',
#    'FTR',
    'TG',
    'HomeTeam',
    'AwayTeam',
    'B365H',
    'B365D',
    'B365A',
], axis=1)

In [9]:
df_data.head()

Unnamed: 0,FTHG,FTAG,homeDummy_Arsenal,homeDummy_Aston Villa,homeDummy_Bournemouth,homeDummy_Brighton,homeDummy_Burnley,homeDummy_Cardiff,homeDummy_Chelsea,homeDummy_Crystal Palace,homeDummy_Everton,homeDummy_Fulham,homeDummy_Huddersfield,homeDummy_Hull,homeDummy_Leeds,homeDummy_Leicester,homeDummy_Liverpool,homeDummy_Man City,homeDummy_Man United,homeDummy_Middlesbrough,homeDummy_Newcastle,homeDummy_Norwich,homeDummy_QPR,homeDummy_Sheffield United,homeDummy_Southampton,homeDummy_Stoke,homeDummy_Sunderland,homeDummy_Swansea,homeDummy_Tottenham,homeDummy_Watford,homeDummy_West Brom,homeDummy_West Ham,homeDummy_Wolves,awayDummy_Arsenal,awayDummy_Aston Villa,awayDummy_Bournemouth,awayDummy_Brighton,awayDummy_Burnley,awayDummy_Cardiff,awayDummy_Chelsea,awayDummy_Crystal Palace,awayDummy_Everton,awayDummy_Fulham,awayDummy_Huddersfield,awayDummy_Hull,awayDummy_Leeds,awayDummy_Leicester,awayDummy_Liverpool,awayDummy_Man City,awayDummy_Man United,awayDummy_Middlesbrough,awayDummy_Newcastle,awayDummy_Norwich,awayDummy_QPR,awayDummy_Sheffield United,awayDummy_Southampton,awayDummy_Stoke,awayDummy_Sunderland,awayDummy_Swansea,awayDummy_Tottenham,awayDummy_Watford,awayDummy_West Brom,awayDummy_West Ham,awayDummy_Wolves
0,0.0,3.0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1.0,0.0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2,4.0,3.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0.0,2.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
4,0.0,3.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [10]:
df_data.tail()

Unnamed: 0,FTHG,FTAG,homeDummy_Arsenal,homeDummy_Aston Villa,homeDummy_Bournemouth,homeDummy_Brighton,homeDummy_Burnley,homeDummy_Cardiff,homeDummy_Chelsea,homeDummy_Crystal Palace,homeDummy_Everton,homeDummy_Fulham,homeDummy_Huddersfield,homeDummy_Hull,homeDummy_Leeds,homeDummy_Leicester,homeDummy_Liverpool,homeDummy_Man City,homeDummy_Man United,homeDummy_Middlesbrough,homeDummy_Newcastle,homeDummy_Norwich,homeDummy_QPR,homeDummy_Sheffield United,homeDummy_Southampton,homeDummy_Stoke,homeDummy_Sunderland,homeDummy_Swansea,homeDummy_Tottenham,homeDummy_Watford,homeDummy_West Brom,homeDummy_West Ham,homeDummy_Wolves,awayDummy_Arsenal,awayDummy_Aston Villa,awayDummy_Bournemouth,awayDummy_Brighton,awayDummy_Burnley,awayDummy_Cardiff,awayDummy_Chelsea,awayDummy_Crystal Palace,awayDummy_Everton,awayDummy_Fulham,awayDummy_Huddersfield,awayDummy_Hull,awayDummy_Leeds,awayDummy_Leicester,awayDummy_Liverpool,awayDummy_Man City,awayDummy_Man United,awayDummy_Middlesbrough,awayDummy_Newcastle,awayDummy_Norwich,awayDummy_QPR,awayDummy_Sheffield United,awayDummy_Southampton,awayDummy_Stoke,awayDummy_Sunderland,awayDummy_Swansea,awayDummy_Tottenham,awayDummy_Watford,awayDummy_West Brom,awayDummy_West Ham,awayDummy_Wolves
375,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
376,5.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
377,2.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
378,2.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
379,6.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [11]:
def calc_winner(fthg, ftag, draw_diff=0.1):
    # 0 = home winner
    # 1 = draw winner
    # 2 = away winner
    if fthg - ftag <= 0 - draw_diff:
        # away winner
        return 'AWAY'
    elif fthg - ftag >= 0 + draw_diff:
        # home winner
        return 'HOME'
    elif - draw_diff < fthg - ftag < draw_diff:
        # draw
        return 'DRAW'
    else:
        print(f'fthg: {fthg}')
        print(f'ftag: {ftag}')
        print(f'draw_diff: {draw_diff}')
        raise Exception('Something is up.')

In [12]:
df_data['result'] = df_data.apply(lambda x: calc_winner(x['FTHG'], x['FTAG']), axis=1)

# Decide model type

In [16]:
binary_model = True
take = 40

In [17]:
df_train = df_data[take:].copy()
df_train.dropna(inplace=True)
X_train = df_train.drop(labels=['FTHG', 'FTAG', 'result'], axis=1)
if binary_model:
    y_train = df_train[['result']]
else:
    y_train = df_train[['FTHG', 'FTAG']]
    

In [18]:
df_test = df_data[:take].copy()
df_test.dropna(inplace=True)
X_test = df_test.drop(labels=['FTHG', 'FTAG', 'result'], axis=1)
if binary_model:
    y_test = df_test[['result']]
else:
    y_test = df_test[['FTHG', 'FTAG']]


In [42]:
#param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100], 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'shrinking': [True, False], 'decision_function_shape': ['ovo', 'ovr']}
param_grid = {'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'C': [0.9, 1, 1.1], 'decision_function_shape': ['ovo', 'ovr']}

In [43]:
#model = sklearn.grid_search.GridSearchCV(SVC, param_grid, scoring="precision")
from sklearn.model_selection import GridSearchCV
model = GridSearchCV(SVC(probability=True), param_grid=param_grid, verbose=10, n_jobs=4)

# Decide model

In [44]:
%%time
# model = Ridge().fit(X_train, y_train)
le = LabelEncoder().fit(y_train)
model.fit(X_train, le.transform(y_train))
#model = GaussianNB().fit(X_train, le.transform(y_train))
#model = DecisionTreeClassifier().fit(X_train, le.transform(y_train))



Fitting 5 folds for each of 24 candidates, totalling 120 fits


  return f(**kwargs)
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   5 tasks      | elapsed:    3.8s
[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    5.6s
[Parallel(n_jobs=4)]: Done  17 tasks      | elapsed:    9.2s
[Parallel(n_jobs=4)]: Done  24 tasks      | elapsed:   11.2s
[Parallel(n_jobs=4)]: Done  33 tasks      | elapsed:   15.7s
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   19.4s
[Parallel(n_jobs=4)]: Done  53 tasks      | elapsed:   24.6s
[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:   30.4s
[Parallel(n_jobs=4)]: Done  77 tasks      | elapsed:   36.1s
[Parallel(n_jobs=4)]: Done  90 tasks      | elapsed:   42.0s
[Parallel(n_jobs=4)]: Done 105 tasks      | elapsed:   48.4s
[Parallel(n_jobs=4)]: Done 120 out of 120 | elapsed:   55.9s finished


CPU times: user 2.47 s, sys: 104 ms, total: 2.57 s
Wall time: 58.2 s


GridSearchCV(estimator=SVC(probability=True), n_jobs=4,
             param_grid={'C': [0.9, 1, 1.1],
                         'decision_function_shape': ['ovo', 'ovr'],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid']},
             verbose=10)

In [45]:
model.best_params_

{'C': 1.1, 'decision_function_shape': 'ovo', 'kernel': 'sigmoid'}

In [46]:
# Model score
print(classification_report(y_test, le.inverse_transform(model.predict(X_test))))
print()
print(f'Accuracy: {accuracy_score(y_test, le.inverse_transform(model.predict(X_test)))}', )


              precision    recall  f1-score   support

        AWAY       0.64      0.37      0.47        19
        DRAW       0.00      0.00      0.00         5
        HOME       0.52      0.88      0.65        16

    accuracy                           0.53        40
   macro avg       0.38      0.41      0.37        40
weighted avg       0.51      0.53      0.48        40


Accuracy: 0.525


In [48]:
# Model score with threshold

threshold = 0.4
arr = model.predict_proba(X_test)
idx1, idx2 = np.where(arr >= threshold)
len(idx1), len(idx2)

_true = y_test.iloc[y_test.iloc[idx1].index.drop_duplicates()]
_pred = idx2
print(classification_report(_true, le.inverse_transform(_pred)))
print()
print(f'Accuracy: {accuracy_score(_true, le.inverse_transform(_pred))}', )


              precision    recall  f1-score   support

        AWAY       0.64      0.39      0.48        18
        DRAW       0.00      0.00      0.00         5
        HOME       0.50      0.93      0.65        14

    accuracy                           0.54        37
   macro avg       0.38      0.44      0.38        37
weighted avg       0.50      0.54      0.48        37


Accuracy: 0.5405405405405406


  _warn_prf(average, modifier, msg_start, len(result))


__________________

In [99]:
res = pd.DataFrame(model.predict_proba(X_test), columns=le.classes_)
res['Outcome'] = y_test
res['B365A'] = 1 / df[:take]['B365A']
res['B365D'] = 1 / df[:take]['B365D']
res['B365H'] = 1 / df[:take]['B365H']
res['Away diff'] = res.AWAY - res.B365A
res['Draw diff'] = res.DRAW - res.B365D
res['Home diff'] = res.HOME - res.B365H


def highlight_max(s, color="green"):
    is_max = s == s.max()
    return [f"background-color: {color}" if v else "" for v in is_max]


def highlight(s, color="orange"):
    return [f"background-color: {color}" for i in range(len(s))]


res = pd.concat([res, df[:take][['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]], axis=1)
res['FTHG'] = res['FTHG'].apply(int)
res['FTAG'] = res['FTAG'].apply(int)
res = res[['HomeTeam', 'FTHG', 'Outcome', 'FTAG', 'AwayTeam', 'B365H', 'B365D', 'B365A', 'HOME', 'DRAW', 'AWAY', 'Home diff', 'Draw diff', 'Away diff']]
#display(res)
res.style.apply(
    lambda x: highlight(x, color='#df9a57'), subset=["HOME", "B365H", "Home diff"], axis=1).apply(
    lambda x: highlight(x, color='#DB8F43'), subset=["B365D", "DRAW", "Draw diff"], axis=1).apply(
    lambda x: highlight(x, color='#CE7A27'), subset=["B365A", "AWAY", "Away diff"], axis=1).apply(
    highlight_max, subset=["Home diff", "Draw diff", "Away diff"], axis=1).format(
    
    {
        'AWAY': "{:,.3f}",
        'DRAW': "{:,.3f}",
        'HOME': "{:,.3f}",
        #'Outcome': "{:,.3f}",
        'B365A': "{:,.3f}",
        'B365D': "{:,.3f}",
        'B365H': "{:,.3f}",
        'Home diff': "{:,.3f}",
        'Draw diff': "{:,.3f}",
        'Away diff': "{:,.3f}",
    }
)

Unnamed: 0,HomeTeam,FTHG,Outcome,FTAG,AwayTeam,B365H,B365D,B365A,HOME,DRAW,AWAY,Home diff,Draw diff,Away diff
0,Fulham,0,AWAY,3,Arsenal,0.167,0.231,0.654,0.385,0.207,0.408,0.218,-0.024,-0.246
1,Crystal Palace,1,HOME,0,Southampton,0.323,0.308,0.422,0.379,0.247,0.373,0.056,-0.06,-0.048
2,Liverpool,4,HOME,3,Leeds,0.781,0.167,0.105,0.592,0.274,0.134,-0.189,0.107,0.029
3,West Ham,0,AWAY,2,Newcastle,0.465,0.294,0.294,0.546,0.284,0.17,0.081,-0.01,-0.124
4,West Brom,0,AWAY,3,Leicester,0.263,0.278,0.513,0.429,0.252,0.319,0.166,-0.026,-0.194
5,Tottenham,0,AWAY,1,Everton,0.546,0.278,0.231,0.626,0.231,0.143,0.079,-0.047,-0.088
6,Brighton,1,AWAY,3,Chelsea,0.2,0.231,0.621,0.284,0.292,0.424,0.084,0.061,-0.197
7,Sheffield United,0,AWAY,2,Wolves,0.308,0.323,0.422,0.349,0.242,0.41,0.041,-0.081,-0.012
8,Everton,5,HOME,2,West Brom,0.667,0.238,0.154,0.568,0.266,0.166,-0.099,0.027,0.013
9,Leeds,4,HOME,3,Fulham,0.621,0.256,0.174,0.485,0.259,0.255,-0.136,0.003,0.081


# Future games

In [50]:
game_week = 9

In [51]:
fix_df = pd.DataFrame(fix[fix["Round Number"] == game_week])
fix_df = fix_df.rename({'Home Team': 'HomeTeam', 'Away Team': 'AwayTeam'}, axis=1)
fix_df

Unnamed: 0,Round Number,Date,Location,HomeTeam,AwayTeam,Result
80,9,21/11/2020 12:30,St. James' Park,Newcastle,Chelsea,
81,9,21/11/2020 15:00,Villa Park,Aston Villa,Brighton,
82,9,21/11/2020 17:30,Tottenham Hotspur Stadium,Tottenham,Man City,
83,9,21/11/2020 20:00,Old Trafford,Man United,West Brom,
84,9,22/11/2020 12:00,Craven Cottage,Fulham,Everton,
85,9,22/11/2020 14:00,Bramall Lane,Sheffield United,West Ham,
86,9,22/11/2020 16:30,Elland Road,Leeds,Arsenal,
87,9,22/11/2020 19:15,Anfield,Liverpool,Leicester,
88,9,23/11/2020 17:30,Turf Moor,Burnley,Crystal Palace,
89,9,23/11/2020 20:00,Molineux Stadium,Wolves,Southampton,


# Scraping odds with Selenium

In [62]:
from datetime import datetime
from time import sleep

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from selenium import webdriver

driver = webdriver.Chrome(ChromeDriverManager().install())

link = 'https://s5.sir.sportradar.com/bet365/da/1/season/77179'
driver.get(link)

#buttons = driver.find_elements(By.XPATH, '//button[text()="Vis mere"]')
print('Waiting 2 seconds for the site to load...')
sleep(2)
print('Done')
buttons = driver.find_elements_by_css_selector('.btn.btn-default.mobile-width-100.tablet-width-auto')
for idx, button in enumerate(buttons):
    button.click()
else:
    print(f'Buttons clicked: {idx}')

content = driver.page_source
soup = BeautifulSoup(content)
driver.quit()


[WDM] - Current google-chrome version is 86.0.4240
[WDM] - Get LATEST driver version for 86.0.4240


 


[WDM] - Get LATEST driver version for 86.0.4240
[WDM] - Trying to download new driver from http://chromedriver.storage.googleapis.com/86.0.4240.22/chromedriver_mac64.zip
[WDM] - Driver has been saved in cache [/Users/jess-alfredsen/.wdm/drivers/chromedriver/mac64/86.0.4240.22]


Waiting 2 seconds for the site to load...
Done
Buttons clicked: 6


In [63]:
converter_sportradar = {
    "Man United": "Man Utd",
    "Sheffield United": "Sheffield",
    "Wolves": "Wolverhampton"
}


def get_odds_from_soup(soup, home_team, away_team, date_string):
    # print(home_team, away_team, date_string)
    if home_team in converter_sportradar:
        home_team = converter_sportradar[home_team]
    if away_team in converter_sportradar:
        away_team = converter_sportradar[away_team]
    for each in soup.findAll('tr', {'class': 'cursor-pointer'}):
        # print(each)
        if home_team in each.get_text() and away_team in each.get_text() and date_string in each.get_text():
            buttons = each.find_all('button')
            odds = []
            for idx, button in enumerate(buttons):
                odds.append(button.get_text())
                if idx > 1:
                    break
            return {'B365H': float(odds[0]), 'B365D': float(odds[1]), 'B365A': float(odds[2])}

get_odds_from_soup(soup, 'Newcastle', 'Chelsea', '21/11/20')

{'B365H': 7.0, 'B365D': 4.5, 'B365A': 1.45}

In [64]:
future_odds = []
for row in fix_df.iterrows():
    date_string = datetime.strptime(row[1].Date, '%d/%m/%Y %H:%M').strftime('%d/%m/%y')
    odds = get_odds_from_soup(soup, row[1].HomeTeam, row[1].AwayTeam, date_string)
    future_odds.append(odds)


In [65]:
odds_df = pd.DataFrame(future_odds)
#df1.reset_index(drop=True, inplace=True)
fix_df.reset_index(drop=True, inplace=True)


future_df = pd.concat([fix_df, odds_df], axis=1)
future_df



X = pd.DataFrame(columns=X_test.columns)
future_df = pd.concat([future_df, pd.get_dummies(future_df['HomeTeam'], prefix='homeDummy'), pd.get_dummies(future_df['AwayTeam'], prefix='awayDummy')], axis=1)
X = X.merge(future_df, how='outer')

X.fillna(0, inplace=True)
X = X.drop([
    'Round Number',
    'Date',
    'Location',
    'HomeTeam',
    'AwayTeam',
    'Result',
    'B365H',
    'B365D',
    'B365A'
], axis=1)
res_df = pd.DataFrame(model.predict_proba(X), columns=le.classes_)


In [95]:
next_round = pd.concat([fix_df, odds_df, res_df], axis=1)

next_round['B365A'] = 1 / next_round[:take]['B365A']
next_round['B365D'] = 1 / next_round[:take]['B365D']
next_round['B365H'] = 1 / next_round[:take]['B365H']
next_round['Away diff'] = next_round.AWAY - next_round.B365A
next_round['Draw diff'] = next_round.DRAW - next_round.B365D
next_round['Home diff'] = next_round.HOME - next_round.B365H

next_round = next_round[['HomeTeam', 'AwayTeam', 'B365H', 'B365D', 'B365A', 'HOME', 'DRAW', 'AWAY', 'Home diff', 'Draw diff', 'Away diff']]
#display(res)
next_round.style.apply(
    lambda x: highlight(x, color='#df9a57'), subset=["HOME", "B365H", "Home diff"], axis=1).apply(
    lambda x: highlight(x, color='#DB8F43'), subset=["B365D", "DRAW", "Draw diff"], axis=1).apply(
    lambda x: highlight(x, color='#CE7A27'), subset=["B365A", "AWAY", "Away diff"], axis=1).apply(
    highlight_max, subset=["Home diff", "Draw diff", "Away diff"], axis=1).format(
    
    {
        'AWAY': "{:,.3f}",
        'DRAW': "{:,.3f}",
        'HOME': "{:,.3f}",
        #'Outcome': "{:,.3f}",
        'B365A': "{:,.3f}",
        'B365D': "{:,.3f}",
        'B365H': "{:,.3f}",
        'Home diff': "{:,.3f}",
        'Draw diff': "{:,.3f}",
        'Away diff': "{:,.3f}",
    }
)

Unnamed: 0,HomeTeam,AwayTeam,B365H,B365D,B365A,HOME,DRAW,AWAY,Home diff,Draw diff,Away diff
0,Newcastle,Chelsea,0.143,0.222,0.69,0.543,0.297,0.16,0.4,0.075,-0.53
1,Aston Villa,Brighton,0.476,0.278,0.312,0.472,0.254,0.274,-0.004,-0.024,-0.038
2,Tottenham,Man City,0.25,0.25,0.556,0.593,0.239,0.167,0.343,-0.011,-0.388
3,Man United,West Brom,0.769,0.182,0.1,0.306,0.245,0.448,-0.463,0.063,0.348
4,Fulham,Everton,0.25,0.263,0.546,0.451,0.226,0.323,0.201,-0.038,-0.223
5,Sheffield United,West Ham,0.333,0.308,0.417,0.437,0.275,0.288,0.104,-0.033,-0.129
6,Leeds,Arsenal,0.308,0.286,0.465,0.243,0.302,0.455,-0.064,0.016,-0.01
7,Liverpool,Leicester,0.526,0.267,0.263,0.141,0.189,0.67,-0.386,-0.078,0.407
8,Burnley,Crystal Palace,0.364,0.323,0.37,0.539,0.262,0.199,0.175,-0.061,-0.171
9,Wolves,Southampton,0.444,0.303,0.312,0.58,0.264,0.156,0.136,-0.039,-0.156


# How unfair are the bookies?

### Bet365

In [96]:
next_round['B365H'] + next_round['B365D'] + next_round['B365A']

0    1.054735
1    1.066468
2    1.055556
3    1.051049
4    1.059606
5    1.057692
6    1.058523
7    1.056140
8    1.056587
9    1.059975
dtype: float64

### Danske Spil

In [68]:
[
    (1/6.50)+(1/4.50)+(1/1.48),
    (1/2.25)+(1/3.60)+(1/3.05),
    (1/4.10)+(1/3.95)+(1/1.80),
    (1/1.32)+(1/5.50)+(1/9.00),
    (1/3.95)+(1/3.80)+(1/1.87),
    (1/3.05)+(1/3.25)+(1/2.40),
    (1/3.15)+(1/3.75)+(1/2.15),
    (1/1.90)+(1/3.95)+(1/3.70),
    (1/2.85)+(1/2.95)+(1/2.75),
    (1/2.30)+(1/3.30)+(1/3.15),
]

[1.0517440517440517,
 1.0500910746812386,
 1.0526225515419712,
 1.0505050505050506,
 1.0510818099876373,
 1.0522278268179908,
 1.0492432631967514,
 1.0497506167059796,
 1.0534966074662773,
 1.0552732291862728]