# Last Man Standing

In [72]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import footballdata as foo
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from matplotlib.pyplot import figure, show


sns.set_context("notebook")
sns.set_style("darkgrid")

In [3]:
print(foo.MatchHistory.__doc__)

Provides pandas.DataFrames from CSV files available at
    http://www.football-data.co.uk/data.php

    Column names are explained here: http://www.football-data.co.uk/notes.txt

    Data will be downloaded as necessary and cached locally in ./data

    Parameters
    ----------
    leagues : string or iterable of league-ids to include, None for all
    seasons : string, int or list of seasons. Examples:
              '16-17'; 2016; '2016-17'; [14, 15, 16]
    


In [4]:
foo.MatchHistory.available_leagues()

['BEL-Jupiler League',
 'ENG-Championship',
 'ENG-Conference',
 'ENG-League 1',
 'ENG-League 2',
 'ENG-Premier League',
 'ESP-La Liga',
 'ESP-La Liga 2',
 'FRA-Ligue 1',
 'FRA-Ligue 2',
 'GER-Bundesliga',
 'GER-Bundesliga 2',
 'GRE-Ethniki Katigoria',
 'ITA-Serie A',
 'ITA-Serie B',
 'NED-Eredivisie',
 'POR-Liga 1',
 'SCO-Division 1',
 'SCO-Division 2',
 'SCO-Division 3',
 'SCO-Premiership',
 'TUR-Ligi 1']

In [189]:
prem = foo.MatchHistory('ENG-Premier League', range(2016, 2017)).read_games()
prem.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,date,home_team,away_team,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,...,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
league,season,game_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
ENG-Premier League,1617,2017-04-16 West Bromwich Albion-Liverpool,2017-04-16,West Bromwich Albion,Liverpool,0,1,A,0,1,A,J Moss,...,1.89,20,0.5,2.05,2.0,1.93,1.87,4.58,3.71,1.88
ENG-Premier League,1617,2016-10-22 Burnley-Everton,2016-10-22,Burnley,Everton,2,1,H,1,0,H,M Jones,...,1.74,33,1.0,1.77,1.71,2.3,2.17,5.9,3.73,1.71
ENG-Premier League,1617,2017-01-31 Sunderland-Tottenham Hotspur,2017-01-31,Sunderland,Tottenham Hotspur,0,0,D,0,0,D,L Mason,...,2.36,28,1.5,1.93,1.88,2.02,1.97,8.75,5.71,1.37
ENG-Premier League,1617,2016-08-28 Manchester City-West Ham United,2016-08-28,Manchester City,West Ham United,3,1,H,2,0,H,A Marriner,...,2.57,31,-1.5,1.85,1.77,2.2,2.08,1.26,6.5,13.25
ENG-Premier League,1617,2017-05-17 Southampton-Manchester United,2017-05-17,Southampton,Manchester United,0,0,D,0,0,D,M Dean,...,1.76,20,0.0,1.96,1.91,1.99,1.95,2.37,3.32,3.26


In [190]:
list(prem) # columns of DataFrame

['date',
 'home_team',
 'away_team',
 'FTHG',
 'FTAG',
 'FTR',
 'HTHG',
 'HTAG',
 'HTR',
 'Referee',
 'HS',
 'AS',
 'HST',
 'AST',
 'HF',
 'AF',
 'HC',
 'AC',
 'HY',
 'AY',
 'HR',
 'AR',
 'B365H',
 'B365D',
 'B365A',
 'BWH',
 'BWD',
 'BWA',
 'IWH',
 'IWD',
 'IWA',
 'LBH',
 'LBD',
 'LBA',
 'PSH',
 'PSD',
 'PSA',
 'WHH',
 'WHD',
 'WHA',
 'VCH',
 'VCD',
 'VCA',
 'Bb1X2',
 'BbMxH',
 'BbAvH',
 'BbMxD',
 'BbAvD',
 'BbMxA',
 'BbAvA',
 'BbOU',
 'BbMx>2.5',
 'BbAv>2.5',
 'BbMx<2.5',
 'BbAv<2.5',
 'BbAH',
 'BbAHh',
 'BbMxAHH',
 'BbAvAHH',
 'BbMxAHA',
 'BbAvAHA',
 'PSCH',
 'PSCD',
 'PSCA']

#### TODO check correct formula and explain going from odds $\rightarrow$ prob

In [191]:
# odds are typically given for home team
def probs_from_odds(odds_win, odds_draw, odds_lose):
    prob_win, prob_draw, prob_lose = \
        map(lambda odds: odds / (1 + odds) , [odds_win, odds_draw, odds_lose])
    vig = prob_win + prob_draw + prob_lose - 1 # bookie's cut
    prob_win_normed, prob_draw_normed, prob_lose_normed = \
        map(lambda prob: prob / (1 + vig) , [prob_win, prob_draw, prob_lose])
    return prob_win_normed, prob_draw_normed, prob_lose_normed

In [192]:
probs_from_odds(3.10, 3.30, 2.50)

(0.33787160082557843, 0.342940942173299, 0.31918745700112244)

In [193]:
# Check seasons
prem_index = prem.index.get_values()
set([idx[1] for idx in prem_index])

{'1617'}

In [194]:
teams = list(prem.home_team.unique())
num_teams = len(teams)
print(teams)

['Burnley', 'Crystal Palace', 'Everton', 'Hull City', 'Manchester City', 'Middlesbrough', 'Southampton', 'AFC Bournemouth', 'Arsenal', 'Chelsea', 'Manchester United', 'Leicester City', 'Stoke City', 'Swansea City', 'Tottenham Hotspur', 'Watford', 'West Bromwich Albion', 'Sunderland', 'West Ham United', 'Liverpool']


## Winning probabilities matrix

What we want now is the matrix $X\in [0,1]^{38 \times 20}$, with elements defined by

$$X_{i,j} := p^{(i)}_j = \text{probability that team }j\text{ wins in week }i.$$

#### TODO Refactor to take odds from different bookies, maybe in `prob_from_odds`

In [195]:
X = np.zeros( (2 * (num_teams - 1), num_teams))
X_index = np.empty((2 * (num_teams - 1), num_teams), dtype=object)
games_played_by_team = np.zeros(len(teams), dtype=int) # array keeps track of how many games each team has played

for index, row in prem.iterrows():
    j_home = teams.index(row["home_team"])
    j_away = teams.index(row["away_team"])
    i_home = games_played_by_team[j_home]
    i_away = games_played_by_team[j_away]
    home_prob, draw_prob, away_prob = probs_from_odds(row["B365H"], row["B365D"], row["B365A"])
    X[i_home, j_home] = home_prob
    X[i_away, j_away] = away_prob
    X_index[i_home, j_home] = index
    X_index[i_away, j_away] = index
    games_played_by_team[j_home] += 1
    games_played_by_team[j_away] += 1
    

In [196]:
print(np.where(X == 0))

(array([], dtype=int64), array([], dtype=int64))


In [197]:
print(X[24,5])
print(X_index[24,5])
game = prem.loc[X_index[24,5]]
home_prob, draw_prob, away_prob = probs_from_odds(game["B365H"], game["B365D"], game["B365A"])
print(home_prob, draw_prob, away_prob)

0.354387283101
('ENG-Premier League', '1617', '2017-02-11 Middlesbrough-Everton')
0.354387283101 0.341707952758 0.303904764141
