# Last Man Standing

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import footballdata as foo
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

sns.set_context("notebook")
sns.set_style("darkgrid")

In [2]:
print(foo.MatchHistory.__doc__)

Provides pandas.DataFrames from CSV files available at
    http://www.football-data.co.uk/data.php

    Column names are explained here: http://www.football-data.co.uk/notes.txt

    Data will be downloaded as necessary and cached locally in ./data

    Parameters
    ----------
    leagues : string or iterable of league-ids to include, None for all
    seasons : string, int or list of seasons. Examples:
              '16-17'; 2016; '2016-17'; [14, 15, 16]
    


In [3]:
foo.MatchHistory.available_leagues()

['BEL-Jupiler League',
 'ENG-Championship',
 'ENG-Conference',
 'ENG-League 1',
 'ENG-League 2',
 'ENG-Premier League',
 'ESP-La Liga',
 'ESP-La Liga 2',
 'FRA-Ligue 1',
 'FRA-Ligue 2',
 'GER-Bundesliga',
 'GER-Bundesliga 2',
 'GRE-Ethniki Katigoria',
 'ITA-Serie A',
 'ITA-Serie B',
 'NED-Eredivisie',
 'POR-Liga 1',
 'SCO-Division 1',
 'SCO-Division 2',
 'SCO-Division 3',
 'SCO-Premiership',
 'TUR-Ligi 1']

In [66]:
prem = foo.MatchHistory('ENG-Premier League', range(2015, 2017)).read_games()
prem.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,date,home_team,away_team,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,...,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
league,season,game_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
ENG-Premier League,1516,2015-12-19 Newcastle-Aston Villa,2015-12-19,Newcastle,Aston Villa,1,1,D,1,0,H,M Atkinson,...,1.79,28,-0.5,2.04,1.99,1.93,1.88,2.01,3.55,4.15
ENG-Premier League,1617,2016-10-22 Arsenal-Middlesbrough,2016-10-22,Arsenal,Middlesbrough,0,0,D,0,0,D,M Dean,...,2.25,33,-2.0,2.44,2.3,1.68,1.64,1.27,6.3,13.0
ENG-Premier League,1617,2017-04-04 Leicester City-Sunderland,2017-04-04,Leicester City,Sunderland,2,0,H,0,0,D,G Scott,...,2.06,23,-1.5,2.54,2.31,1.69,1.64,1.51,4.5,7.48
ENG-Premier League,1516,2015-12-30 Sunderland-Liverpool,2015-12-30,Sunderland,Liverpool,0,1,A,0,0,D,K Friend,...,1.81,28,0.5,2.32,2.25,1.74,1.68,5.77,4.09,1.66
ENG-Premier League,1617,2016-08-13 Hull City-Leicester City,2016-08-13,Hull City,Leicester City,2,1,H,1,0,H,M Dean,...,1.67,31,0.25,2.35,2.26,2.03,1.67,4.68,3.5,1.92


In [67]:
list(prem) # columns of DataFrame

['date',
 'home_team',
 'away_team',
 'FTHG',
 'FTAG',
 'FTR',
 'HTHG',
 'HTAG',
 'HTR',
 'Referee',
 'HS',
 'AS',
 'HST',
 'AST',
 'HF',
 'AF',
 'HC',
 'AC',
 'HY',
 'AY',
 'HR',
 'AR',
 'B365H',
 'B365D',
 'B365A',
 'BWH',
 'BWD',
 'BWA',
 'IWH',
 'IWD',
 'IWA',
 'LBH',
 'LBD',
 'LBA',
 'PSH',
 'PSD',
 'PSA',
 'WHH',
 'WHD',
 'WHA',
 'VCH',
 'VCD',
 'VCA',
 'Bb1X2',
 'BbMxH',
 'BbAvH',
 'BbMxD',
 'BbAvD',
 'BbMxA',
 'BbAvA',
 'BbOU',
 'BbMx>2.5',
 'BbAv>2.5',
 'BbMx<2.5',
 'BbAv<2.5',
 'BbAH',
 'BbAHh',
 'BbMxAHH',
 'BbAvAHH',
 'BbMxAHA',
 'BbAvAHA',
 'PSCH',
 'PSCD',
 'PSCA']

In [6]:
def probs_from_odds(odds_win, odds_draw, odds_lose):
    prob_win, prob_draw, prob_lose = \
        map(lambda odds: odds / (1 + odds) , [odds_win, odds_draw, odds_lose])
    vig = prob_win + prob_draw + prob_lose - 1 # bookie's cut
    prob_win_normed, prob_draw_normed, prob_lose_normed = \
        map(lambda prob: prob / (1 + vig) , [prob_win, prob_draw, prob_lose])
    return prob_win_normed, prob_draw_normed, prob_lose_normed

In [7]:
probs_from_odds(3.10, 3.30, 2.50)

(0.33787160082557843, 0.342940942173299, 0.31918745700112244)

In [68]:
# Check seasons
prem_index = prem.index.get_values()
set([idx[1] for idx in prem_index])

{'1516', '1617'}

In [24]:
prem.home_team.unique()

array(['Burnley', 'Crystal Palace', 'Everton', 'Hull City',
       'Manchester City', 'Middlesbrough', 'Southampton',
       'AFC Bournemouth', 'Arsenal', 'Chelsea', 'Manchester United',
       'Leicester City', 'Stoke City', 'Swansea City', 'Tottenham Hotspur',
       'Watford', 'West Bromwich Albion', 'Sunderland', 'West Ham United',
       'Liverpool'], dtype=object)