In [2]:
import pandas as pd
from weekly_prediction_functions import *
from data_preparation_functions import *
from sklearn.metrics import log_loss, confusion_matrix
import warnings
# Notebook-wide settings.
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/sklearn — consider narrowing.
warnings.simplefilter('ignore')
# Let wide frames display up to 100 columns before pandas truncates them.
pd.options.display.max_columns = 100

## 4. Weekly Predictions

In [4]:
# Load the season fixture list, then parse the Date column into datetimes.
fixture = pd.read_csv('data/fixture.csv')
fixture = fixture.assign(Date=pd.to_datetime(fixture['Date']))

In [5]:
# Show the fixture table (the rendered output elides the middle rows).
fixture

Unnamed: 0,Date,Time (AEST),HomeTeam,AwayTeam,Venue,TV,Year,round,season
0,2018-08-11,5:00 AM,Man United,Leicester,"Old Trafford, Manchester","Optus, Fox Sports (delay)",2018,1,1819
1,2018-08-11,9:30 PM,Newcastle,Tottenham,"St.James’ Park, Newcastle","Optus, SBS",2018,1,1819
2,2018-08-12,12:00 AM,Bournemouth,Cardiff,"Vitality Stadium, Bournemouth",Optus,2018,1,1819
3,2018-08-12,12:00 AM,Fulham,Crystal Palace,"Craven Cottage, London",Optus,2018,1,1819
4,2018-08-12,12:00 AM,Huddersfield,Chelsea,"John Smith’s Stadium, Huddersfield","Optus, Fox Sports (delay)",2018,1,1819
...,...,...,...,...,...,...,...,...,...
375,2019-05-13,12:00 AM,Tottenham,Everton,"Tottenham Hotspur Stadium, London",Optus,2019,38,1819
376,2019-05-13,12:00 AM,Fulham,Newcastle,"Craven Cottage, London",Optus,2019,38,1819
377,2019-05-13,12:00 AM,Southampton,Huddersfield,"St.Mary’s Stadium, Southampton",Optus,2019,38,1819
378,2019-05-13,12:00 AM,Liverpool,Wolves,"Anfield, Liverpool","Optus, Fox Sports (delay)",2019,38,1819


It is no longer possible to connect to the Betfair API and retrieve game-level information, as the API has since been removed. <br>
We will use the saved weekly game info file instead for this section.

In [8]:
# Build the week's game-info frame from the saved CSV using the project helper
# create_game_info_df (defined outside this notebook; its internals are not visible
# here — NOTE(review): confirm what parsing it applies), then preview three rows.
game_info = create_game_info_df("data/weekly_game_info.csv")
game_info.head(3)

Unnamed: 0,AwayTeam,HomeTeam,awaySelectionId,drawSelectionId,homeSelectionId,draw,marketId,marketStartTime,totalMatched,eventId,eventName,homeOdds,drawOdds,awayOdds,competitionId,Date,localMarketStartTime
0,Arsenal,Cardiff,1096,58805,79343,The Draw,1.146897152,2018-09-02 12:30:00+00:00,30123.595116,28852020,Cardiff v Arsenal,7.0,4.3,1.62,10932509,2018-09-02 00:00:00+00:00,"Sun September 2, 10:30PM"
1,Bournemouth,Chelsea,1141,58805,55190,The Draw,1.146875421,2018-09-01 14:00:00+00:00,30821.329656,28851426,Chelsea v Bournemouth,1.32,6.8,12.0,10932509,2018-09-01 00:00:00+00:00,"Sun September 2, 12:00AM"
2,Fulham,Brighton,56764,58805,18567,The Draw,1.146875746,2018-09-01 14:00:00+00:00,16594.833096,28851429,Brighton v Fulham,2.36,3.5,3.5,10932509,2018-09-01 00:00:00+00:00,"Sun September 2, 12:00AM"


Next, load the weekly odds and normalise the abbreviated team names so they match the rest of the data.

In [10]:
# Read this week's odds, then swap the abbreviated team names for the spellings
# used in the fixture data ('Man United', 'Crystal Palace').
odds = pd.read_csv('data/weekly_epl_odds.csv')
odds = odds.replace(to_replace={'Man Utd': 'Man United', 'C Palace': 'Crystal Palace'})

In [11]:
# Preview the first three rows of the odds table.
odds.head(3)

Unnamed: 0,HomeTeam,AwayTeam,f_homeOdds,f_drawOdds,f_awayOdds
0,Leicester,Liverpool,7.8,5.1,1.48
1,Brighton,Fulham,2.36,3.5,3.5
2,Everton,Huddersfield,1.54,4.4,8.2


## Data Wrangling The Week's Game Info Into Our Feature Set

All of this information has been wrangled into a feature set that we can use to predict this week's games.

In [12]:
# Load the EPL match data through the project helper create_df (defined outside
# this notebook — NOTE(review): confirm what cleaning/feature columns it adds),
# then preview the first rows.
df = create_df('data/epl_data.csv')
df.head()

Unnamed: 0,AC,AF,AR,AS,AST,AY,AwayTeam,B365A,B365D,B365H,BWA,BWD,BWH,Bb1X2,BbAH,BbAHh,BbAv<2.5,BbAv>2.5,BbAvA,BbAvAHA,BbAvAHH,BbAvD,BbAvH,BbMx<2.5,BbMx>2.5,BbMxA,BbMxAHA,BbMxAHH,BbMxD,BbMxH,BbOU,Date,Day,Div,FTAG,FTHG,FTR,HC,HF,HR,HS,HST,HTAG,HTHG,HTR,HY,HomeTeam,IWA,IWD,IWH,LBA,LBD,LBH,Month,Referee,VCA,VCD,VCH,Year,season,gameId,homeWin,awayWin,result
0,6.0,14.0,1.0,11.0,5.0,1.0,Blackburn,2.75,3.2,2.5,2.9,3.3,2.2,55.0,20.0,0.0,1.71,2.02,2.74,2.04,1.82,3.16,2.4,1.8,2.25,2.9,2.08,1.86,3.35,2.6,35.0,2005-08-13,13,E0,1.0,3.0,H,2.0,11.0,0.0,13.0,5.0,1.0,0.0,A,0.0,West Ham,2.7,3.0,2.3,2.75,3.0,2.38,8,A Wiley,2.75,3.25,2.4,2005,506,1,1,0,home
1,8.0,16.0,0.0,13.0,6.0,2.0,Bolton,3.0,3.25,2.3,3.15,3.25,2.1,56.0,22.0,-0.25,1.7,2.01,3.05,1.84,2.01,3.16,2.2,1.87,2.2,3.4,1.92,2.1,3.3,2.4,36.0,2005-08-13,13,E0,2.0,2.0,D,7.0,14.0,0.0,3.0,2.0,2.0,2.0,D,0.0,Aston Villa,3.1,3.0,2.1,3.2,3.0,2.1,8,M Riley,3.1,3.25,2.2,2005,506,2,0,0,draw
2,6.0,14.0,0.0,12.0,5.0,1.0,Man United,1.72,3.4,5.0,1.75,3.35,4.35,56.0,23.0,0.75,1.79,1.93,1.69,1.86,2.0,3.36,4.69,1.87,2.1,1.8,1.93,2.05,3.7,5.65,36.0,2005-08-13,13,E0,2.0,0.0,A,8.0,15.0,0.0,10.0,5.0,1.0,0.0,A,3.0,Everton,1.8,3.1,3.8,1.83,3.2,3.75,8,G Poll,1.8,3.3,4.5,2005,506,3,0,1,away
3,6.0,13.0,0.0,7.0,4.0,2.0,Birmingham,2.87,3.25,2.37,2.8,3.2,2.3,56.0,21.0,0.0,1.69,2.04,2.87,2.05,1.81,3.16,2.31,1.77,2.24,3.05,2.11,1.85,3.3,2.6,36.0,2005-08-13,13,E0,0.0,0.0,D,6.0,12.0,0.0,15.0,7.0,0.0,0.0,D,1.0,Fulham,2.9,3.0,2.2,2.88,3.0,2.25,8,R Styles,2.8,3.25,2.35,2005,506,4,0,0,draw
4,6.0,11.0,0.0,13.0,3.0,3.0,West Brom,5.0,3.4,1.72,4.8,3.45,1.65,55.0,23.0,-0.75,1.77,1.94,4.79,1.76,2.1,3.38,1.69,1.9,2.1,5.6,1.83,2.19,3.63,1.8,36.0,2005-08-13,13,E0,0.0,0.0,D,3.0,13.0,0.0,15.0,8.0,0.0,0.0,D,2.0,Man City,4.2,3.2,1.7,4.5,3.25,1.67,8,C Foy,5.0,3.25,1.75,2005,506,5,0,0,draw
