In [2]:
import pandas as pd
from weekly_prediction_functions import *
from data_preparation_functions import *
from sklearn.metrics import log_loss, confusion_matrix
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by pandas / scikit-learn
# calls later in the notebook — consider narrowing the filter.
warnings.filterwarnings('ignore')
# Let wide DataFrames display up to 100 columns instead of being truncated.
pd.set_option('display.max_columns', 100)

## 4. Weekly Predictions

In [4]:
# Load the season fixture list and convert the Date column to real datetimes
# so it can later be merged against the datetime Date column of the feature frame.
fixture = pd.read_csv('data/fixture.csv')
fixture['Date'] = pd.to_datetime(fixture['Date'])

In [5]:
fixture

Unnamed: 0,Date,Time (AEST),HomeTeam,AwayTeam,Venue,TV,Year,round,season
0,2018-08-11,5:00 AM,Man United,Leicester,"Old Trafford, Manchester","Optus, Fox Sports (delay)",2018,1,1819
1,2018-08-11,9:30 PM,Newcastle,Tottenham,"St.James’ Park, Newcastle","Optus, SBS",2018,1,1819
2,2018-08-12,12:00 AM,Bournemouth,Cardiff,"Vitality Stadium, Bournemouth",Optus,2018,1,1819
3,2018-08-12,12:00 AM,Fulham,Crystal Palace,"Craven Cottage, London",Optus,2018,1,1819
4,2018-08-12,12:00 AM,Huddersfield,Chelsea,"John Smith’s Stadium, Huddersfield","Optus, Fox Sports (delay)",2018,1,1819
...,...,...,...,...,...,...,...,...,...
375,2019-05-13,12:00 AM,Tottenham,Everton,"Tottenham Hotspur Stadium, London",Optus,2019,38,1819
376,2019-05-13,12:00 AM,Fulham,Newcastle,"Craven Cottage, London",Optus,2019,38,1819
377,2019-05-13,12:00 AM,Southampton,Huddersfield,"St.Mary’s Stadium, Southampton",Optus,2019,38,1819
378,2019-05-13,12:00 AM,Liverpool,Wolves,"Anfield, Liverpool","Optus, Fox Sports (delay)",2019,38,1819


It is no longer possible to connect to the API and retrieve game-level information from the Betfair website, as the API has since been removed. <br>
We will use the saved weekly game info instead for this section.

In [8]:
# Build the game-info frame from the saved weekly CSV (used here in place of a live API call).
# create_game_info_df is a project helper brought in by one of the star imports above;
# its internals are not visible in this notebook.
# NOTE(review): game_info does not appear to be referenced again in the cells shown here.
game_info = create_game_info_df("data/weekly_game_info.csv")
# Preview the first three games.
game_info.head(3)

Unnamed: 0,AwayTeam,HomeTeam,awaySelectionId,drawSelectionId,homeSelectionId,draw,marketId,marketStartTime,totalMatched,eventId,eventName,homeOdds,drawOdds,awayOdds,competitionId,Date,localMarketStartTime
0,Arsenal,Cardiff,1096,58805,79343,The Draw,1.146897152,2018-09-02 12:30:00+00:00,30123.595116,28852020,Cardiff v Arsenal,7.0,4.3,1.62,10932509,2018-09-02 00:00:00+00:00,"Sun September 2, 10:30PM"
1,Bournemouth,Chelsea,1141,58805,55190,The Draw,1.146875421,2018-09-01 14:00:00+00:00,30821.329656,28851426,Chelsea v Bournemouth,1.32,6.8,12.0,10932509,2018-09-01 00:00:00+00:00,"Sun September 2, 12:00AM"
2,Fulham,Brighton,56764,58805,18567,The Draw,1.146875746,2018-09-01 14:00:00+00:00,16594.833096,28851429,Brighton v Fulham,2.36,3.5,3.5,10932509,2018-09-01 00:00:00+00:00,"Sun September 2, 12:00AM"


Next, load the weekly odds, renaming teams so that they match the names used in the fixture.

In [10]:
# Read this week's odds, then normalise the abbreviated team names so they
# match the spellings used in the fixture (needed for the HomeTeam/AwayTeam merge below).
odds = pd.read_csv('data/weekly_epl_odds.csv')
odds = odds.replace({'Man Utd': 'Man United', 'C Palace': 'Crystal Palace'})

In [11]:
odds.head(3)

Unnamed: 0,HomeTeam,AwayTeam,f_homeOdds,f_drawOdds,f_awayOdds
0,Leicester,Liverpool,7.8,5.1,1.48
1,Brighton,Fulham,2.36,3.5,3.5
2,Everton,Huddersfield,1.54,4.4,8.2


## Data Wrangling The Week's Game Info Into Our Feature Set

We have wrangled all of this info into a feature set that we can use to predict this week's games.

In [12]:
# Load the historical EPL results through the project helper create_df
# (star-imported above; implementation not visible in this notebook).
# Judging by the preview, it returns one row per match with the derived
# gameId / homeWin / awayWin / result columns already attached.
df = create_df('data/epl_data.csv')
# Preview the first five matches.
df.head()

Unnamed: 0,AC,AF,AR,AS,AST,AY,AwayTeam,B365A,B365D,B365H,BWA,BWD,BWH,Bb1X2,BbAH,BbAHh,BbAv<2.5,BbAv>2.5,BbAvA,BbAvAHA,BbAvAHH,BbAvD,BbAvH,BbMx<2.5,BbMx>2.5,BbMxA,BbMxAHA,BbMxAHH,BbMxD,BbMxH,BbOU,Date,Day,Div,FTAG,FTHG,FTR,HC,HF,HR,HS,HST,HTAG,HTHG,HTR,HY,HomeTeam,IWA,IWD,IWH,LBA,LBD,LBH,Month,Referee,VCA,VCD,VCH,Year,season,gameId,homeWin,awayWin,result
0,6.0,14.0,1.0,11.0,5.0,1.0,Blackburn,2.75,3.2,2.5,2.9,3.3,2.2,55.0,20.0,0.0,1.71,2.02,2.74,2.04,1.82,3.16,2.4,1.8,2.25,2.9,2.08,1.86,3.35,2.6,35.0,2005-08-13,13,E0,1.0,3.0,H,2.0,11.0,0.0,13.0,5.0,1.0,0.0,A,0.0,West Ham,2.7,3.0,2.3,2.75,3.0,2.38,8,A Wiley,2.75,3.25,2.4,2005,506,1,1,0,home
1,8.0,16.0,0.0,13.0,6.0,2.0,Bolton,3.0,3.25,2.3,3.15,3.25,2.1,56.0,22.0,-0.25,1.7,2.01,3.05,1.84,2.01,3.16,2.2,1.87,2.2,3.4,1.92,2.1,3.3,2.4,36.0,2005-08-13,13,E0,2.0,2.0,D,7.0,14.0,0.0,3.0,2.0,2.0,2.0,D,0.0,Aston Villa,3.1,3.0,2.1,3.2,3.0,2.1,8,M Riley,3.1,3.25,2.2,2005,506,2,0,0,draw
2,6.0,14.0,0.0,12.0,5.0,1.0,Man United,1.72,3.4,5.0,1.75,3.35,4.35,56.0,23.0,0.75,1.79,1.93,1.69,1.86,2.0,3.36,4.69,1.87,2.1,1.8,1.93,2.05,3.7,5.65,36.0,2005-08-13,13,E0,2.0,0.0,A,8.0,15.0,0.0,10.0,5.0,1.0,0.0,A,3.0,Everton,1.8,3.1,3.8,1.83,3.2,3.75,8,G Poll,1.8,3.3,4.5,2005,506,3,0,1,away
3,6.0,13.0,0.0,7.0,4.0,2.0,Birmingham,2.87,3.25,2.37,2.8,3.2,2.3,56.0,21.0,0.0,1.69,2.04,2.87,2.05,1.81,3.16,2.31,1.77,2.24,3.05,2.11,1.85,3.3,2.6,36.0,2005-08-13,13,E0,0.0,0.0,D,6.0,12.0,0.0,15.0,7.0,0.0,0.0,D,1.0,Fulham,2.9,3.0,2.2,2.88,3.0,2.25,8,R Styles,2.8,3.25,2.35,2005,506,4,0,0,draw
4,6.0,11.0,0.0,13.0,3.0,3.0,West Brom,5.0,3.4,1.72,4.8,3.45,1.65,55.0,23.0,-0.75,1.77,1.94,4.79,1.76,2.1,3.38,1.69,1.9,2.1,5.6,1.83,2.19,3.63,1.8,36.0,2005-08-13,13,E0,0.0,0.0,D,3.0,13.0,0.0,15.0,8.0,0.0,0.0,D,2.0,Man City,4.2,3.2,1.7,4.5,3.25,1.67,8,C Foy,5.0,3.25,1.75,2005,506,5,0,0,draw


It is now important to specify which game week we would like to predict. We then filter the fixture for this game week and append this info to the main DataFrame.

In [13]:
# Ask which game week to predict. input() blocks until the user answers, so a
# "Run All" pauses here; int() raises ValueError if the answer is not a whole number.
round_to_predict = int(input("Which game week would you like to predict? Please input next week's Game Week\n"))

# Fail fast on a round that is not in the fixture. Without this check the
# filter in the next cell silently produces an empty future_predictions frame.
if not fixture['round'].eq(round_to_predict).any():
    raise ValueError(
        f"Game week {round_to_predict} is not in the fixture "
        f"(valid rounds: {fixture['round'].min()}-{fixture['round'].max()})"
    )

Which game week would you like to predict? Please input next week's Game Week
4


In [16]:
# Games of the chosen round, joined to this week's odds.
# The merge is an inner join (pandas default), so a fixture row without a matching
# HomeTeam/AwayTeam pair in `odds` is dropped. The odds columns are renamed to the
# B365* names used by the historical data, and season is cast to str to match the
# dtype used when the historical CSV is re-read below.
future_predictions = (
    fixture
    .loc[fixture['round'] == round_to_predict, ['Date', 'HomeTeam', 'AwayTeam', 'season']]
    .merge(odds, on=['HomeTeam', 'AwayTeam'])
    .rename(columns={'f_homeOdds': 'B365H', 'f_drawOdds': 'B365D', 'f_awayOdds': 'B365A'})
    .astype({'season': str})
)

In [34]:
# Rebuild the historical results frame and stack this week's fixtures underneath it,
# so the existing feature-engineering functions can treat future games like any other row.
#
# The stacked rows only carry what future_predictions holds (date, teams, season, odds);
# every match statistic is NaN. Comparisons against NaN are False, so those rows get
# homeWin = 0, awayWin = 0 and result = 'away' as placeholders -- not real outcomes.
#
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; sort=True keeps the same alphabetical column ordering.
df_including_future_games = (pd.read_csv('data/epl_data.csv', dtype={'season': str})
                .assign(Date=lambda df: pd.to_datetime(df.Date))
                # Keep only columns that have at most two missing values
                .pipe(lambda df: df.dropna(thresh=len(df) - 2, axis=1))
                .dropna(axis=0)  # Drop rows with NAs
                .sort_values('Date')
                .pipe(lambda df: pd.concat([df, future_predictions], sort=True))
                .reset_index(drop=True)
                # gameId is a 1-based running number over the combined, date-sorted frame
                .assign(gameId=lambda df: list(df.index + 1),
                       Year=lambda df: df.Date.apply(lambda row: row.year),
                       homeWin=lambda df: df.apply(lambda row: 1 if row.FTHG > row.FTAG else 0, axis=1),
                       awayWin=lambda df: df.apply(lambda row: 1 if row.FTAG > row.FTHG else 0, axis=1),
                       result=lambda df: df.apply(lambda row: 'home' if row.FTHG > row.FTAG else ('draw' if row.FTHG == row.FTAG else 'away'), axis=1)))

In [36]:
df_including_future_games.tail(10)

Unnamed: 0,AC,AF,AR,AS,AST,AY,AwayTeam,B365A,B365D,B365H,BWA,BWD,BWH,Bb1X2,BbAH,BbAHh,BbAv<2.5,BbAv>2.5,BbAvA,BbAvAHA,BbAvAHH,BbAvD,BbAvH,BbMx<2.5,BbMx>2.5,BbMxA,BbMxAHA,BbMxAHH,BbMxD,BbMxH,BbOU,Date,Day,Div,FTAG,FTHG,FTR,HC,HF,HR,HS,HST,HTAG,HTHG,HTR,HY,HomeTeam,IWA,IWD,IWH,LBA,LBD,LBH,Month,Referee,VCA,VCD,VCH,Year,season,gameId,homeWin,awayWin,result
4954,,,,,,,Liverpool,1.48,5.1,7.8,,,,,,,,,,,,,,,,,,,,,,2018-09-01,,,,,,,,,,,,,,,Leicester,,,,,,,,,,,,2018,1819,4955,0,0,away
4955,,,,,,,Fulham,3.5,3.5,2.36,,,,,,,,,,,,,,,,,,,,,,2018-09-02,,,,,,,,,,,,,,,Brighton,,,,,,,,,,,,2018,1819,4956,0,0,away
4956,,,,,,,Man United,1.7,3.9,6.6,,,,,,,,,,,,,,,,,,,,,,2018-09-02,,,,,,,,,,,,,,,Burnley,,,,,,,,,,,,2018,1819,4957,0,0,away
4957,,,,,,,Bournemouth,12.0,6.8,1.32,,,,,,,,,,,,,,,,,,,,,,2018-09-02,,,,,,,,,,,,,,,Chelsea,,,,,,,,,,,,2018,1819,4958,0,0,away
4958,,,,,,,Southampton,4.5,3.55,2.04,,,,,,,,,,,,,,,,,,,,,,2018-09-02,,,,,,,,,,,,,,,Crystal Palace,,,,,,,,,,,,2018,1819,4959,0,0,away
4959,,,,,,,Huddersfield,8.2,4.4,1.54,,,,,,,,,,,,,,,,,,,,,,2018-09-02,,,,,,,,,,,,,,,Everton,,,,,,,,,,,,2018,1819,4960,0,0,away
4960,,,,,,,Wolves,2.98,3.5,2.62,,,,,,,,,,,,,,,,,,,,,,2018-09-02,,,,,,,,,,,,,,,West Ham,,,,,,,,,,,,2018,1819,4961,0,0,away
4961,,,,,,,Newcastle,32.0,12.5,1.12,,,,,,,,,,,,,,,,,,,,,,2018-09-02,,,,,,,,,,,,,,,Man City,,,,,,,,,,,,2018,1819,4962,0,0,away
4962,,,,,,,Arsenal,1.62,4.3,7.0,,,,,,,,,,,,,,,,,,,,,,2018-09-02,,,,,,,,,,,,,,,Cardiff,,,,,,,,,,,,2018,1819,4963,0,0,away
4963,,,,,,,Tottenham,1.68,4.3,5.9,,,,,,,,,,,,,,,,,,,,,,2018-09-03,,,,,,,,,,,,,,,Watford,,,,,,,,,,,,2018,1819,4964,0,0,away


As we can see, we have appended the game information to our main DataFrame. The rest of the info is left as NAs, but this will be filled in when we create our rolling average features. <br>
This is a somewhat 'hacky' way to complete this task, but it works well because we can reuse the functions that we created previously. <br>
We now need to add our odds from our odds DataFrame, then run our create-features function as usual.

## Predicting Next Gameweek's Results

Now that we have the feature DataFrame created, it is important to split the feature DataFrame up into a training set and next week's games. Then use the model we tuned to create predictions.

In [37]:
# Run the project's feature pipeline over history plus this week's fixtures.
# create_feature_df is a star-imported project helper; the log it prints shows it
# building stats, odds and market-value features and merging them into one frame.
features = create_feature_df(df=df_including_future_games)

Creating all games feature DataFrame
Creating stats feature DataFrame
Creating odds feature DataFrame
Creating market values feature DataFrame
Filling NAs
Merging stats, odds and market values into one features DataFrame
Complete.


In [38]:
# Feature rows for this week's games: inner-join the upcoming fixtures to the
# full feature frame on the columns that identify a match.
production_df = future_predictions.merge(
    features,
    on=['Date', 'HomeTeam', 'AwayTeam', 'season'],
)

In [43]:
# Training data = every feature row whose gameId is NOT one of this week's games.
# isin() flags the production games and ~ inverts the mask.
training_df = features.loc[~features['gameId'].isin(production_df['gameId'])]

In [51]:
training_df

Unnamed: 0,Date,gameId,HomeTeam,season,f_homeWinPc38Home,f_homeWinPc5Home,f_awayWinPc38Home,f_awayWinPc5Home,f_eloForHome,f_eloAgainstHome,f_wtEloGoalsForHome,f_wtEloGoalsAgainstHome,f_cornersAgainstHome,f_cornersForHome,f_freesAgainstHome,f_freesForHome,f_goalsAgainstHome,f_goalsForHome,f_halfTimeGoalsAgainstHome,f_halfTimeGoalsForHome,f_redsAgainstHome,f_redsForHome,f_shotsAgainstHome,f_shotsForHome,f_shotsOnTargetAgainstHome,f_shotsOnTargetForHome,f_yellowsAgainstHome,f_yellowsForHome,f_avAsianHandicapOddsAgainstHome,f_avAsianHandicapOddsForHome,f_avgreaterthan2.5Home,f_avlessthan2.5Home,f_sizeOfHandicapHome,AwayTeam,f_homeWinPc38Away,f_homeWinPc5Away,f_awayWinPc38Away,f_awayWinPc5Away,f_eloForAway,f_eloAgainstAway,f_wtEloGoalsForAway,f_wtEloGoalsAgainstAway,f_cornersAgainstAway,f_cornersForAway,f_freesAgainstAway,f_freesForAway,f_goalsAgainstAway,f_goalsForAway,f_halfTimeGoalsAgainstAway,f_halfTimeGoalsForAway,f_redsAgainstAway,f_redsForAway,f_shotsAgainstAway,f_shotsForAway,f_shotsOnTargetAgainstAway,f_shotsOnTargetForAway,f_yellowsAgainstAway,f_yellowsForAway,f_avAsianHandicapOddsAgainstAway,f_avAsianHandicapOddsForAway,f_avgreaterthan2.5Away,f_avlessthan2.5Away,f_sizeOfHandicapAway,f_attMktH%,f_attMktA%,f_midMktH%,f_midMktA%,f_defMktH%,f_defMktA%,f_gkMktH%,f_gkMktA%,f_totalMktH%,f_totalMktA%,result,f_awayOdds,f_drawOdds,f_homeOdds
20,2005-08-23,21,Birmingham,0506,0.394737,0.4,0.263158,0.2,1478.687038,1492.866048,1.061763,1.260223,4.979592,7.530612,12.000000,9.938776,1.020408,0.510204,0.510204,0.510204,0.000000,0.000000,11.938776,8.020408,6.489796,2.979592,1.000000,2.510204,1.909000,1.945500,2.051000,1.673500,-0.137500,Middlesbrough,0.394737,0.4,0.263158,0.2,1492.866048,1478.687038,1.129940,1.279873,2.551020,5.510204,13.551020,13.428571,1.020408,0.000000,0.000000,0.000000,0.000000,0.489796,17.020408,8.081633,7.510204,2.510204,3.000000,1.489796,1.939500,1.909500,2.003500,1.715500,0.387500,5.132983,5.260851,3.341048,4.289788,3.502318,4.168935,2.332815,3.216457,3.934396,4.522205,away,2.75,3.20,2.50
21,2005-08-23,22,Portsmouth,0506,0.447368,0.4,0.263158,0.4,1405.968416,1489.229314,1.147101,1.503051,2.510204,4.959184,21.979592,16.061224,2.000000,0.510204,1.000000,0.000000,0.000000,0.000000,8.448980,10.489796,3.959184,4.448980,3.020408,1.530612,1.896500,1.969000,2.004000,1.700500,0.250000,Aston Villa,0.447368,0.4,0.263158,0.4,1489.229314,1405.968416,1.175160,1.263229,9.530612,7.000000,14.469388,17.571429,1.489796,0.979592,0.979592,0.979592,0.000000,0.000000,15.551020,3.000000,9.061224,2.510204,2.000000,0.510204,1.856500,1.977000,1.850500,1.848500,0.712500,3.738614,3.878659,4.494368,4.954673,2.884262,4.065926,3.746642,5.372543,3.743410,4.365456,draw,2.75,3.20,2.50
22,2005-08-23,23,Sunderland,0506,0.236842,0.0,0.236842,0.4,1277.888970,1552.291880,0.650176,1.543716,5.000000,5.000000,12.408163,17.551020,1.979592,0.489796,1.000000,0.489796,0.489796,0.510204,14.510204,6.897959,5.020408,3.918367,1.020408,2.510204,1.852000,1.991500,1.853500,1.850000,0.712500,Man City,0.236842,0.0,0.236842,0.4,1552.291880,1277.888970,1.288750,1.287367,7.530612,3.510204,8.959184,12.489796,0.510204,1.020408,0.510204,0.510204,0.000000,0.000000,10.959184,11.938776,2.489796,6.979592,3.000000,1.489796,1.815000,2.039500,2.006000,1.709500,-0.200000,0.706318,3.750792,1.476812,1.070209,2.634096,4.455890,0.777605,4.913050,1.499427,3.151477,away,2.50,3.20,2.75
23,2005-08-24,24,Arsenal,0506,0.736842,1.0,0.236842,0.2,1729.086068,1481.943781,2.099593,0.921523,3.000000,7.489796,17.000000,18.061224,0.510204,0.979592,0.000000,0.000000,0.489796,0.000000,5.571429,11.938776,3.551020,7.408163,1.510204,1.530612,1.945500,1.909000,1.876000,1.828500,-0.287500,Fulham,0.736842,1.0,0.236842,0.2,1481.943781,1729.086068,1.170928,1.323440,7.020408,3.448980,19.632653,13.020408,1.020408,0.510204,0.510204,0.000000,0.000000,0.000000,11.591837,11.428571,6.551020,5.469388,2.000000,1.510204,2.061000,1.799000,2.023500,1.684500,0.275000,10.807882,0.785474,8.064289,4.161925,9.116327,3.583254,3.661813,5.337198,9.031622,2.924604,home,13.00,5.50,1.22
24,2005-08-24,25,Blackburn,0506,0.263158,0.6,0.263158,0.2,1496.457214,1535.410612,1.248951,1.308457,1.489796,7.020408,12.530612,20.122449,1.979592,1.510204,0.000000,1.000000,0.000000,0.489796,10.448980,13.551020,4.489796,7.040816,1.020408,1.510204,1.803500,2.056500,2.014500,1.693500,-0.275000,Tottenham,0.263158,0.6,0.263158,0.2,1535.410612,1496.457214,1.274824,1.393005,6.489796,3.530612,11.469388,19.428571,0.000000,2.000000,0.000000,0.489796,0.000000,0.000000,11.510204,14.571429,4.959184,7.020408,0.510204,2.510204,1.914000,1.935000,1.976500,1.733500,-0.387500,1.583126,5.553120,3.477861,6.881561,4.010007,5.537488,2.297469,5.973420,2.916354,6.001831,draw,2.60,3.20,2.62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4949,2018-08-25,4950,Bournemouth,1819,0.447368,0.6,0.157895,0.4,1549.801937,1567.656994,1.261521,1.720533,5.380406,5.765467,11.482014,9.224256,1.591704,1.359998,0.704842,0.500421,0.023102,0.024295,14.022928,12.652535,4.589902,4.339203,2.099701,1.347368,1.876216,1.994746,1.815644,2.049951,0.188785,Everton,0.447368,0.6,0.157895,0.4,1567.656994,1549.801937,1.330970,1.232383,5.442022,4.320277,10.632374,11.395848,1.401492,1.318978,0.591241,0.501612,0.128647,0.097965,13.651956,10.228199,4.534917,3.846102,1.357584,1.231665,1.993966,1.884081,2.123252,1.754956,0.106011,1.592553,4.602562,1.562375,4.967551,1.928114,3.080280,1.689039,6.400569,1.677174,4.441914,draw,2.75,3.60,2.62
4950,2018-08-26,4951,Watford,1819,0.421053,0.4,0.289474,0.6,1520.411452,1546.461760,1.018660,1.607977,5.066188,4.833421,10.483965,11.992207,1.581523,1.106250,0.735976,0.447844,0.071242,0.079449,10.826491,11.602295,4.021974,3.862998,1.662156,1.751864,1.938889,1.939490,2.053532,1.915178,0.436705,Crystal Palace,0.421053,0.4,0.289474,0.6,1546.461760,1520.411452,1.108738,1.462203,5.611883,5.300684,11.293843,11.171714,1.349860,1.350959,0.681212,0.518444,0.072246,0.041085,12.692089,11.836398,4.556892,4.137001,1.861655,1.849007,2.010493,1.870704,1.848792,2.020223,0.281209,1.700947,2.651476,2.323532,1.081644,2.363116,2.989988,0.933416,1.644591,2.010164,2.191385,home,3.00,3.25,2.60
4951,2018-08-26,4952,Newcastle,1819,0.394737,0.6,0.605263,0.8,1513.740462,1663.134224,1.263713,1.457554,5.663536,4.187155,10.328695,10.939110,1.239496,1.054580,0.586392,0.443609,0.005387,0.091234,12.452771,11.747707,4.003907,3.858941,1.621902,1.362317,1.867457,2.006749,2.024455,1.911703,0.379764,Chelsea,0.394737,0.6,0.605263,0.8,1663.134224,1513.740462,1.848987,0.860756,3.755354,5.849745,11.132337,9.514379,1.074810,1.771308,0.414233,0.893722,0.042799,0.054557,9.727137,16.066182,3.341093,5.717481,1.928424,1.130717,1.879666,1.984855,1.805033,2.053100,-0.926899,1.833687,10.622580,2.323532,12.338755,2.186764,12.897203,2.666904,11.734376,2.132119,11.813535,away,1.66,4.00,5.75
4952,2018-08-26,4953,Fulham,1819,0.289474,0.2,0.210526,0.4,1405.922861,1523.094877,1.137916,1.434621,5.975406,5.069325,10.171646,9.963712,2.186921,1.087311,0.628756,0.350691,0.030692,0.043723,16.577464,11.759445,7.149427,4.548139,1.314872,1.393248,1.884552,1.985978,1.756776,2.128261,0.502253,Burnley,0.289474,0.2,0.210526,0.4,1523.094877,1405.922861,0.938616,1.525725,6.009157,4.388209,11.883636,9.503644,1.244818,0.939679,0.422908,0.382256,0.088614,0.012340,14.509454,10.273809,4.536976,3.510299,1.211391,1.578136,1.965764,1.906055,2.282184,1.675649,0.234690,0.466927,1.784327,3.405176,1.862832,0.540813,1.975141,1.066761,2.622455,1.423612,1.915461,home,4.33,3.40,2.00


In [44]:
# Model inputs are the columns whose names carry the 'f_' feature prefix.
feature_names = [name for name in training_df.columns if name.startswith('f_')]

# Feature matrix: just those columns, in their original order.
train_x = training_df[feature_names]

# Encode the string outcomes as integers. LabelEncoder numbers the classes in
# sorted order, so the mapping is away -> 0, draw -> 1, home -> 2.
le = LabelEncoder()
train_y = le.fit_transform(training_df['result'])

In [49]:
train_x

Unnamed: 0,f_homeWinPc38Home,f_homeWinPc5Home,f_awayWinPc38Home,f_awayWinPc5Home,f_eloForHome,f_eloAgainstHome,f_wtEloGoalsForHome,f_wtEloGoalsAgainstHome,f_cornersAgainstHome,f_cornersForHome,f_freesAgainstHome,f_freesForHome,f_goalsAgainstHome,f_goalsForHome,f_halfTimeGoalsAgainstHome,f_halfTimeGoalsForHome,f_redsAgainstHome,f_redsForHome,f_shotsAgainstHome,f_shotsForHome,f_shotsOnTargetAgainstHome,f_shotsOnTargetForHome,f_yellowsAgainstHome,f_yellowsForHome,f_avAsianHandicapOddsAgainstHome,f_avAsianHandicapOddsForHome,f_avgreaterthan2.5Home,f_avlessthan2.5Home,f_sizeOfHandicapHome,f_homeWinPc38Away,f_homeWinPc5Away,f_awayWinPc38Away,f_awayWinPc5Away,f_eloForAway,f_eloAgainstAway,f_wtEloGoalsForAway,f_wtEloGoalsAgainstAway,f_cornersAgainstAway,f_cornersForAway,f_freesAgainstAway,f_freesForAway,f_goalsAgainstAway,f_goalsForAway,f_halfTimeGoalsAgainstAway,f_halfTimeGoalsForAway,f_redsAgainstAway,f_redsForAway,f_shotsAgainstAway,f_shotsForAway,f_shotsOnTargetAgainstAway,f_shotsOnTargetForAway,f_yellowsAgainstAway,f_yellowsForAway,f_avAsianHandicapOddsAgainstAway,f_avAsianHandicapOddsForAway,f_avgreaterthan2.5Away,f_avlessthan2.5Away,f_sizeOfHandicapAway,f_attMktH%,f_attMktA%,f_midMktH%,f_midMktA%,f_defMktH%,f_defMktA%,f_gkMktH%,f_gkMktA%,f_totalMktH%,f_totalMktA%,f_awayOdds,f_drawOdds,f_homeOdds
20,0.394737,0.4,0.263158,0.2,1478.687038,1492.866048,1.061763,1.260223,4.979592,7.530612,12.000000,9.938776,1.020408,0.510204,0.510204,0.510204,0.000000,0.000000,11.938776,8.020408,6.489796,2.979592,1.000000,2.510204,1.909000,1.945500,2.051000,1.673500,-0.137500,0.394737,0.4,0.263158,0.2,1492.866048,1478.687038,1.129940,1.279873,2.551020,5.510204,13.551020,13.428571,1.020408,0.000000,0.000000,0.000000,0.000000,0.489796,17.020408,8.081633,7.510204,2.510204,3.000000,1.489796,1.939500,1.909500,2.003500,1.715500,0.387500,5.132983,5.260851,3.341048,4.289788,3.502318,4.168935,2.332815,3.216457,3.934396,4.522205,2.75,3.20,2.50
21,0.447368,0.4,0.263158,0.4,1405.968416,1489.229314,1.147101,1.503051,2.510204,4.959184,21.979592,16.061224,2.000000,0.510204,1.000000,0.000000,0.000000,0.000000,8.448980,10.489796,3.959184,4.448980,3.020408,1.530612,1.896500,1.969000,2.004000,1.700500,0.250000,0.447368,0.4,0.263158,0.4,1489.229314,1405.968416,1.175160,1.263229,9.530612,7.000000,14.469388,17.571429,1.489796,0.979592,0.979592,0.979592,0.000000,0.000000,15.551020,3.000000,9.061224,2.510204,2.000000,0.510204,1.856500,1.977000,1.850500,1.848500,0.712500,3.738614,3.878659,4.494368,4.954673,2.884262,4.065926,3.746642,5.372543,3.743410,4.365456,2.75,3.20,2.50
22,0.236842,0.0,0.236842,0.4,1277.888970,1552.291880,0.650176,1.543716,5.000000,5.000000,12.408163,17.551020,1.979592,0.489796,1.000000,0.489796,0.489796,0.510204,14.510204,6.897959,5.020408,3.918367,1.020408,2.510204,1.852000,1.991500,1.853500,1.850000,0.712500,0.236842,0.0,0.236842,0.4,1552.291880,1277.888970,1.288750,1.287367,7.530612,3.510204,8.959184,12.489796,0.510204,1.020408,0.510204,0.510204,0.000000,0.000000,10.959184,11.938776,2.489796,6.979592,3.000000,1.489796,1.815000,2.039500,2.006000,1.709500,-0.200000,0.706318,3.750792,1.476812,1.070209,2.634096,4.455890,0.777605,4.913050,1.499427,3.151477,2.50,3.20,2.75
23,0.736842,1.0,0.236842,0.2,1729.086068,1481.943781,2.099593,0.921523,3.000000,7.489796,17.000000,18.061224,0.510204,0.979592,0.000000,0.000000,0.489796,0.000000,5.571429,11.938776,3.551020,7.408163,1.510204,1.530612,1.945500,1.909000,1.876000,1.828500,-0.287500,0.736842,1.0,0.236842,0.2,1481.943781,1729.086068,1.170928,1.323440,7.020408,3.448980,19.632653,13.020408,1.020408,0.510204,0.510204,0.000000,0.000000,0.000000,11.591837,11.428571,6.551020,5.469388,2.000000,1.510204,2.061000,1.799000,2.023500,1.684500,0.275000,10.807882,0.785474,8.064289,4.161925,9.116327,3.583254,3.661813,5.337198,9.031622,2.924604,13.00,5.50,1.22
24,0.263158,0.6,0.263158,0.2,1496.457214,1535.410612,1.248951,1.308457,1.489796,7.020408,12.530612,20.122449,1.979592,1.510204,0.000000,1.000000,0.000000,0.489796,10.448980,13.551020,4.489796,7.040816,1.020408,1.510204,1.803500,2.056500,2.014500,1.693500,-0.275000,0.263158,0.6,0.263158,0.2,1535.410612,1496.457214,1.274824,1.393005,6.489796,3.530612,11.469388,19.428571,0.000000,2.000000,0.000000,0.489796,0.000000,0.000000,11.510204,14.571429,4.959184,7.020408,0.510204,2.510204,1.914000,1.935000,1.976500,1.733500,-0.387500,1.583126,5.553120,3.477861,6.881561,4.010007,5.537488,2.297469,5.973420,2.916354,6.001831,2.60,3.20,2.62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4949,0.447368,0.6,0.157895,0.4,1549.801937,1567.656994,1.261521,1.720533,5.380406,5.765467,11.482014,9.224256,1.591704,1.359998,0.704842,0.500421,0.023102,0.024295,14.022928,12.652535,4.589902,4.339203,2.099701,1.347368,1.876216,1.994746,1.815644,2.049951,0.188785,0.447368,0.6,0.157895,0.4,1567.656994,1549.801937,1.330970,1.232383,5.442022,4.320277,10.632374,11.395848,1.401492,1.318978,0.591241,0.501612,0.128647,0.097965,13.651956,10.228199,4.534917,3.846102,1.357584,1.231665,1.993966,1.884081,2.123252,1.754956,0.106011,1.592553,4.602562,1.562375,4.967551,1.928114,3.080280,1.689039,6.400569,1.677174,4.441914,2.75,3.60,2.62
4950,0.421053,0.4,0.289474,0.6,1520.411452,1546.461760,1.018660,1.607977,5.066188,4.833421,10.483965,11.992207,1.581523,1.106250,0.735976,0.447844,0.071242,0.079449,10.826491,11.602295,4.021974,3.862998,1.662156,1.751864,1.938889,1.939490,2.053532,1.915178,0.436705,0.421053,0.4,0.289474,0.6,1546.461760,1520.411452,1.108738,1.462203,5.611883,5.300684,11.293843,11.171714,1.349860,1.350959,0.681212,0.518444,0.072246,0.041085,12.692089,11.836398,4.556892,4.137001,1.861655,1.849007,2.010493,1.870704,1.848792,2.020223,0.281209,1.700947,2.651476,2.323532,1.081644,2.363116,2.989988,0.933416,1.644591,2.010164,2.191385,3.00,3.25,2.60
4951,0.394737,0.6,0.605263,0.8,1513.740462,1663.134224,1.263713,1.457554,5.663536,4.187155,10.328695,10.939110,1.239496,1.054580,0.586392,0.443609,0.005387,0.091234,12.452771,11.747707,4.003907,3.858941,1.621902,1.362317,1.867457,2.006749,2.024455,1.911703,0.379764,0.394737,0.6,0.605263,0.8,1663.134224,1513.740462,1.848987,0.860756,3.755354,5.849745,11.132337,9.514379,1.074810,1.771308,0.414233,0.893722,0.042799,0.054557,9.727137,16.066182,3.341093,5.717481,1.928424,1.130717,1.879666,1.984855,1.805033,2.053100,-0.926899,1.833687,10.622580,2.323532,12.338755,2.186764,12.897203,2.666904,11.734376,2.132119,11.813535,1.66,4.00,5.75
4952,0.289474,0.2,0.210526,0.4,1405.922861,1523.094877,1.137916,1.434621,5.975406,5.069325,10.171646,9.963712,2.186921,1.087311,0.628756,0.350691,0.030692,0.043723,16.577464,11.759445,7.149427,4.548139,1.314872,1.393248,1.884552,1.985978,1.756776,2.128261,0.502253,0.289474,0.2,0.210526,0.4,1523.094877,1405.922861,0.938616,1.525725,6.009157,4.388209,11.883636,9.503644,1.244818,0.939679,0.422908,0.382256,0.088614,0.012340,14.509454,10.273809,4.536976,3.510299,1.211391,1.578136,1.965764,1.906055,2.282184,1.675649,0.234690,0.466927,1.784327,3.405176,1.862832,0.540813,1.975141,1.066761,2.622455,1.423612,1.915461,4.33,3.40,2.00


In [50]:
# Encoded match outcomes from the LabelEncoder: 0 = away win, 1 = draw, 2 = home win
train_y

array([0, 1, 0, ..., 0, 2, 0])

In [52]:
# Build and fit the logistic regression in one step (fit() returns the estimator).
# C is the inverse regularisation strength, so C=0.01 regularises heavily.
lr = LogisticRegression(C=0.01, solver='liblinear').fit(train_x, train_y)

# Class probabilities for this week's games, one column per encoded outcome,
# using the same feature columns the model was trained on.
predicted_probs = lr.predict_proba(production_df[feature_names])

# Convert probabilities to fair decimal odds (1 / probability), the same format
# as the bookmaker odds they are compared against below.
predicted_odds = 1 / predicted_probs

In [53]:
train_x

Unnamed: 0,f_homeWinPc38Home,f_homeWinPc5Home,f_awayWinPc38Home,f_awayWinPc5Home,f_eloForHome,f_eloAgainstHome,f_wtEloGoalsForHome,f_wtEloGoalsAgainstHome,f_cornersAgainstHome,f_cornersForHome,f_freesAgainstHome,f_freesForHome,f_goalsAgainstHome,f_goalsForHome,f_halfTimeGoalsAgainstHome,f_halfTimeGoalsForHome,f_redsAgainstHome,f_redsForHome,f_shotsAgainstHome,f_shotsForHome,f_shotsOnTargetAgainstHome,f_shotsOnTargetForHome,f_yellowsAgainstHome,f_yellowsForHome,f_avAsianHandicapOddsAgainstHome,f_avAsianHandicapOddsForHome,f_avgreaterthan2.5Home,f_avlessthan2.5Home,f_sizeOfHandicapHome,f_homeWinPc38Away,f_homeWinPc5Away,f_awayWinPc38Away,f_awayWinPc5Away,f_eloForAway,f_eloAgainstAway,f_wtEloGoalsForAway,f_wtEloGoalsAgainstAway,f_cornersAgainstAway,f_cornersForAway,f_freesAgainstAway,f_freesForAway,f_goalsAgainstAway,f_goalsForAway,f_halfTimeGoalsAgainstAway,f_halfTimeGoalsForAway,f_redsAgainstAway,f_redsForAway,f_shotsAgainstAway,f_shotsForAway,f_shotsOnTargetAgainstAway,f_shotsOnTargetForAway,f_yellowsAgainstAway,f_yellowsForAway,f_avAsianHandicapOddsAgainstAway,f_avAsianHandicapOddsForAway,f_avgreaterthan2.5Away,f_avlessthan2.5Away,f_sizeOfHandicapAway,f_attMktH%,f_attMktA%,f_midMktH%,f_midMktA%,f_defMktH%,f_defMktA%,f_gkMktH%,f_gkMktA%,f_totalMktH%,f_totalMktA%,f_awayOdds,f_drawOdds,f_homeOdds
20,0.394737,0.4,0.263158,0.2,1478.687038,1492.866048,1.061763,1.260223,4.979592,7.530612,12.000000,9.938776,1.020408,0.510204,0.510204,0.510204,0.000000,0.000000,11.938776,8.020408,6.489796,2.979592,1.000000,2.510204,1.909000,1.945500,2.051000,1.673500,-0.137500,0.394737,0.4,0.263158,0.2,1492.866048,1478.687038,1.129940,1.279873,2.551020,5.510204,13.551020,13.428571,1.020408,0.000000,0.000000,0.000000,0.000000,0.489796,17.020408,8.081633,7.510204,2.510204,3.000000,1.489796,1.939500,1.909500,2.003500,1.715500,0.387500,5.132983,5.260851,3.341048,4.289788,3.502318,4.168935,2.332815,3.216457,3.934396,4.522205,2.75,3.20,2.50
21,0.447368,0.4,0.263158,0.4,1405.968416,1489.229314,1.147101,1.503051,2.510204,4.959184,21.979592,16.061224,2.000000,0.510204,1.000000,0.000000,0.000000,0.000000,8.448980,10.489796,3.959184,4.448980,3.020408,1.530612,1.896500,1.969000,2.004000,1.700500,0.250000,0.447368,0.4,0.263158,0.4,1489.229314,1405.968416,1.175160,1.263229,9.530612,7.000000,14.469388,17.571429,1.489796,0.979592,0.979592,0.979592,0.000000,0.000000,15.551020,3.000000,9.061224,2.510204,2.000000,0.510204,1.856500,1.977000,1.850500,1.848500,0.712500,3.738614,3.878659,4.494368,4.954673,2.884262,4.065926,3.746642,5.372543,3.743410,4.365456,2.75,3.20,2.50
22,0.236842,0.0,0.236842,0.4,1277.888970,1552.291880,0.650176,1.543716,5.000000,5.000000,12.408163,17.551020,1.979592,0.489796,1.000000,0.489796,0.489796,0.510204,14.510204,6.897959,5.020408,3.918367,1.020408,2.510204,1.852000,1.991500,1.853500,1.850000,0.712500,0.236842,0.0,0.236842,0.4,1552.291880,1277.888970,1.288750,1.287367,7.530612,3.510204,8.959184,12.489796,0.510204,1.020408,0.510204,0.510204,0.000000,0.000000,10.959184,11.938776,2.489796,6.979592,3.000000,1.489796,1.815000,2.039500,2.006000,1.709500,-0.200000,0.706318,3.750792,1.476812,1.070209,2.634096,4.455890,0.777605,4.913050,1.499427,3.151477,2.50,3.20,2.75
23,0.736842,1.0,0.236842,0.2,1729.086068,1481.943781,2.099593,0.921523,3.000000,7.489796,17.000000,18.061224,0.510204,0.979592,0.000000,0.000000,0.489796,0.000000,5.571429,11.938776,3.551020,7.408163,1.510204,1.530612,1.945500,1.909000,1.876000,1.828500,-0.287500,0.736842,1.0,0.236842,0.2,1481.943781,1729.086068,1.170928,1.323440,7.020408,3.448980,19.632653,13.020408,1.020408,0.510204,0.510204,0.000000,0.000000,0.000000,11.591837,11.428571,6.551020,5.469388,2.000000,1.510204,2.061000,1.799000,2.023500,1.684500,0.275000,10.807882,0.785474,8.064289,4.161925,9.116327,3.583254,3.661813,5.337198,9.031622,2.924604,13.00,5.50,1.22
24,0.263158,0.6,0.263158,0.2,1496.457214,1535.410612,1.248951,1.308457,1.489796,7.020408,12.530612,20.122449,1.979592,1.510204,0.000000,1.000000,0.000000,0.489796,10.448980,13.551020,4.489796,7.040816,1.020408,1.510204,1.803500,2.056500,2.014500,1.693500,-0.275000,0.263158,0.6,0.263158,0.2,1535.410612,1496.457214,1.274824,1.393005,6.489796,3.530612,11.469388,19.428571,0.000000,2.000000,0.000000,0.489796,0.000000,0.000000,11.510204,14.571429,4.959184,7.020408,0.510204,2.510204,1.914000,1.935000,1.976500,1.733500,-0.387500,1.583126,5.553120,3.477861,6.881561,4.010007,5.537488,2.297469,5.973420,2.916354,6.001831,2.60,3.20,2.62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4949,0.447368,0.6,0.157895,0.4,1549.801937,1567.656994,1.261521,1.720533,5.380406,5.765467,11.482014,9.224256,1.591704,1.359998,0.704842,0.500421,0.023102,0.024295,14.022928,12.652535,4.589902,4.339203,2.099701,1.347368,1.876216,1.994746,1.815644,2.049951,0.188785,0.447368,0.6,0.157895,0.4,1567.656994,1549.801937,1.330970,1.232383,5.442022,4.320277,10.632374,11.395848,1.401492,1.318978,0.591241,0.501612,0.128647,0.097965,13.651956,10.228199,4.534917,3.846102,1.357584,1.231665,1.993966,1.884081,2.123252,1.754956,0.106011,1.592553,4.602562,1.562375,4.967551,1.928114,3.080280,1.689039,6.400569,1.677174,4.441914,2.75,3.60,2.62
4950,0.421053,0.4,0.289474,0.6,1520.411452,1546.461760,1.018660,1.607977,5.066188,4.833421,10.483965,11.992207,1.581523,1.106250,0.735976,0.447844,0.071242,0.079449,10.826491,11.602295,4.021974,3.862998,1.662156,1.751864,1.938889,1.939490,2.053532,1.915178,0.436705,0.421053,0.4,0.289474,0.6,1546.461760,1520.411452,1.108738,1.462203,5.611883,5.300684,11.293843,11.171714,1.349860,1.350959,0.681212,0.518444,0.072246,0.041085,12.692089,11.836398,4.556892,4.137001,1.861655,1.849007,2.010493,1.870704,1.848792,2.020223,0.281209,1.700947,2.651476,2.323532,1.081644,2.363116,2.989988,0.933416,1.644591,2.010164,2.191385,3.00,3.25,2.60
4951,0.394737,0.6,0.605263,0.8,1513.740462,1663.134224,1.263713,1.457554,5.663536,4.187155,10.328695,10.939110,1.239496,1.054580,0.586392,0.443609,0.005387,0.091234,12.452771,11.747707,4.003907,3.858941,1.621902,1.362317,1.867457,2.006749,2.024455,1.911703,0.379764,0.394737,0.6,0.605263,0.8,1663.134224,1513.740462,1.848987,0.860756,3.755354,5.849745,11.132337,9.514379,1.074810,1.771308,0.414233,0.893722,0.042799,0.054557,9.727137,16.066182,3.341093,5.717481,1.928424,1.130717,1.879666,1.984855,1.805033,2.053100,-0.926899,1.833687,10.622580,2.323532,12.338755,2.186764,12.897203,2.666904,11.734376,2.132119,11.813535,1.66,4.00,5.75
4952,0.289474,0.2,0.210526,0.4,1405.922861,1523.094877,1.137916,1.434621,5.975406,5.069325,10.171646,9.963712,2.186921,1.087311,0.628756,0.350691,0.030692,0.043723,16.577464,11.759445,7.149427,4.548139,1.314872,1.393248,1.884552,1.985978,1.756776,2.128261,0.502253,0.289474,0.2,0.210526,0.4,1523.094877,1405.922861,0.938616,1.525725,6.009157,4.388209,11.883636,9.503644,1.244818,0.939679,0.422908,0.382256,0.088614,0.012340,14.509454,10.273809,4.536976,3.510299,1.211391,1.578136,1.965764,1.906055,2.282184,1.675649,0.234690,0.466927,1.784327,3.405176,1.862832,0.540813,1.975141,1.066761,2.622455,1.423612,1.915461,4.33,3.40,2.00


In [55]:
# Work out which column of predicted_odds belongs to which outcome instead of
# hard-coding positions. predict_proba orders its columns by lr.classes_ (the
# encoded labels), and le.inverse_transform turns those codes back into the
# original 'away' / 'draw' / 'home' strings.
outcome_column = {label: position
                  for position, label in enumerate(le.inverse_transform(lr.classes_))}

# Put the modelled odds next to the market odds for each upcoming game.
# The B365* columns of production_df hold this week's odds (they were renamed
# from the weekly odds file when future_predictions was built), hence the
# rename to Betfair*Odds for display.
predictions_df = (production_df.loc[:, ['Date', 'HomeTeam', 'AwayTeam', 'B365H', 'B365D', 'B365A']]
                               .assign(homeModelledOdds=predicted_odds[:, outcome_column['home']],
                                       drawModelledOdds=predicted_odds[:, outcome_column['draw']],
                                       awayModelledOdds=predicted_odds[:, outcome_column['away']])
                               .rename(columns={
                                   'B365H': 'BetfairHomeOdds',
                                   'B365D': 'BetfairDrawOdds',
                                   'B365A': 'BetfairAwayOdds'}))

In [56]:
predictions_df

Unnamed: 0,Date,HomeTeam,AwayTeam,BetfairHomeOdds,BetfairDrawOdds,BetfairAwayOdds,homeModelledOdds,drawModelledOdds,awayModelledOdds
0,2018-09-01,Leicester,Liverpool,7.8,5.1,1.48,5.859833,5.371972,1.554738
1,2018-09-02,Brighton,Fulham,2.36,3.5,3.5,2.202812,3.838564,3.502373
2,2018-09-02,Burnley,Man United,6.6,3.9,1.7,5.306254,4.541187,1.691085
3,2018-09-02,Chelsea,Bournemouth,1.32,6.8,12.0,1.311959,6.090482,13.588764
4,2018-09-02,Crystal Palace,Southampton,2.04,3.55,4.5,2.201174,4.204776,3.248102
5,2018-09-02,Everton,Huddersfield,1.54,4.4,8.2,1.648607,3.787356,7.728509
6,2018-09-02,West Ham,Wolves,2.62,3.5,2.98,2.027439,4.070999,3.829555
7,2018-09-02,Man City,Newcastle,1.12,12.5,32.0,1.042232,29.423485,153.034722
8,2018-09-02,Cardiff,Arsenal,7.0,4.3,1.62,6.308346,4.951069,1.563714
9,2018-09-03,Watford,Tottenham,5.9,4.3,1.68,5.653902,4.340181,1.687121


Above are the predictions for this Gameweek's matches.