## Apply the model

- Load the model(s) which have been created and trained
- Load data pertaining to the upcoming round of matches
- Predict the game results

In [16]:
import joblib
import pandas as pd
import datetime
import numpy as np

# Human-readable outcome names, looked up by the integer class that a
# classifier predicts (see `prediction_labels[result]` in the output cell).
# NOTE(review): index 0 is an empty placeholder -- presumably the trained
# class labels start at 1; confirm against the training notebook.
prediction_labels = [
    '',
    'no-score draw',
    'score draw',
    'home win',
    'away win',
]

In [17]:
# Restore the previously trained classifiers from disk, keyed by a short name
# that later cells use for console headings and output file names.
# NOTE: joblib.load unpickles the file contents, so only load model files that
# come from a trusted source.
classifiers = {
    "mlp": joblib.load('football_classifier_mlp.pkl'),
    "knn": joblib.load('football_classifier_knn.pkl'),
    "ada": joblib.load('football_classifier_ada.pkl'),
}

# Keep the individual module-level names available as well.
football_classifier_mlp = classifiers["mlp"]
football_classifier_knn = classifiers["knn"]
football_classifier_ada = classifiers["ada"]

In [18]:
# Load the cleaned data which contains the features for predicting the next
# round of matches.
cleaned_results = pd.read_csv('cleaned_results.csv')

# The prediction cell slices this frame purely by position: the first three
# columns are treated as fixture identifiers and every later column is cast to
# float and fed to the classifiers. Fail fast with a clear message if the file
# layout has drifted (e.g. the CSV was saved with its index column), rather
# than letting misaligned data flow into the models.
expected_id_columns = ['league', 'home_team', 'away_team']
actual_id_columns = list(cleaned_results.columns[:3])
if actual_id_columns != expected_id_columns:
    raise ValueError(
        f'cleaned_results.csv must start with columns {expected_id_columns}, '
        f'found {actual_id_columns}'
    )

# Last expression of the cell: display the loaded frame.
cleaned_results

Unnamed: 0,league,home_team,away_team,home_elo,away_elo,home_goals_f,home_goals_a,away_goals_f,away_goals_a,home_streak,away_streak
0,premier_league,Newcastle,Liverpool,69,89,23,26,40,15,2,0
1,premier_league,Aston Villa,Norwich,63,62,24,25,10,35,-4,0
2,premier_league,Southampton,Crystal Palace,81,74,21,20,18,23,0,0
3,premier_league,Tottenham,Leicester,89,80,31,18,20,30,0,0
4,premier_league,Watford,Burnley,71,69,15,39,13,25,-3,0
...,...,...,...,...,...,...,...,...,...,...,...
114,eredivisie,For Sittard,Willem II,45,61,17,26,11,32,0,-2
115,eredivisie,Utrecht,PSV Eindhoven,77,88,29,18,31,16,0,0
116,eredivisie,Heracles,Vitesse,72,76,20,17,18,19,0,0
117,eredivisie,Zwolle,Feyenoord,71,84,11,17,33,16,0,2


In [19]:
# Run every loaded classifier over the upcoming fixtures.
# The frame is split by column position: the first three columns identify the
# fixture (league, home team, away team in the displayed frame) and all
# remaining columns are cast to float and used as the model inputs.
fixture_matrix = cleaned_results.to_numpy()
teams = fixture_matrix[:, :3]
features = fixture_matrix[:, 3:].astype(float)

# One array of predicted class labels per classifier, keyed by classifier name.
predictions = {
    name: model.predict(features)
    for name, model in classifiers.items()
}

In [20]:
# For each classifier: print its predictions grouped under league headings,
# collect them into a DataFrame (kept in `outputs`), and write that frame to a
# CSV file stamped with today's date.
outputs = {}
for c in classifiers:
    print('Classifier:', c)
    print('************************')

    # League of the most recently printed fixture; the empty string forces a
    # heading before the first fixture.
    league_name = ''
    prediction_list = []
    for result, (league, home, away) in zip(predictions[c], teams):
        # Rows are walked in file order, so a change of league means a new
        # section of the printout starts here.
        if league != league_name:
            league_name = league
            print(f'\n{league_name}')
            print('-----------------')

        # Translate the predicted class into its readable label once and reuse
        # it for both the console line and the exported row.
        label = prediction_labels[result]
        print('{} vs {}: {}'.format(home, away, label))
        prediction_list.append([league, home, away, label])

    outputs[c] = pd.DataFrame(
        prediction_list,
        columns=['league', 'home', 'away', 'prediction'],
    )
    outputs[c].to_csv(f'predictions_{c}_{datetime.date.today().strftime("%d-%m-%Y")}.csv', index=False)

Classifier: mlp
************************

premier_league
-----------------
Newcastle vs Liverpool: home win
Aston Villa vs Norwich: away win
Southampton vs Crystal Palace: home win
Tottenham vs Leicester: home win
Watford vs Burnley: home win
Wolves vs Brighton: away win
Leeds vs Man City: home win
Everton vs Chelsea: home win
West Ham vs Arsenal: home win
Man United vs Brentford: away win

championship
-----------------
Huddersfield vs Barnsley: home win
Luton vs Blackpool: home win
Bournemouth vs Fulham: home win
Birmingham vs Millwall: home win
Derby vs Bristol City: home win
Hull vs Reading: home win
Peterboro vs Nott'm Forest: home win
Sheffield United vs Cardiff: home win
Stoke vs QPR: away win
Swansea vs Middlesbrough: home win
West Brom vs Coventry: away win
Preston vs Blackburn: home win

bundesliga
-----------------
Wolfsburg vs Mainz: home win
Greuther Furth vs Leverkusen: home win
RB Leipzig vs Union Berlin: away win
Freiburg vs Bochum: home win
Ein Frankfurt vs Hoffenheim:

In [21]:
# Summarise, per classifier, how many fixtures in each league received each
# predicted label, with a final 'total' row across all leagues.
for c in classifiers:
    # Count predictions per (league, label) and spread the labels into columns;
    # league/label combinations that never occur become 0.
    results = (
        outputs[c][['league', 'prediction']]
        .groupby('league')['prediction']
        .value_counts()
        .unstack()
        .fillna(0)
        .astype(int)
    )
    # (The former bare `results.reindex()` call was dropped: it had no
    # arguments and its return value was discarded, so it did nothing.)
    print('Classifier: ', c)
    # Pass the aggregation by name: handing the builtin `sum` to pandas is
    # deprecated and emits a FutureWarning on recent versions, while 'sum'
    # produces the same table.
    print(results.pivot_table(index='league', margins=True, margins_name='total', aggfunc='sum'))

Classifier:  mlp
prediction        away win  home win  score draw
league                                          
2_liga                   0         9           0
bundesliga               1         8           0
championship             2        10           0
eredivisie               2         7           0
ligue_1                  0         9           1
ligue_2                  6         4           0
premier_league           3         7           0
primeira_liga            1         7           1
primera_division         3         7           0
segunda_division         3         8           0
serie_a                  1         9           0
serie_b                  1         7           2
total                   23        92           4
Classifier:  knn
prediction        away win  home win
league                              
2_liga                   4         5
bundesliga               4         5
championship             4         8
eredivisie               5         4
ligue_1  