In [21]:
import joblib
import pandas as pd
import datetime
import numpy as np

# Human-readable outcome names, indexed by the integer class id a classifier
# returns. Index 0 is an empty placeholder -- presumably the class ids start
# at 1; TODO confirm against the notebook that trained the models.
prediction_labels = [
    '',
    'no-score draw',
    'score draw',
    'home win',
    'away win',
]

In [61]:
# Load the three pre-trained classifiers from disk, keyed by a short name that
# is reused later for printed headings and output file names.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files that come from a trusted source.
classifiers = {
    "mlp": joblib.load('football_classifier_mlp.pkl'),
    "knn": joblib.load('football_classifier_knn.pkl'),
    "ada": joblib.load('football_classifier_ada.pkl'),
}

# Keep the individual module-level names available as well.
football_classifier_mlp = classifiers["mlp"]
football_classifier_knn = classifiers["knn"]
football_classifier_ada = classifiers["ada"]

In [62]:
# Read the cleaned fixture table for the next round of matches: one row per
# match, holding the league, the two teams and the numeric model features.
cleaned_results = pd.read_csv(filepath_or_buffer='cleaned_results.csv')

# Leaving the frame as the cell's last expression renders it as the output.
cleaned_results

Unnamed: 0,league,home_team,away_team,home_elo,away_elo,home_goals_f,home_goals_a,away_goals_a,away_goals_f,home_streak,away_streak
0,premier_league,Tottenham,Brighton,89,60,31,17,10,20,4,0
1,premier_league,Man United,Norwich,92,63,25,19,12,31,0,0
2,premier_league,Southampton,Arsenal,81,92,20,20,25,14,0,-2
3,premier_league,Watford,Brentford,71,54,14,37,18,19,-2,2
4,premier_league,Newcastle,Leicester,69,79,21,25,26,20,0,0
...,...,...,...,...,...,...,...,...,...,...,...
114,eredivisie,AZ Alkmaar,Feyenoord,80,83,31,14,34,12,0,2
115,eredivisie,Vitesse,Utrecht,76,69,17,24,29,18,0,0
116,eredivisie,Sparta Rotterdam,PSV Eindhoven,66,88,13,20,42,18,0,0
117,eredivisie,Go Ahead Eagles,Ajax,58,88,24,21,54,8,3,5


In [63]:
# Split the table by column position: the first three columns identify the
# fixture (league, home team, away team); every remaining column is converted
# to float and passed to the models as a feature.
teams = cleaned_results.iloc[:, :3].to_numpy()
features = cleaned_results.iloc[:, 3:].to_numpy().astype(float)

# One array of predicted class ids per classifier, keyed by its short name.
predictions = {
    name: model.predict(features)
    for name, model in classifiers.items()
}

In [66]:
# For every classifier: print its predictions grouped under league headings,
# keep them as a DataFrame in `outputs`, and save them to a dated CSV file.
outputs = {}
for c in classifiers:
    print('Classifier:', c)
    print('************************')

    # Name of the league whose heading was printed most recently; a new
    # heading is emitted whenever the league changes from one row to the next.
    league_name = ''
    rows = []
    for (league, home, away), result in zip(teams, predictions[c]):
        if league_name != league:
            league_name = league
            print(f'\n{league_name}')
            print('-----------------')

        # Translate the predicted class id into its readable label.
        label = prediction_labels[result]
        print('{} vs {}: {}'.format(home, away, label))
        rows.append([league, home, away, label])

    frame = pd.DataFrame(rows, columns=['league', 'home', 'away', 'prediction'])
    outputs[c] = frame

    # The file name carries the classifier name and today's date (dd-mm-YYYY).
    stamp = datetime.date.today().strftime("%d-%m-%Y")
    frame.to_csv(f'predictions_{c}_{stamp}.csv', index=False)

Classifier: mlp
************************

premier_league
-----------------
Tottenham vs Brighton: home win
Man United vs Norwich: home win
Southampton vs Arsenal: home win
Watford vs Brentford: away win
Newcastle vs Leicester: home win
West Ham vs Burnley: home win
Wolves vs Man City: home win
Aston Villa vs Liverpool: home win
Leeds vs Chelsea: home win
Everton vs Crystal Palace: home win

championship
-----------------
Middlesbrough vs Huddersfield: away win
Barnsley vs Peterboro: home win
Blackburn vs Stoke: home win
Blackpool vs Birmingham: home win
Cardiff vs Luton: home win
Coventry vs Bournemouth: home win
Millwall vs Hull: home win
QPR vs Derby: away win
Reading vs Swansea: home win
Bristol City vs Sheffield United: home win
Nott'm Forest vs West Brom: home win
Fulham vs Preston: away win

bundesliga
-----------------
Wolfsburg vs Mainz: home win
Greuther Furth vs Leverkusen: home win
RB Leipzig vs Union Berlin: home win
Freiburg vs Bochum: home win
Ein Frankfurt vs Hoffenheim:

In [67]:
# Summarise, per classifier, how many matches in each league were assigned to
# each predicted outcome, followed by a 'total' row across all leagues.
for c in classifiers:
    results = outputs[c][['league','prediction']]
    # Rows: league; columns: predicted outcome; values: number of matches.
    # Outcomes a league never receives are filled with 0 so the table stays
    # integer-typed.
    results = results.groupby('league')['prediction'].value_counts().unstack(fill_value=0)
    print('Classifier: ', c)
    # margins=True appends the 'total' row. The aggregation is named with the
    # string 'sum' rather than the builtin `sum`: recent pandas versions warn
    # when a builtin callable is passed and recommend the string form, which
    # produces the same result.
    print(results.pivot_table(index='league', margins=True, margins_name='total', aggfunc='sum'))

Classifier:  mlp
prediction        away win  home win  no-score draw  score draw
league                                                         
2_liga                   1         8              0           0
bundesliga               0         9              0           0
championship             3         9              0           0
eredivisie               1         8              0           0
ligue_1                  1         9              0           0
ligue_2                  1         9              0           0
premier_league           1         9              0           0
primeira_liga            1         8              0           0
primera_division         2         8              0           0
segunda_division         1        10              0           0
serie_a                  1         8              1           0
serie_b                  1         8              0           1
total                   14       103              1           1
Classifier:  knn
predic