In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import dataframe_image as dfi

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping

---

## NN Model for Predicting Home Score

In [3]:
df = pd.read_csv('./data/cleaned_results_and_rankings.csv')
df.head(2)

Unnamed: 0,date,home_team,away_team,home_score,away_score,neutral,home_rank,away_rank,h_goals_for_avg,h_goals_against_avg,a_goals_for_avg,a_goals_against_avg,datetime,match_type_FIFA_WCQ,match_type_Friendly,match_type_Qualifier,match_type_Tournament/Cup_Match
0,1993-01-01,Ghana,Mali,1,1,1,39,69,0.0,0.0,0.0,0.0,1993-01-01,0,1,0,0
1,1993-01-02,Gabon,Burkina Faso,1,1,0,55,97,0.0,0.0,0.0,0.0,1993-01-02,0,1,0,0


In [140]:
features = ['neutral', 'home_rank', 'away_rank', 'h_goals_for_avg', 'h_goals_against_avg', 'a_goals_for_avg', 'a_goals_against_avg', 'match_type_FIFA_WCQ', 'match_type_Friendly', 'match_type_Qualifier', 'match_type_Tournament/Cup_Match']
X = df[features]
y = df['home_score']

In [141]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [142]:
ss = StandardScaler()
ss.fit(X_train, y_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

In [143]:
home_model = Sequential()

home_model.add(Dense(32, activation='relu', input_shape=(11,)))
home_model.add(Dense(32, activation='relu'))
home_model.add(Dense(32, activation='relu'))
home_model.add(Dense(1, activation=None))

home_model.compile(loss='mse', optimizer='adam')

In [144]:
home_model.fit(X_train_sc, y_train,
               batch_size=512,
               epochs=100,
               validation_data=(X_test_sc, y_test),
               verbose=0)

<keras.callbacks.History at 0x203a6544280>

In [145]:
home_score_preds = home_model.predict(X_test_sc).round().astype(int)
home_score_preds = [i for sublist in home_score_preds for i in sublist]
# home_score_preds

In [146]:
metrics.r2_score(y_test, home_score_preds)

0.18064372128244988

In [147]:
# on average, home score predictions are 1 goal off
metrics.mean_absolute_error(y_test, home_score_preds)

1.0

---

## NN Model for Predicting Away Score

In [148]:
features = ['neutral', 'home_rank', 'away_rank', 'h_goals_for_avg', 'h_goals_against_avg', 'a_goals_for_avg',
            'a_goals_against_avg', 'match_type_FIFA_WCQ', 'match_type_Friendly', 'match_type_Qualifier',
            'match_type_Tournament/Cup_Match']
X = df[features]
y = df['away_score']

In [149]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [150]:
ss = StandardScaler()
ss.fit(X_train, y_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

In [151]:
away_model = Sequential()

away_model.add(Dense(32, activation='relu', input_shape=(11,)))
away_model.add(Dense(32, activation='relu'))
away_model.add(Dense(32, activation='relu'))
away_model.add(Dense(1, activation=None))

away_model.compile(loss='mse', optimizer='adam')

In [152]:
away_model.fit(X_train_sc, y_train,
               batch_size=512,
               epochs=100,
               validation_data=(X_test_sc, y_test),
               verbose=0)

<keras.callbacks.History at 0x203a7a65910>

In [153]:
away_score_preds = away_model.predict(X_test_sc).round().astype(int)
away_score_preds = [i for sublist in away_score_preds for i in sublist]
# away_score_preds

In [154]:
# 11% of predicted away scores can be explained by the model
metrics.r2_score(y_test, away_score_preds)

0.10618349305798258

In [155]:
# on average, away score predictions are less than one goal off
metrics.mean_absolute_error(y_test, away_score_preds)

0.8097535757366879

---

## Predicting Upcoming Fixtures

In [3]:
qualifying_fixtures = pd.read_csv('./data/cleaned_qualifying_fixtures.csv')
qualifying_fixtures.drop(columns=['home_score', 'away_score'], inplace=True)
qualifying_fixtures.head(2)

Unnamed: 0,date,home_team,away_team,neutral,home_rank,away_rank,h_goals_for_avg,h_goals_against_avg,a_goals_for_avg,a_goals_against_avg,datetime,match_type_FIFA_WCQ,match_type_Friendly,match_type_Qualifier,match_type_Tournament/Cup_Match
0,2021-09-02,Canada,Honduras,0,59,63,3.2,0.2,1.1,1.9,2021-09-02,1,0,0,0
1,2021-09-02,Panama,Costa Rica,0,74,44,2.4,1.1,0.9,1.3,2021-09-02,1,0,0,0


In [157]:
features = ['neutral', 'home_rank', 'away_rank', 'h_goals_for_avg', 'h_goals_against_avg', 'a_goals_for_avg',
            'a_goals_against_avg', 'match_type_FIFA_WCQ', 'match_type_Friendly', 'match_type_Qualifier',
            'match_type_Tournament/Cup_Match']
X = qualifying_fixtures[features]

In [158]:
X_sc = ss.fit_transform(X)

In [159]:
home_score_preds = home_model.predict(X_sc).round().astype(int)
home_score_preds = [i for sublist in home_score_preds for i in sublist]
# home_score_preds

In [160]:
qualifying_fixtures['home_score'] = home_score_preds

In [161]:
away_score_preds = away_model.predict(X_sc).round().astype(int)
away_score_preds = [i for sublist in away_score_preds for i in sublist]
# away_score_preds

In [162]:
qualifying_fixtures['away_score'] = away_score_preds

In [166]:
CONCACAF_fixture_predictions = qualifying_fixtures[['date', 'home_rank', 'home_team', 'home_score', 'away_score', 'away_team', 'away_rank']]
CONCACAF_fixture_predictions

Unnamed: 0,date,home_rank,home_team,home_score,away_score,away_team,away_rank
0,2021-09-02,59,Canada,2,0,Honduras,63
1,2021-09-02,74,Panama,1,1,Costa Rica,44
2,2021-09-02,9,Mexico,3,0,Jamaica,50
3,2021-09-02,64,El Salvador,0,2,United States,10
4,2021-09-05,50,Jamaica,2,0,Panama,74
5,2021-09-05,64,El Salvador,2,0,Honduras,63
6,2021-09-05,44,Costa Rica,1,2,Mexico,9
7,2021-09-05,10,United States,2,0,Canada,59
8,2021-09-08,59,Canada,2,0,El Salvador,64
9,2021-09-08,74,Panama,1,3,Mexico,9


In [167]:
dfi.export(CONCACAF_fixture_predictions, './images/CONCACAF_fixture_predictions.png')

In [164]:
qualifying_fixtures.to_csv('./data/qualifier_predictions.csv', index=False)