In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping

---

## NN Model for Predicting Home Score

In [2]:
df = pd.read_csv('./data/cleaned_results_and_rankings.csv')
df.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,neutral,home_rank,away_rank,datetime,match_type_FIFA_WCQ,match_type_Friendly,match_type_Qualifier,match_type_Tournament/Cup_Match
0,1993-01-01,Ghana,Mali,1,1,1,39,69,1993-01-01,0,1,0,0
1,1993-01-02,Gabon,Burkina Faso,1,1,0,55,97,1993-01-02,0,1,0,0
2,1993-01-02,Kuwait,Lebanon,2,0,0,71,161,1993-01-02,0,1,0,0
3,1993-01-03,Burkina Faso,Mali,1,0,1,97,69,1993-01-03,0,1,0,0
4,1993-01-03,Gabon,Ghana,2,3,0,55,39,1993-01-03,0,1,0,0


In [3]:
features = ['neutral', 'home_rank', 'away_rank', 'match_type_FIFA_WCQ', 'match_type_Friendly', 'match_type_Qualifier', 'match_type_Tournament/Cup_Match']
X = df[features]
y = df['home_score']

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [5]:
ss = StandardScaler()
ss.fit(X_train, y_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

In [6]:
home_model = Sequential()

home_model.add(Dense(32, activation='relu', input_shape=(7,)))
home_model.add(Dense(32, activation='relu'))
home_model.add(Dense(32, activation='relu'))
home_model.add(Dense(1, activation=None))

home_model.compile(loss='mse', optimizer='adam')

In [7]:
home_model.fit(X_train_sc, y_train,
               batch_size=512,
               epochs=100,
               validation_data=(X_test_sc, y_test),)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x17bfd517940>

In [8]:
home_score_preds = home_model.predict(X_test_sc).round().astype(int)
home_score_preds = [i for sublist in home_score_preds for i in sublist]
home_score_preds

[1,
 1,
 2,
 2,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 1,
 2,
 2,
 1,
 2,
 2,
 1,
 1,
 2,
 2,
 1,
 3,
 2,
 2,
 1,
 2,
 1,
 1,
 2,
 2,
 1,
 2,
 1,
 1,
 1,
 2,
 2,
 3,
 1,
 1,
 2,
 2,
 1,
 2,
 2,
 1,
 1,
 1,
 1,
 4,
 2,
 1,
 1,
 2,
 2,
 3,
 3,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 1,
 1,
 3,
 1,
 3,
 1,
 3,
 1,
 1,
 0,
 2,
 1,
 3,
 1,
 2,
 3,
 3,
 1,
 2,
 2,
 1,
 2,
 2,
 0,
 2,
 1,
 2,
 2,
 1,
 1,
 2,
 1,
 1,
 1,
 2,
 1,
 2,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 2,
 2,
 1,
 1,
 2,
 2,
 2,
 2,
 1,
 2,
 1,
 1,
 1,
 2,
 1,
 1,
 2,
 3,
 2,
 1,
 2,
 2,
 2,
 1,
 1,
 1,
 1,
 0,
 1,
 2,
 2,
 2,
 1,
 2,
 1,
 1,
 2,
 2,
 2,
 1,
 1,
 2,
 2,
 2,
 3,
 1,
 1,
 2,
 3,
 2,
 1,
 2,
 1,
 2,
 1,
 2,
 1,
 3,
 1,
 1,
 1,
 2,
 1,
 2,
 1,
 2,
 2,
 2,
 1,
 2,
 3,
 2,
 2,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 1,
 3,
 2,
 3,
 1,
 2,
 2,
 1,
 2,
 1,
 4,
 1,
 2,
 2,
 2,
 2,
 2,
 1,
 2,
 1,
 2,
 3,
 1,
 1,
 1,
 4,
 1,
 4,
 2,
 1,
 2,
 2,
 2,
 2,
 1,
 1,
 2,
 0,
 1,
 2,
 2,
 4,
 2,
 1,
 1,
 2,
 2,
 2,
 1,
 3,
 1,


In [9]:
metrics.r2_score(y_test, home_score_preds)

0.17250840842670623

---

## NN Model for Predicting Away Score

In [10]:
features = ['neutral', 'home_rank', 'away_rank', 'match_type_FIFA_WCQ', 'match_type_Friendly', 'match_type_Qualifier', 'match_type_Tournament/Cup_Match']
X = df[features]
y = df['away_score']

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [12]:
ss = StandardScaler()
ss.fit(X_train, y_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

In [13]:
away_model = Sequential()

away_model.add(Dense(32, activation='relu', input_shape=(7,)))
away_model.add(Dense(32, activation='relu'))
away_model.add(Dense(32, activation='relu'))
away_model.add(Dense(1, activation=None))

away_model.compile(loss='mse', optimizer='adam')

In [14]:
away_model.fit(X_train_sc, y_train,
               batch_size=512,
               epochs=100,
               validation_data=(X_test_sc, y_test),)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x17bfc28a430>

In [15]:
away_score_preds = away_model.predict(X_test_sc).round().astype(int)
away_score_preds = [i for sublist in away_score_preds for i in sublist]
away_score_preds

[1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 2,
 1,
 2,
 2,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 3,
 0,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 2,
 2,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 5,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 3,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 0,
 1,
 1,
 2,
 1,
 2,
 2,
 1,
 0,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 3,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 2,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 2,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 0,
 2,
 1,
 1,
 0,
 2,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 0,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,


In [16]:
metrics.r2_score(y_test, away_score_preds)

0.14550639454162306

---