In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import metrics

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dropout
from tensorflow.keras.callbacks import EarlyStopping

---

## NN Model for Predicting Home Score

In [2]:
df = pd.read_csv('./data/cleaned_results_and_rankings.csv')
df.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,home_rank,away_rank,datetime
0,1993-01-01,Ghana,Mali,1,1,Friendly,Libreville,Gabon,1,39.0,69.0,1993-01-01
1,1993-01-02,Gabon,Burkina Faso,1,1,Friendly,Libreville,Gabon,0,55.0,97.0,1993-01-02
2,1993-01-02,Kuwait,Lebanon,2,0,Friendly,Kuwait City,Kuwait,0,71.0,161.0,1993-01-02
3,1993-01-03,Burkina Faso,Mali,1,0,Friendly,Libreville,Gabon,1,97.0,69.0,1993-01-03
4,1993-01-03,Gabon,Ghana,2,3,Friendly,Libreville,Gabon,0,55.0,39.0,1993-01-03


In [3]:
features = ['neutral', 'home_rank', 'away_rank']
X = df[features]
y = to_categorical(df['home_score'])

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [5]:
ss = StandardScaler()
ss.fit(X_train, y_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

In [6]:
home_model = Sequential()

home_model.add(Dense(32, activation='relu', input_shape=(3,)))
home_model.add(Dropout(.6))
home_model.add(Dense(32, activation='relu'))
home_model.add(Dropout(.4))
home_model.add(Dense(32, activation='relu'))
home_model.add(Dropout(.2))
home_model.add(Dense(11, activation='softmax'))

early_stop = EarlyStopping(patience=5)

home_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

In [7]:
home_model.fit(X_train_sc, y_train,
               batch_size=512,
               epochs=100,
               validation_data=(X_test_sc, y_test),
               callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100


<keras.callbacks.History at 0x251038f6310>

---

## NN Model for Predicting Away Score

In [8]:
features = ['neutral', 'home_rank', 'away_rank']
X = df[features]
y = to_categorical(df['away_score'])

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [10]:
ss = StandardScaler()
ss.fit(X_train, y_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)

In [11]:
away_model = Sequential()

away_model.add(Dense(32, activation='relu', input_shape=(3,)))
away_model.add(Dropout(.6))
away_model.add(Dense(32, activation='relu'))
away_model.add(Dropout(.4))
away_model.add(Dense(32, activation='relu'))
away_model.add(Dropout(.2))
away_model.add(Dense(11, activation='softmax'))

early_stop = EarlyStopping(patience=5)

away_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

In [12]:
away_model.fit(X_train_sc, y_train,
               batch_size=512,
               epochs=100,
               validation_data=(X_test_sc, y_test),
               callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100


<keras.callbacks.History at 0x25102314940>

In [13]:
# go back to linear regression and use r2 metric
# check performance using floor/round methods