<a href="https://colab.research.google.com/github/Speedbird45Bravo/Keras_projects_21/blob/main/Keras_SPI_4121.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [87]:
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# This is a swing at a custom neural network predicting soccer game results based on scores.
# I've been in Keras for a few months, but am just starting to understand my way around.
df = pd.read_csv("https://projects.fivethirtyeight.com/soccer-api/club/spi_matches.csv").dropna()

# We need to calculate one team's aggregate margin in order to determine the result.
# We've chosen home for that calculation, but may as well get away as well for good measure.
# Then, the absolute margin is the number of goals between the teams.
df['h_marg'] = df['score1'] - df['score2']
df['a_marg'] = df['score2'] - df['score1']
df['margin'] = np.abs(df['score1'] - df['score2'])

results = []

for i in df['h_marg']:
  if i > 0: # If the home team's margin is greater than 0, it's a home win.
    results.append("HOME WIN")
  elif i < 0: # If the home team's margin is less than 0, it's an away win.
    results.append("AWAY WIN")
  else: # Otherwise, it's a draw.
    results.append("DRAW")

df['result'] = results

In [88]:
# The model gets a little confused the more numerical rows are fed into it, so we're keeping it simple.
X = df[['score1', 'score2', 'margin']].reset_index(drop=True)
y = df[['result']].copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=6)

In [90]:
# Make some random test data (separate from the existing test dfs). First, we need to generate a function to turn the list into a dataframe.

def to_df(l1):
  df = pd.DataFrame(l1).reset_index(drop=True)
  return df

def to_cat(cats):
  cat = to_categorical(pd.factorize(cats)[0])
  return cat

t1 = to_df([3,0,2,4,2,1,3,2,4,1,5,1,3,4,3,2,1,1,0,5])
t2 = to_df([2,1,4,2,4,2,3,1,2,2,0,0,3,2,1,2,3,2,1,0])
t3 = np.abs(t1) - np.abs(t2)
t4 = t2 - t1
t5 = t1 - t2

t_res = []

cols = ['score1', 'score2', 'margin', 'h_margin', 'a_margin']

rand_X_test = pd.concat([t1, t2, t3, t4, t5], axis=1)
rand_X_test.columns = cols

for i in rand_X_test['h_margin']:
  if i > 0: # If the home team's margin is greater than 0, it's a home win.
    t_res.append("HOME WIN")
  elif i < 0: # If the home team's margin is less than 0, it's an away win.
    t_res.append("AWAY WIN")
  else: # Otherwise, it's a draw.
    t_res.append("DRAW")

rand_X_test['result'] = t_res
rand_y_test = to_cat(rand_X_test['result'])
rand_X_test = rand_X_test[['score1', 'score2', 'margin']]
rand_X_test = np.asarray(rand_X_test)
rand_y_test = np.asarray(rand_y_test)

In [91]:
# Have to factorize the labels before converting them to categorical (e.g. 1.,0.,0.).
y_train = to_cat(y_train['result'])
y_test = to_cat(y_test['result'])

# The train and test data will be easier to handle in array form.
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

In [92]:
# Three layers with a 3 unit output layer given the number of outcomes (HOME WIN, AWAY WIN, DRAW).
model = Sequential()
model.add(Dense(16, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [93]:
# Definitely excessive on the epochs. ¯\_(ツ)_/¯
model.fit(X_train, y_train, epochs=40, batch_size=128, validation_split=0.2, verbose=1)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<tensorflow.python.keras.callbacks.History at 0x7f9f9ab67810>

In [98]:
# Now we will predict results based on test data.
predictions = model.predict(rand_X_test).round(2)
predictions = np.asarray(predictions)

In [99]:
# Reshaping y_test and predictions to be directly compared 1v1 iteratively.
predictions = predictions.reshape(60,)
predictions = pd.DataFrame(predictions).reset_index(drop=True)
rand_y_test = rand_y_test.reshape(60,)
rand_y_test = pd.DataFrame(rand_y_test).reset_index(drop=True)
PvA = pd.concat([rand_y_test, predictions], axis=1)
PvA.columns = ["Predicted", "Actual"]

In [100]:
# If the margin is 0, there is no error. If the margin is 1, there was an error.
PvA['Margin'] = np.abs(PvA['Predicted'] - PvA['Actual'])

In [101]:
# Sum of all of the rows with errors.
PvA_err = np.sum(PvA['Margin'])

In [102]:
# Length of the prediction set.
PvA_len = len(PvA)

In [103]:
# Accuracy metric.
PvA_acc = 1-(PvA_err/PvA_len)

In [104]:
# AORTD = Accuracy on Random Test Data
print("AORTD: %.2f" % (PvA_acc * 100) + "%")

AORTD: 73.33%
