<a href="https://colab.research.google.com/github/Speedbird45Bravo/Keras_projects_21/blob/main/Keras_SPI_4621_sandbox_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1624]:
from keras.layers import Dense
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# This is a swing at a custom neural network predicting soccer game results based on scores.
# I've been in Keras for a few months, but am just starting to understand my way around.
df = pd.read_csv("https://projects.fivethirtyeight.com/soccer-api/club/spi_matches.csv").dropna()

# We need to calculate one team's aggregate margin in order to determine the result.
# We've chosen home for that calculation, but may as well get away as well for good measure.
# Then, the absolute margin is the number of goals between the teams.
df['h_marg'] = df['score1'] - df['score2']
df['a_marg'] = df['score2'] - df['score1']
df['margin'] = np.abs(df['score1'] - df['score2'])

results = []

for i in df['h_marg']:
  if i > 0: # If the home team's margin is greater than 0, it's a home win.
    results.append("HOME WIN")
  elif i < 0: # If the home team's margin is less than 0, it's an away win.
    results.append("AWAY WIN")
  else: # Otherwise, it's a draw.
    results.append("DRAW")

df['result'] = results

In [1625]:
# The model gets a little confused the more numerical rows are fed into it, so we're keeping it simple.
X = df[['score1', 'score2', 'h_marg']].reset_index(drop=True)
y = df[['result']].copy()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.34)

In [1626]:
def to_cat(cats):
  cat = to_categorical(pd.factorize(cats)[0])
  return cat

In [1627]:
# Have to factorize the labels before converting them to categorical (e.g. 1.,0.,0.).
y_train = to_cat(y_train['result'])
y_test = to_cat(y_test['result'])

# The train and test data will be easier to handle in array form.
X_train = np.asarray(X_train)
X_test = np.asarray(X_test)

In [1628]:
# Two layers with a 3 unit output layer given the number of outcomes (HOME WIN, AWAY WIN, DRAW).
model = Sequential()
model.add(Dense(100, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [1629]:
# Definitely excessive on the epochs. ¯\_(ツ)_/¯
model.fit(X_train, y_train, epochs=20, batch_size=100, verbose=0, validation_split=0.2)

<tensorflow.python.keras.callbacks.History at 0x7fd42846d710>

In [1630]:
# Now we will predict results based on the random data we generated.
predictions = model.predict(X_test).round(2)

In [1631]:
loss, accuracy = model.evaluate(y_test, predictions)
print("Test Accuracy: %.2f" % (accuracy * 100) + "%")

Test Accuracy: 71.04%
