In [4]:
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [5]:
windowSize = 5
outputs = pd.read_csv("outputs-{}-window".format(windowSize))
inputs = pd.read_csv("inputs-{}-window".format(windowSize))

b = outputs["Goal Difference"].values.flatten()
results = outputs["Result"].values.flatten()

b3 = np.zeros(shape = (len(results), 3), dtype=np.int32)
for index, result in enumerate(results):
    if result == "Home Win":
        b3[index][0] = 1
    elif result == "Draw":
        b3[index][1] = 1
    elif result == "Home Loss":
        b3[index][2] = 1

A = inputs.loc[:, ~inputs.columns.isin(["Date", "Home", "Away"])].values
assert len(A) == len(b3)
numSamples, numFeatures = A.shape
print(b3.shape, b3[0])

(1301, 3) [0 0 1]


In [9]:
# Neural Network approach
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# make custom Dataset for use in training loop
class MatchDataset(Dataset):
    def __init__(self, matches, labels):
        self.matches = matches
        # normalize data
        matches /= np.max(np.abs(matches),axis=0)
        self.labels = labels

    def __len__(self):
        return len(self.matches)

    def __getitem__(self, index):
        match = self.matches[index,:]
        label = self.labels[index]
        return (torch.FloatTensor(match), torch.LongTensor(label))

# make training and test datasets
X_train, X_test, y_train, y_test = train_test_split(A, b3, test_size=0.2, random_state=10)
training_dataset = MatchDataset(X_train, y_train)
testing_dataset = MatchDataset(X_test, y_test)
training_dataloader = DataLoader(training_dataset, batch_size=32, shuffle=True)
testing_dataloader = DataLoader(testing_dataset, batch_size=32, shuffle=True)

# define model
class MatchModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.first_linear = nn.Linear(41, 100)
        self.first_dropout = nn.Dropout(p=0.5)
        self.second_linear = nn.Linear(100, 50)
        self.second_dropout = nn.Dropout(p=0.3)
        self.third_linear = nn.Linear(50, 3)
        self.relu = nn.functional.relu
        self.softmax = nn.functional.softmax

    def forward(self, x):
        x = self.first_linear(x)
        x = self.first_dropout(x)
        x = self.relu(x)
        x = self.second_linear(x)
        x = self.second_dropout(x)
        x = self.relu(x)
        x = self.third_linear(x)
        x = self.softmax(x, dim=1)
        return x

# initialize model, loss func, optimizer
model = MatchModel()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# training on a CPU (yuck!)
def train_model(model, criterion, optim, iterator, dubbo=False):
    model.train()
    total_loss = 0
    for x, y in iterator:
        optim.zero_grad()
        y_hat = model(x)
        loss = criterion(y_hat, torch.max(y, 1)[1])
        total_loss += loss
        loss.backward()
        optim.step()
    return total_loss

def evaluate(model, iterator):
    pred, real = [], []
    model.eval()
    with torch.no_grad():
      for x, y in iterator:
        y_hat = model(x)
        for ground_truth, prediction in zip(y, y_hat):
            pred.append(prediction.argmax())
            real.append(ground_truth.argmax())

    return sum([1 if y == y_hat else 0 for y, y_hat in zip(pred, real)]) / len(pred)

for epoch in range(15):
    loss = train_model(model, criterion, optimizer, training_dataloader)
    accuracy = evaluate(model, training_dataloader)
    print(f'Training epoch {epoch}, Goal MSE: {accuracy}')

print(f'Testing Dataset, Goal MSE: {evaluate(model, testing_dataloader)}')

Training epoch 0, Goal MSE: 0.49038461538461536
Training epoch 1, Goal MSE: 0.4951923076923077
Training epoch 2, Goal MSE: 0.49615384615384617
Training epoch 3, Goal MSE: 0.5038461538461538
Training epoch 4, Goal MSE: 0.5048076923076923
Training epoch 5, Goal MSE: 0.5115384615384615
Training epoch 6, Goal MSE: 0.5173076923076924
Training epoch 7, Goal MSE: 0.5163461538461539
Training epoch 8, Goal MSE: 0.5182692307692308
Training epoch 9, Goal MSE: 0.5182692307692308
Training epoch 10, Goal MSE: 0.5259615384615385
Training epoch 11, Goal MSE: 0.5355769230769231
Training epoch 12, Goal MSE: 0.5278846153846154
Training epoch 13, Goal MSE: 0.5355769230769231
Training epoch 14, Goal MSE: 0.5326923076923077
Testing Dataset, Goal MSE: 0.5670498084291188


In [8]:
#Method of least squares
At = np.transpose(A)
x = np.dot(np.matmul(np.linalg.inv(np.matmul(At, A)), At), b)
prediction = np.dot(A, x)
MSE = ((prediction - b)**2).sum()/len(prediction)
MSE

2.800044085959935

In [12]:
At = np.transpose(A)
x = np.dot(np.matmul(np.linalg.inv(np.matmul(At, A)), At), b3)
prediction = np.dot(A, x)

correct = 0
total = len(outputs)

for pred, act in zip(prediction, b3):
    pred -= pred.min()
    assert sum(pred) != 0
    pred /= sum(pred)
    if act[pred.argmax()] > 0:
        correct += 1

print(correct/total)

0.528055342044581
