Model for predicting outcome of Premier Leauge match given teams and odds 
- made by Eugen Sedlar 

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import pandas as pd
import tensorflow as tf

Load data

In [2]:
data = pd.read_csv("filtered_data.csv")

Convert columns into numerical with LabelEncoder

In [3]:
label_enc = LabelEncoder()
data['HomeTeam'] = label_enc.fit_transform(data['HomeTeam'])
data['AwayTeam'] = label_enc.fit_transform(data['AwayTeam'])
result_label_enc = LabelEncoder()
data['Result'] = result_label_enc.fit_transform(data['Result'])

In [4]:
X = data[['HomeTeam', 'AwayTeam', '1', 'X', '2']]
y = data['Result']

Normalize odds (not necessary since odds are already normalized)

In [None]:
scaler = StandardScaler()
X[['1', 'X', '2']] = scaler.fit_transform(X[['1', 'X', '2']])

Split data into training and testing data

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=42)

Create and train model

In [7]:
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(units=32, activation='relu'),
    Dropout(0.3),
    Dense(units=16, activation='relu'),
    Dropout(0.3),
    Dense(units=3, activation='linear')
])

In [8]:
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=10000,
    decay_rate=0.9
)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=["accuracy"])

In [9]:
model.fit(X_train, y_train, epochs=50)

Epoch 1/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 917us/step - accuracy: 0.4700 - loss: 1.4325
Epoch 2/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 583us/step - accuracy: 0.3884 - loss: 1.1335
Epoch 3/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 500us/step - accuracy: 0.4349 - loss: 1.0741
Epoch 4/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 584us/step - accuracy: 0.4684 - loss: 1.0343
Epoch 5/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 476us/step - accuracy: 0.4331 - loss: 1.0184
Epoch 6/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 501us/step - accuracy: 0.4723 - loss: 1.0179
Epoch 7/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 500us/step - accuracy: 0.4809 - loss: 1.0117
Epoch 8/50
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 500us/step - accuracy: 0.4828 - loss: 1.0312
Epoch 9/50
[1m13/13[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x28197551a00>

Test model

In [10]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 801us/step - accuracy: 0.5921 - loss: 0.9051
Test Accuracy: 58.96%
