In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization, ReLU, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

In [3]:
# Load the combined match table; every missing cell, in every column, becomes 0.
# NOTE(review): a 0 fill may not be a meaningful value for rating/probability
# columns — confirm this is intended.
data = pd.read_csv('../final_combined_with_team_records.csv').fillna(0)

def get_result(row):
    """Map one match row to an integer outcome class.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing
            'home_team_goal' and 'away_team_goal'.

    Returns:
        0 when the home side scored more goals, 1 when both sides scored
        the same number, and 2 in every other case.
    """
    home_goals = row['home_team_goal']
    away_goals = row['away_team_goal']

    if home_goals > away_goals:
        return 0
    if home_goals == away_goals:
        return 1
    return 2

# Integer outcome class per match, then one-hot encoded over the three classes
data['result'] = data.apply(get_result, axis=1)
labels = to_categorical(data['result'], num_classes=3)

# Column order: the three avg_*_prob columns first, then for each player slot
# 1..11 the home rating/potential followed by the away rating/potential.
feature_columns = ['avg_home_prob', 'avg_draw_prob', 'avg_away_prob'] + [
    f'{side}_player_{slot}_{attribute}'
    for slot in range(1, 12)
    for side in ('home', 'away')
    for attribute in ('rating', 'potential')
]

# Team-record columns are optional: only the ones present in the table are used
record_columns = ['home_team_wins', 'home_team_draws', 'home_team_losses',
                  'away_team_wins', 'away_team_draws', 'away_team_losses']
feature_columns.extend(name for name in record_columns if name in data.columns)

# Independent copy so later work on `features` cannot touch `data`
features = data[feature_columns].copy()

In [4]:
# Split into model input parts: per-team player columns (all ratings first,
# then all potentials); everything left over is treated as match context.
player_slots = range(1, 12)
home_columns = ([f'home_player_{i}_rating' for i in player_slots]
                + [f'home_player_{i}_potential' for i in player_slots])
away_columns = ([f'away_player_{i}_rating' for i in player_slots]
                + [f'away_player_{i}_potential' for i in player_slots])

home_features = features[home_columns]
away_features = features[away_columns]
context_features = features.drop(columns=home_columns + away_columns)

# Train-test split: one call so all three inputs and the labels stay row-aligned
(X_home_train, X_home_test,
 X_away_train, X_away_test,
 X_ctx_train, X_ctx_test,
 y_train, y_test) = train_test_split(
    home_features, away_features, context_features, labels,
    test_size=0.2, random_state=42,
)

In [5]:
# Normalize features: one scaler per input group, fitted on the training rows
# only and then reused unchanged for the test rows.
scaler_home = StandardScaler()
scaler_away = StandardScaler()
scaler_ctx = StandardScaler()

X_home_train = scaler_home.fit_transform(X_home_train)
X_away_train = scaler_away.fit_transform(X_away_train)
X_ctx_train = scaler_ctx.fit_transform(X_ctx_train)

X_home_test = scaler_home.transform(X_home_test)
X_away_test = scaler_away.transform(X_away_test)
X_ctx_test = scaler_ctx.transform(X_ctx_test)

In [6]:
# Define model
# Seed Python, NumPy and the TensorFlow backend so that weight initialisation
# and dropout start from the same state on every Restart & Run All.
tf.keras.utils.set_random_seed(42)


def dense_block(tensor, units=64, dropout_rate=0.1):
    """Apply Dense -> BatchNormalization -> ReLU -> Dropout to a tensor.

    Args:
        tensor: Keras tensor to transform.
        units: Width of the Dense layer.
        dropout_rate: Fraction of units dropped by the final Dropout layer.

    Returns:
        The Keras tensor produced by the Dropout layer.
    """
    tensor = Dense(units)(tensor)
    tensor = BatchNormalization()(tensor)
    tensor = ReLU()(tensor)
    return Dropout(dropout_rate)(tensor)


# One encoder branch per input group; layers are created in the same order as
# before so auto-generated layer names are unchanged.
input_home = Input(shape=(X_home_train.shape[1],))
home_encoded = dense_block(input_home)

input_away = Input(shape=(X_away_train.shape[1],))
away_encoded = dense_block(input_away)

input_ctx = Input(shape=(X_ctx_train.shape[1],))
ctx_encoded = dense_block(input_ctx)

# Fuse the three encodings, then classify into the three outcome classes
combined = Concatenate()([home_encoded, away_encoded, ctx_encoded])
output = dense_block(combined, dropout_rate=0.2)
output = Dense(3, activation='softmax')(output)

model = Model(inputs=[input_home, input_away, input_ctx], outputs=output)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train
# Keep the History object so the loss/accuracy curves can be inspected later
# instead of leaving only its repr as cell output.
history = model.fit([X_home_train, X_away_train, X_ctx_train], y_train,
                    validation_split=0.2, epochs=50, batch_size=32, verbose=1)

Epoch 1/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.4477 - loss: 1.1706 - val_accuracy: 0.4949 - val_loss: 1.0091
Epoch 2/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5473 - loss: 0.9670 - val_accuracy: 0.4867 - val_loss: 0.9977
Epoch 3/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5608 - loss: 0.9477 - val_accuracy: 0.5072 - val_loss: 1.0044
Epoch 4/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5591 - loss: 0.9543 - val_accuracy: 0.5092 - val_loss: 1.0017
Epoch 5/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5704 - loss: 0.9232 - val_accuracy: 0.5277 - val_loss: 1.0021
Epoch 6/50
[1m61/61[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6037 - loss: 0.8769 - val_accuracy: 0.5236 - val_loss: 0.9919
Epoch 7/50
[1m61/61[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1523c0bca40>

In [None]:
# Loss and accuracy on the held-out test split
test_loss, test_acc = model.evaluate(
    x=[X_home_test, X_away_test, X_ctx_test],
    y=y_test,
)
print("Test Accuracy:", test_acc)

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5235 - loss: 1.1329 
Test Accuracy: 0.5180920958518982


In [None]:
# Evaluate model: accuracy plus per-class and macro-averaged precision/recall/F1
# on the held-out test split.

# Class indices as produced by get_result; passed to BOTH metric calls so the
# macro average always covers the same three classes as the per-class report.
CLASS_LABELS = [0, 1, 2]

# Collapse predicted probabilities and one-hot targets to class indices
y_pred_probs = model.predict([X_home_test, X_away_test, X_ctx_test])
y_pred_classes = np.argmax(y_pred_probs, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

accuracy = accuracy_score(y_true_classes, y_pred_classes)

# zero_division=0 scores a class that is never predicted (or never occurs) as 0
# explicitly, instead of relying on the warn-and-return-0 default.
precision, recall, f1, support = precision_recall_fscore_support(
    y_true_classes, y_pred_classes,
    average=None, labels=CLASS_LABELS, zero_division=0,
)
macro_precision, macro_recall, macro_f1, _ = precision_recall_fscore_support(
    y_true_classes, y_pred_classes,
    average='macro', labels=CLASS_LABELS, zero_division=0,
)

print(f"\nTest Accuracy: {accuracy:.4f}")
print(f"Macro Precision: {macro_precision:.4f}")
print(f"Macro Recall: {macro_recall:.4f}")
print(f"Macro F1 Score: {macro_f1:.4f}\n")

for i, (p, r, f, s) in zip(CLASS_LABELS, zip(precision, recall, f1, support)):
    print(f"Class {i} — Precision: {p:.4f}, Recall: {r:.4f}, F1: {f:.4f}, Support: {s}")