In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:
df = pd.read_csv("mancity23-24.csv")

In [3]:
# Drop irrelevant columns
df.drop(columns=['Unnamed: 0', 'Date', 'Venue'], inplace=True)

In [4]:
# Encode target variable
label_encoder = LabelEncoder()
df['Result'] = label_encoder.fit_transform(df['Result'])  # W=2, D=0, L=1

In [5]:
# One-hot encode categorical features
categorical_cols = ['H/A', 'Competition', 'opponent(opp)']
df = pd.get_dummies(df, columns=categorical_cols)

In [6]:
# Separate features and label
X = df.drop(columns=['Result'])
y = df['Result']

In [7]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [8]:
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

In [9]:
# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.long)
y_test = torch.tensor(y_test.values, dtype=torch.long)

In [10]:
# Define neural network
class MatchPredictor(nn.Module):
    def __init__(self, input_size):
        super(MatchPredictor, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 3)  # 3 output classes: Win, Draw, Loss

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

In [11]:
# Initialize model
model = MatchPredictor(input_size=X_train.shape[1])
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [12]:
# Training loop
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    output = model(X_train)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 10, Loss: 0.8847
Epoch 20, Loss: 0.7019
Epoch 30, Loss: 0.5261
Epoch 40, Loss: 0.3901
Epoch 50, Loss: 0.2844
Epoch 60, Loss: 0.1938
Epoch 70, Loss: 0.1232
Epoch 80, Loss: 0.0728
Epoch 90, Loss: 0.0398
Epoch 100, Loss: 0.0224


In [13]:
# Evaluation
model.eval()
with torch.no_grad():
    predictions = model(X_test).argmax(dim=1)
    accuracy = (predictions == y_test).float().mean()
    print(f"Test Accuracy: {accuracy:.2%}")

Test Accuracy: 75.00%


In [14]:
# Predict tomorrow's match
# Example: Home match vs Arsenal, 60% possession, 15 shots, etc.
new_match = {
    "Possession(%)": 60,
    "Goals": 2,
    "Total attempts": 15,
    "On target": 7,
    "Passes": 600,
    "Fouls": 10,
    "Yellow cards": 1,
    "Red cards": 0,
    "H/A_H": 1,
    "H/A_A": 0,
    "Competition_Premier League": 1,
    "opponent(opp)_Arsenal": 1,
}

In [15]:
# Fill missing columns with 0
for col in X.columns:
    if col not in new_match:
        new_match[col] = 0

In [17]:
# Reorder and scale
# Get the values for the original columns in the same order as the training data
match_data = [new_match[col] for col in X.columns]
match_tensor = torch.tensor(scaler.transform([match_data]), dtype=torch.float32)

model.eval()
with torch.no_grad():
    probs = torch.softmax(model(match_tensor), dim=1)
    win_prob = probs[0][label_encoder.transform(['W'])[0]].item()
    draw_prob = probs[0][label_encoder.transform(['D'])[0]].item()
    loss_prob = probs[0][label_encoder.transform(['L'])[0]].item()




In [18]:

print(f"🔮 Prediction for tomorrow’s match:")
print(f"Win: {win_prob:.2%}, Draw: {draw_prob:.2%}, Loss: {loss_prob:.2%}")

🔮 Prediction for tomorrow’s match:
Win: 99.90%, Draw: 0.00%, Loss: 0.10%
