In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd


In [10]:

# Read the 2022/23 match results (with game-week column) into a DataFrame.
# The path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='../data/results_2223_with_gw.csv')


In [11]:
# Selecting relevant columns for prediction
selected_features = ['home_team', 'away_team']

# Encode team names as integer ids.
# The encoder is fitted on the union of both columns: fitting on 'home_team' only
# makes transform() raise ValueError for any team that appears solely as an away
# side (e.g. partial-season or filtered data). When both columns contain the same
# set of teams the resulting ids are identical to fitting on 'home_team' alone,
# because LabelEncoder sorts its classes.
label_encoder = LabelEncoder()
label_encoder.fit(pd.concat([data['home_team'], data['away_team']], ignore_index=True))

# NOTE: this overwrites the team-name columns of `data` with integer ids, so the
# cell is not idempotent - reload `data` before running it a second time,
# otherwise the encoder is fitted on ids and the original names are lost.
data['home_team'] = label_encoder.transform(data['home_team'])
data['away_team'] = label_encoder.transform(data['away_team'])

# Features are the two encoded team ids; targets are the goals scored by each side
X = data[selected_features]
y_home = data['home_score']
y_away = data['away_score']

# Split the data into train and test sets (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_home_train, y_home_test, y_away_train, y_away_test = train_test_split(
    X, y_home, y_away, test_size=0.2, random_state=42
)


def _to_float_tensor(frame):
    """Convert a pandas DataFrame/Series to a float32 torch tensor."""
    return torch.from_numpy(frame.astype(np.float32).values)


# Convert data to PyTorch tensors
X_train_tensor = _to_float_tensor(X_train)
y_home_train_tensor = _to_float_tensor(y_home_train)
y_away_train_tensor = _to_float_tensor(y_away_train)
X_test_tensor = _to_float_tensor(X_test)
y_home_test_tensor = _to_float_tensor(y_home_test)
y_away_test_tensor = _to_float_tensor(y_away_test)

# Two-headed regression network: one shared hidden layer, separate home/away outputs
class FootballScorePredictor(nn.Module):
    """Predict the home and away score of a fixture from the two team ids.

    Each team id is looked up in its own embedding table (one table for the home
    role, one for the away role). The raw input features and both embeddings are
    concatenated, passed through a single ReLU hidden layer, and then fed to two
    independent linear heads.
    """

    def __init__(self, input_size, team_vocab_size, embedding_dim, hidden_size, output_size):
        """Build the layers.

        Args:
            input_size: number of columns in the raw feature tensor ``x``.
            team_vocab_size: number of distinct team ids (rows in each embedding table).
            embedding_dim: width of each team embedding vector.
            hidden_size: number of units in the shared hidden layer.
            output_size: width of each score head.
        """
        super().__init__()
        # Separate tables so a team can have different home and away representations
        self.embedding_home = nn.Embedding(team_vocab_size, embedding_dim)
        self.embedding_away = nn.Embedding(team_vocab_size, embedding_dim)
        # The hidden layer sees the raw features plus both embedding vectors
        concat_width = input_size + 2 * embedding_dim
        self.fc1 = nn.Linear(concat_width, hidden_size)
        self.relu = nn.ReLU()
        self.fc_home = nn.Linear(hidden_size, output_size)  # head for the home score
        self.fc_away = nn.Linear(hidden_size, output_size)  # head for the away score

    def forward(self, x, home_team, away_team):
        """Return ``(home_score, away_score)`` predictions.

        Args:
            x: raw feature tensor; concatenated with the embeddings along dim 1.
            home_team: home-team ids, cast to ``long`` before the embedding lookup.
            away_team: away-team ids, cast to ``long`` before the embedding lookup.
        """
        home_vec = self.embedding_home(home_team.long())
        away_vec = self.embedding_away(away_team.long())
        features = torch.cat([x, home_vec, away_vec], dim=1)
        hidden = self.relu(self.fc1(features))
        return self.fc_home(hidden), self.fc_away(hidden)

# Initialize the model, define loss function, optimizer, and embeddings parameters

# Seed torch before the model is built: layer weights are drawn at construction
# time, so without a seed every run starts from different weights.
torch.manual_seed(42)

input_size = len(selected_features)  # Number of raw feature columns fed to the network
team_vocab_size = len(label_encoder.classes_)  # One embedding row per encoded team
embedding_dim = 10  # Dimensionality of team embeddings
hidden_size = 64  # Number of neurons in the hidden layer
output_size = 1  # Output size for each home and away score prediction
model = FootballScorePredictor(input_size, team_vocab_size, embedding_dim, hidden_size, output_size)
criterion = nn.MSELoss()  # Mean Squared Error loss for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer

# Training the model (full batch: every epoch is one optimizer step on the whole training split)
# NOTE(review): the recorded run prints a total training loss of 0.0000 from epoch
# 20000 onwards, which suggests the network memorises the training fixtures -
# consider tracking the loss on the held-out split as well.
num_epochs = 100000

# The team-id columns do not change between epochs, so slice them once.
# Column order follows selected_features: [-2] is home_team, [-1] is away_team.
home_team_ids = X_train_tensor[:, -2]
away_team_ids = X_train_tensor[:, -1]

model.train()  # make the training mode explicit
for epoch in range(num_epochs):
    # Forward pass
    home_outputs, away_outputs = model(X_train_tensor, home_team_ids, away_team_ids)

    # squeeze(-1) drops only the trailing output dimension. A bare squeeze() would
    # also drop the batch dimension when there is a single training row, leaving a
    # 0-d prediction that no longer matches the 1-D target.
    loss_home = criterion(home_outputs.squeeze(-1), y_home_train_tensor)  # Calculate home score loss
    loss_away = criterion(away_outputs.squeeze(-1), y_away_train_tensor)  # Calculate away score loss
    loss = loss_home + loss_away  # Combined loss for both scores

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10000 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Total Loss: {loss.item()}')


Epoch [10000/100000], Total Loss: 0.0005
Epoch [20000/100000], Total Loss: 0.0000
Epoch [30000/100000], Total Loss: 0.0000
Epoch [40000/100000], Total Loss: 0.0000
Epoch [50000/100000], Total Loss: 0.0000
Epoch [60000/100000], Total Loss: 0.0000
Epoch [70000/100000], Total Loss: 0.0000
Epoch [80000/100000], Total Loss: 0.0000
Epoch [90000/100000], Total Loss: 0.0000
Epoch [100000/100000], Total Loss: 0.0000


In [12]:
import plotly.express as px
import plotly.graph_objects as go

# Evaluate on the held-out split and plot predicted vs. actual scores for every team
model.eval()  # inference mode; keeps evaluation correct if dropout/batch-norm layers are added later
with torch.no_grad():  # only the forward pass needs autograd switched off
    home_outputs, away_outputs = model(X_test_tensor, X_test_tensor[:, -2], X_test_tensor[:, -1])  # Pass team indices

# squeeze(-1) removes only the trailing output dimension, so a single-row test
# split still yields 1-D arrays that can be used as DataFrame columns.
home_outputs = home_outputs.squeeze(-1).numpy()  # Convert predicted home scores to NumPy array
away_outputs = away_outputs.squeeze(-1).numpy()  # Convert predicted away scores to NumPy array
y_home_test_np = y_home_test_tensor.numpy()  # Convert actual home scores to NumPy array
y_away_test_np = y_away_test_tensor.numpy()  # Convert actual away scores to NumPy array
home_teams = label_encoder.inverse_transform(X_test['home_team'])  # Inverse transform encoded home team labels
away_teams = label_encoder.inverse_transform(X_test['away_team'])  # Inverse transform encoded away team labels

# Create separate DataFrames for home and away games, each from the scoring team's point of view
df_home = pd.DataFrame({
    'Team': home_teams,
    'Opponent': away_teams,
    'Actual Score': y_home_test_np,
    'Predicted Score': home_outputs
})

df_away = pd.DataFrame({
    'Team': away_teams,
    'Opponent': home_teams,
    'Actual Score': y_away_test_np,
    'Predicted Score': away_outputs
})

# Concatenate DataFrames for easier plotting
df_all = pd.concat([df_home, df_away])

# Reference line y = x (perfect prediction). It spans at least 0..6 and is
# extended when an actual or predicted score exceeds 6, so no point lies beyond
# the end of the line. It is the same for every team, hence computed once.
score_columns = ['Actual Score', 'Predicted Score']
line_max = max(6.0, float(df_all[score_columns].to_numpy().max())) if len(df_all) else 6.0
line_x_values = np.linspace(0, line_max)
line_y_values = line_x_values

# Loop through each unique team and create individual graphs for home and away games
unique_teams = df_all['Team'].unique()
for team in unique_teams:
    team_df = df_all[df_all['Team'] == team]
    fig = px.scatter(team_df, x='Actual Score', y='Predicted Score', color='Opponent',
                     title=f'{team} Home and Away Games: Predicted vs Actual Scores',
                     hover_data=['Opponent'])

    # Add a trace for the line y = x
    fig.add_trace(go.Scatter(x=line_x_values, y=line_y_values, mode='lines', name='y = x'))

    # Style only the scatter markers, not the reference line
    fig.update_traces(marker=dict(size=8, opacity=0.7), selector=dict(mode='markers'))
    fig.show()
