In [1]:
from src.dataset import ATPMatchesDataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import torch.nn as nn

import torch

In [2]:
# Build the dataset from the processed-matches CSV, then wrap the whole thing
# in a shuffling loader that yields mini-batches of 32 samples.
dataset = ATPMatchesDataset(
    'data/processed/atp_matches_2000_2024_final.csv'
)
dataloader = DataLoader(
    dataset=dataset,
    shuffle=True,
    batch_size=32,
)

In [4]:
# Tunable split fractions and loader batch size, kept in one place instead of
# being repeated as literals below.
TRAIN_FRACTION = 0.90
DEV_FRACTION = 0.05
BATCH_SIZE = 32

# Calculate split sizes. The test split takes whatever is left after the two
# int() truncations, so the three sizes always add up to the dataset length.
total_size = len(dataset)
train_size = int(TRAIN_FRACTION * total_size)
dev_size = int(DEV_FRACTION * total_size)
test_size = total_size - train_size - dev_size

# Split dataset. The dedicated, seeded generator makes split membership
# reproducible and independent of the global RNG state.
# NOTE(review): this is a uniformly random split; if the features encode
# player history, a chronological split may give a more honest estimate --
# confirm against how ATPMatchesDataset builds its features.
train_dataset, dev_dataset, test_dataset = random_split(
    dataset,
    [train_size, dev_size, test_size],
    generator=torch.Generator().manual_seed(42)
)
assert len(train_dataset) + len(dev_dataset) + len(test_dataset) == total_size

# Create DataLoaders for each split. Only the training loader shuffles:
# evaluation metrics do not depend on sample order, and a fixed order keeps
# dev/test passes repeatable and stops them from consuming global RNG state.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [5]:

class ATPMatchPredictor(nn.Module):
    """Fully connected binary classifier that outputs a probability.

    The network is a stack of ``Linear -> ReLU -> Dropout`` groups, one per
    entry in ``hidden_sizes``, followed by a single-unit ``Linear`` layer and
    a ``Sigmoid``. Because the sigmoid is part of the model, the output is
    already a probability in [0, 1] and should be paired with ``nn.BCELoss``
    (not ``nn.BCEWithLogitsLoss``).

    Args:
        input_size: Number of input features per sample.
        hidden_sizes: Widths of the hidden layers, in order. An empty
            sequence yields a plain logistic-regression model.
        dropout: Drop probability applied after every hidden activation.
    """

    def __init__(self, input_size, hidden_sizes=(128, 64, 32), dropout=0.3):
        super().__init__()

        layers = []
        prev_size = input_size

        # Hidden layers: each group maps prev_size -> hidden_size.
        for hidden_size in hidden_sizes:
            layers.extend([
                nn.Linear(prev_size, hidden_size),
                nn.ReLU(),
                nn.Dropout(dropout),
            ])
            prev_size = hidden_size

        # Output layer (binary classification): one unit squashed to [0, 1].
        layers.append(nn.Linear(prev_size, 1))
        layers.append(nn.Sigmoid())

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for ``x``; the last dimension has size 1."""
        return self.network(x)

In [6]:
# Seed the global RNG so weight initialisation, dropout masks and loader
# shuffling are repeatable every time this cell is re-run.
torch.manual_seed(42)

# Get input size from first batch
sample_batch = next(iter(train_loader))
input_size = sample_batch['features'].shape[1]

print(f"Input size: {input_size}")

# Initialize model
model = ATPMatchPredictor(input_size=input_size, hidden_sizes=[128, 64, 32])
print(model)

# Loss and optimizer. BCELoss expects probabilities, which matches the
# Sigmoid that ends the model.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 50

for epoch in range(num_epochs):
    # ---- Training pass (dropout active) ----
    model.train()
    train_loss = 0.0

    for batch in train_loader:
        features = batch['features']
        labels = batch['target'].float()  # Convert to float for BCELoss

        # Forward pass. squeeze(-1) removes only the trailing unit dimension
        # of the model output; a bare squeeze() would also drop the batch
        # dimension when a batch holds a single sample, leaving a 0-d tensor
        # that no longer matches the labels.
        # Assumes batch['target'] has shape (batch,) -- TODO confirm.
        outputs = model(features)
        loss = criterion(outputs.squeeze(-1), labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # ---- Validation pass (dropout disabled, no gradients tracked) ----
    model.eval()
    dev_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for batch in dev_loader:
            features = batch['features']
            labels = batch['target'].float()

            outputs = model(features)
            probabilities = outputs.squeeze(-1)
            loss = criterion(probabilities, labels)
            dev_loss += loss.item()

            # Threshold the probability at 0.5 to get a hard 0/1 prediction.
            predicted = (probabilities > 0.5).float()
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # Losses are reported as the mean of per-batch means.
    accuracy = 100 * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], '
          f'Train Loss: {train_loss/len(train_loader):.4f}, '
          f'Dev Loss: {dev_loss/len(dev_loader):.4f}, '
          f'Dev Accuracy: {accuracy:.2f}%')

Input size: 212
ATPMatchPredictor(
  (network): Sequential(
    (0): Linear(in_features=212, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.3, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.3, inplace=False)
    (6): Linear(in_features=64, out_features=32, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.3, inplace=False)
    (9): Linear(in_features=32, out_features=1, bias=True)
    (10): Sigmoid()
  )
)
Epoch [1/50], Train Loss: 0.6521, Dev Loss: 0.6287, Dev Accuracy: 64.20%
Epoch [2/50], Train Loss: 0.6386, Dev Loss: 0.6141, Dev Accuracy: 64.79%
Epoch [3/50], Train Loss: 0.6353, Dev Loss: 0.6348, Dev Accuracy: 64.14%
Epoch [4/50], Train Loss: 0.6350, Dev Loss: 0.6228, Dev Accuracy: 65.10%
Epoch [5/50], Train Loss: 0.6330, Dev Loss: 0.6158, Dev Accuracy: 64.30%
Epoch [6/50], Train Loss: 0.6336, Dev Loss: 0.6095, Dev Accuracy: 65.13%
Epoch [7/50], Train Loss: 0.6317, Dev Loss: 0.6187, Dev Accuracy: 65.