In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

# Pick the compute device once: CUDA when PyTorch reports a usable GPU,
# otherwise the CPU. The choice is echoed so it is visible in the cell output.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

class MLPModel(nn.Module):
    """MLP regressor over two embedded ID features plus continuous features.

    ``circuit_id`` and ``race_id`` are each looked up in their own embedding
    table; the two embedding vectors are concatenated with the continuous
    features and passed through four fully connected layers with ELU
    activations between them. The final layer emits one value per sample.

    Args:
        circuit_id_max: Number of rows in the circuit embedding table. Valid
            circuit indices are ``0 .. circuit_id_max - 1``.
        race_id_max: Number of rows in the race embedding table. Valid race
            indices are ``0 .. race_id_max - 1``.
        embedding_dim: Width of each embedding vector.
        num_continuous: Number of continuous feature columns concatenated to
            the embeddings. Defaults to 2 (points and wins).
    """

    def __init__(
        self,
        circuit_id_max: int,
        race_id_max: int,
        embedding_dim: int,
        num_continuous: int = 2,
    ) -> None:
        super().__init__()

        # Embedding layers for circuitId and raceId
        self.circuit_embedding = nn.Embedding(circuit_id_max, embedding_dim)
        self.race_embedding = nn.Embedding(race_id_max, embedding_dim)

        # Fully connected layers; the first one consumes both embeddings plus
        # the continuous features.
        self.fc1 = nn.Linear(embedding_dim * 2 + num_continuous, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 1)

    def forward(
        self,
        circuit_id: torch.Tensor,
        race_id: torch.Tensor,
        points_wins: torch.Tensor,
    ) -> torch.Tensor:
        """Run the network on one batch.

        Args:
            circuit_id: 1-D integer tensor of circuit indices, one per sample.
            race_id: 1-D integer tensor of race indices, one per sample.
            points_wins: 2-D float tensor of continuous features, one row per
                sample, with as many columns as ``num_continuous``.

        Returns:
            Tensor with one row per sample and a single output column.
        """
        # Embeddings
        circuit_embedded = self.circuit_embedding(circuit_id)
        race_embedded = self.race_embedding(race_id)

        # Concatenate embeddings with the continuous features along the
        # feature axis.
        x = torch.cat([circuit_embedded, race_embedded, points_wins], dim=1)

        # ELU comes from torch.nn.functional, which is the documented public
        # location of this activation.
        x = nn.functional.elu(self.fc1(x))
        x = nn.functional.elu(self.fc2(x))
        x = nn.functional.elu(self.fc3(x))

        # No activation on the output layer: the raw value is returned.
        return self.fc4(x)

# Parameters
# nn.Embedding(num_embeddings, dim) only accepts indices in
# [0, num_embeddings), so each table is sized from the largest ID present.
# The number of distinct IDs is not enough: IDs need not be contiguous or
# start at 0, and any ID >= nunique() would raise an index error.
circuit_id_max = int(df_cleaned['circuitId'].max()) + 1
race_id_max = int(df_cleaned['raceId'].max()) + 1
embedding_dim = 8  # Dimension for embedding layers

# Model instance
net = MLPModel(circuit_id_max, race_id_max, embedding_dim).to(device)

# Optimizer and Loss function
optimizer = optim.Adam(net.parameters(), lr=0.001)
criterion = nn.MSELoss()

# Data preparation
# Columns 0-1 are the categorical IDs, columns 2-3 the continuous features.
# Cast to float explicitly: if every column is integer-typed the array would
# be integer and the standardized values written back below would be
# silently truncated.
input_data = df_cleaned[['circuitId', 'raceId', 'points', 'wins']].values.astype(float)
target_data = df_cleaned['stop'].values

# Splitting the data
# The test set is held out for the final evaluation only. A separate
# validation set is carved out of the training portion to drive early
# stopping, so that model selection never sees the test data.
X_train, X_test, y_train, y_test = train_test_split(input_data, target_data, test_size=0.3, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Scale continuous features (points and wins)
# The scaler is fit on the training rows only and then applied to the
# validation and test rows, so no held-out statistics leak into training.
# Note: `input_data` itself is left unscaled.
scaler = StandardScaler()
X_train[:, 2:] = scaler.fit_transform(X_train[:, 2:])
X_val[:, 2:] = scaler.transform(X_val[:, 2:])
X_test[:, 2:] = scaler.transform(X_test[:, 2:])

# Tensor conversion
X_train_tensor = torch.tensor(X_train[:, :2], dtype=torch.long).to(device)  # Circuit and race IDs as long (int)
points_wins_train_tensor = torch.tensor(X_train[:, 2:], dtype=torch.float32).to(device)  # Points and wins
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)

X_val_tensor = torch.tensor(X_val[:, :2], dtype=torch.long).to(device)
points_wins_val_tensor = torch.tensor(X_val[:, 2:], dtype=torch.float32).to(device)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1).to(device)

X_test_tensor = torch.tensor(X_test[:, :2], dtype=torch.long).to(device)
points_wins_test_tensor = torch.tensor(X_test[:, 2:], dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)

# Creating a DataLoader
train_data = TensorDataset(X_train_tensor, points_wins_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)


def _evaluate(id_tensor, continuous_tensor, target_tensor):
    """Return (predictions, loss) of `net` on a full split, without gradients."""
    net.eval()
    with torch.no_grad():
        predictions = net(id_tensor[:, 0], id_tensor[:, 1], continuous_tensor)
        return predictions, criterion(predictions, target_tensor)


# Training with Early Stopping
best_loss = float('inf')
best_state = None  # Snapshot of the weights at the best validation loss
early_stop_counter = 0
patience = 20

for epoch in range(1000):
    net.train()
    running_loss = 0.0
    for circuit_race_ids, points_wins, y_batch in train_loader:
        circuit_ids, race_ids = circuit_race_ids[:, 0], circuit_race_ids[:, 1]

        optimizer.zero_grad()
        pred = net(circuit_ids, race_ids, points_wins)
        loss = criterion(pred, y_batch)

        loss.backward()
        optimizer.step()

        # Weight by batch size so the epoch figure is a true per-sample mean
        # even when the last batch is smaller.
        running_loss += loss.item() * y_batch.size(0)

    # Report the epoch average rather than the last mini-batch's loss.
    train_loss = running_loss / max(len(train_data), 1)

    # Validation
    _, val_loss = _evaluate(X_val_tensor, points_wins_val_tensor, y_val_tensor)

    if val_loss.item() < best_loss:
        best_loss = val_loss.item()
        early_stop_counter = 0
        # state_dict() returns references to the live parameters, so clone
        # them; otherwise later optimizer steps would overwrite the snapshot.
        best_state = {name: tensor.detach().clone() for name, tensor in net.state_dict().items()}
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch+1}, Training Loss: {train_loss}, Validation Loss: {val_loss.item()}')

# Roll back to the best checkpoint: by the time early stopping triggers the
# live weights are `patience` epochs past the best validation loss.
if best_state is not None:
    net.load_state_dict(best_state)

# Final Test Evaluation
# Predictions are recomputed with the restored weights on the untouched test
# split instead of reusing a stale tensor from the training loop.
y_test_pred, final_test_loss = _evaluate(X_test_tensor, points_wins_test_tensor, y_test_tensor)
print(f'\nFinal Test Loss: {final_test_loss.item()}')
