In [1]:
#Data Download directory

dir="/home/advaita/Downloads/data"

In [2]:
#Imports
import numpy as np 
import pandas as pd 

import seaborn as sns
from matplotlib import pyplot as plt
sns.set_style("whitegrid")
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")
import os 

In [3]:
#Frames


df_stops=pd.read_csv(dir+"/pit_stops.csv")
df_constructor_standing=pd.read_csv(dir+"/constructor_standings.csv")
df_races=pd.read_csv(dir+"/races.csv")

In [4]:
#Merge Frames

commmon_columns_CS_PT=["raceId"]
commmon_columns_CS_PT_RC=["raceId"]

merged_CS_PT=pd.merge(df_stops,df_constructor_standing,on=commmon_columns_CS_PT,how="outer")
merged_CS_PT_RC=pd.merge(merged_CS_PT,df_races,on=commmon_columns_CS_PT_RC,how="outer")



In [5]:
print(merged_CS_PT_RC.describe())

              raceId       driverId           stop            lap  \
count  127112.000000  116628.000000  116628.000000  116628.000000   
mean      924.101194     532.056590       1.799705      25.332390   
std       186.907666     388.151594       1.510880      14.841635   
min         1.000000       1.000000       1.000000       1.000000   
25%       873.000000      18.000000       1.000000      13.000000   
50%       950.000000     816.000000       2.000000      25.000000   
75%      1041.000000     832.000000       2.000000      36.000000   
max      1144.000000     860.000000      70.000000      78.000000   

       milliseconds  constructorStandingsId  constructorId         points  \
count  1.166280e+05           127036.000000  127036.000000  127036.000000   
mean   8.283363e+04            25569.222339      69.295782      92.455713   
std    3.049661e+05             4308.546747      84.177822     136.009386   
min    1.289700e+04                1.000000       1.000000       0.000

In [6]:
#Data cleaning


# nan_count = merged_CS_PT_RC.isna().sum()
# print(nan_count)

# rows_with_nan = merged_CS_PT_RC[merged_CS_PT_RC.isna().any(axis=1)]
# print(rows_with_nan)

df_cleaned=merged_CS_PT_RC.drop(columns=["round","date","time_y","url","fp1_date","fp1_time","fp3_time","quali_date","fp2_date","fp2_time","fp3_date","quali_time","sprint_date","sprint_time"])
# nan_count = merged_CS_PT_RC.isna().sum()
# print(nan_count)


# Assuming df is your DataFrame

# Print all rows containing NaN values
df_cleaned.dropna(inplace=True)
df_sampled=df_cleaned.sample(n=1000,random_state=42)

# print(df_cleaned.describe())
print(df_cleaned.describe())

              raceId       driverId           stop            lap  \
count  116628.000000  116628.000000  116628.000000  116628.000000   
mean      970.411754     532.056590       1.799705      25.332390   
std        89.497698     388.151594       1.510880      14.841635   
min       841.000000       1.000000       1.000000       1.000000   
25%       887.000000      18.000000       1.000000      13.000000   
50%       960.000000     816.000000       2.000000      25.000000   
75%      1052.000000     832.000000       2.000000      36.000000   
max      1132.000000     860.000000      70.000000      78.000000   

       milliseconds  constructorStandingsId  constructorId         points  \
count  1.166280e+05           116628.000000  116628.000000  116628.000000   
mean   8.283363e+04            26588.007408      71.625339      99.149823   
std    3.049661e+05             1331.130241      86.192215     139.625558   
min    1.289700e+04            24540.000000       1.000000       0.000

Visuilazing data

In [7]:
# sns.pairplot(df_sampled)

In [8]:
# for i in df_cleaned:
#     plt.figure(figsize=(10,5))
#     sns.violinplot(data=df_sampled,y=i)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

net = nn.Sequential(
    nn.Linear(4, 256),  
    nn.ELU(),
    nn.Linear(256, 128),
    nn.ELU(),
    nn.Linear(128, 64),
    nn.ELU(),
    nn.Linear(64, 1)  
)

optimizer = optim.Adam(net.parameters(), lr=0.001)  
criterion = nn.MSELoss()

input_data = df_cleaned[['circuitId', 'raceId', 'points', 'wins']].values
target_data = df_cleaned['stop'].values

scaler = StandardScaler()
input_data_scaled = scaler.fit_transform(input_data)

X_train, X_test, y_train, y_test = train_test_split(input_data_scaled, target_data, test_size=0.3, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1).to(device)

train_data = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)

net.to(device)

best_loss = float('inf')
early_stop_counter = 0
patience = 20  

for epoch in range(1000):
    net.train() 
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()

        pred = net(X_batch)  
        loss = criterion(pred, y_batch) 

        loss.backward()  
        optimizer.step() 

    net.eval()  
    with torch.no_grad():
        y_test_pred = net(X_test_tensor)  
        val_loss = criterion(y_test_pred, y_test_tensor)  
    
    if val_loss.item() < best_loss:
        best_loss = val_loss.item()
        early_stop_counter = 0  
    else:
        early_stop_counter += 1
        if early_stop_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Training Loss: {loss.item()}, Validation Loss: {val_loss.item()}')

net.eval()
with torch.no_grad():
    y_test_pred = net(X_test_tensor)
    final_test_loss = criterion(y_test_pred, y_test_tensor)
    print(f'\nFinal Test Loss: {final_test_loss.item()}')


cpu
Epoch 10, Training Loss: 0.1979583352804184, Validation Loss: 2.1499016284942627
Epoch 20, Training Loss: 0.5274561643600464, Validation Loss: 2.1312179565429688
