In [1]:
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import torch
import torch.nn as nn
from IPython.display import display
from matplotlib import pyplot as plt

In [2]:
def get_driver_data(session, driver):
    driver_data = session.laps.pick_driver(driver)
    driver_data["Driver"] = driver
    return driver_data
    

In [3]:
def adjust_laptimes(dataframe):
    dataframe["Laptime"] = dataframe["LapTime"].astype(int) * 1e-9  # Converts LapTime to seconds
    dataframe["Position(s)"] = dataframe["Position"].astype(int)  # Converts Position to integer
    new_df = dataframe.drop(columns=["LapTime", "Position"])  # Drops LapTime and Position columns
    new_df = new_df.rename(columns={"Position(s)": "Position"})  # Renames Position(s) to Position
    return new_df

def get_common(dataframe):
    new_df = dataframe[["LapNumber", "TrackStatus"]]  # Selects only LapNumber and TrackStatus
    return new_df

def get_pitstop(dataframe):
    # Replace NaT or NaN with 0 for both columns
    dataframe["PitInTime"] = dataframe["PitInTime"].fillna(0)
    dataframe["PitOutTime"] = dataframe["PitOutTime"].fillna(0)
    
    # Convert Timedelta to seconds using total_seconds()
    dataframe["PitInTime"] = dataframe["PitInTime"].apply(lambda x: float(x.total_seconds()) if isinstance(x, pd.Timedelta) else x)
    dataframe["PitOutTime"] = dataframe["PitOutTime"].apply(lambda x: float(x.total_seconds()) if isinstance(x, pd.Timedelta) else x)
    
    # Create the PitstopTime column as the difference between PitOutTime and PitInTime
    
    return dataframe
    

In [4]:
def remove_extras(dataframe):
    new_df = dataframe[["Driver", "Laptime", "PitstopTime", "Position", "Compound", "Position", "Source"]]  # Drops unnecessary columns
    return new_df


In [5]:
def drop_unecessary_telem(dataframe):
    dropped_dataframe = dataframe.drop(columns=["DriverAhead", 'SessionTime', 'Date', 'Time'])
    guess_free_dataframe = dropped_dataframe[dropped_dataframe['Source'] != 'interpolation']
    return guess_free_dataframe

In [6]:
from concurrent.futures import ThreadPoolExecutor

In [7]:
drivers = ['VER', 'PER', 'LEC', 'SAI', 'HAM', 'NOR', 'ALO', 'RUS', 'OCO']
years = [2022, 2023, 2024]

retirements = [('RUS',2022), ('OCO', 2022), ('OCO', 2023), ('RUS', 2024)]

sessions = {}

for year in years:
    print(f"Loading {year} Silverstone Race...")
    session = fastf1.get_session(year, "Silverstone", "R")
    session.load(telemetry=True)
    sessions[year] = session




# def process_driver_telemetry(driver, year):

#     telemetry_frames = []

#     session = fastf1.get_session(year, 'Silverstone', 'R')
#     session.load(telemetry=True)
        
#     for driver in drivers:
#         driver_data = session.laps.pick_driver(driver)
#         driver_data["Driver"] = driver

#         for _, lap in driver_data.iterlaps():
#             telemetry = lap.get_telemetry()

#             # Add lap-level info to every telemetry sample in this lap
#             telemetry['LapNumber'] = lap['LapNumber']
#             telemetry['LapTime'] = lap['LapTime']
#             telemetry['Compound'] = lap['Compound']
#             telemetry['TyreLife'] = lap['TyreLife']
#             telemetry['Driver'] = driver
#             telemetry['Year'] = year

#             telemetry_frames.append(telemetry)

#     return pd.concat(telemetry_frames, ignore_index=True)

# tasks = [(d, y) for y in years for d in drivers]

# with ThreadPoolExecutor(max_workers=8) as executor:
#     results = list(executor.map(lambda args: process_driver_telemetry(*args), tasks))


core           INFO 	Loading data for British Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading 2022 Silverstone Race...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '11', '44', '16', '14', '4', '1', '47', '5', '20', '18', '6', '3', '22', '31', '10', '77', '63', '24', '23']
core           INFO 	Loading data for British Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info


Loading 2023 Silverstone Race...


req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '81', '63', '11', '14', '23', '16', '55', '2', '77', '27', '18', '24', '22', '21', '10', '20', '31']
core           INFO 	Loading data for British Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading 2024 Silverstone Race...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '1', '4', '81', '55', '27', '18', '14', '23', '22', '2', '20', '3', '16', '77', '31', '11', '24', '63', '10']


In [8]:
telemetry_frames = []

for year, session in sessions.items():
    for driver in drivers:

        if (driver, year) in retirements:
            continue
        driver_data = session.laps.pick_driver(driver)
        driver_data["Driver"] = driver

        for _, lap in driver_data.iterlaps():
            telemetry = lap.get_telemetry()
            telemetry["LapNumber"] = lap["LapNumber"]
            telemetry["LapTime"] = lap["LapTime"]
            telemetry["Compound"] = lap["Compound"]
            telemetry["TyreLife"] = lap["TyreLife"]
            telemetry["Driver"] = driver
            telemetry["Year"] = year

            telemetry_frames.append(telemetry)

driver_telemetry = pd.concat(telemetry_frames, ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  driver_data["Driver"] = driver
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  driver_data["Driver"] = driver
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  driver_data["Driver"] = driver
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = val

In [9]:
driver_telemetry = pd.DataFrame(drop_unecessary_telem(driver_telemetry))

In [10]:
def adjust_laptimes(dataframe: pd.DataFrame):
    dataframe["Laptime"] = pd.to_timedelta(dataframe["LapTime"]).dt.total_seconds()
    dataframe.drop(columns=["LapTime"], inplace=True)  # Converts LapTime to seconds # Drops LapTime and Position columns
    return dataframe

In [11]:
test_laptimes = pd.DataFrame({
    'LapTime': [pd.Timedelta('1 days 2 hours 30 minutes'), 
                 pd.Timedelta('0 days 1 hour 15 minutes 45 seconds')]
})

adjust_laptimes(test_laptimes)

Unnamed: 0,Laptime
0,95400.0
1,4545.0


In [12]:
driver_telemetry = adjust_laptimes(driver_telemetry)

test_render = driver_telemetry.copy()

In [13]:
test_render["Driver"].unique()

array(['VER', 'PER', 'LEC', 'SAI', 'HAM', 'NOR', 'ALO', 'RUS', 'OCO'],
      dtype=object)

### Min Max and Normalize

In [14]:
from sklearn.preprocessing import MinMaxScaler

In [15]:
scaler = MinMaxScaler()

normalize_columns = ['RPM', 'DistanceToDriverAhead', 'Speed', 'Distance', 'X', 'Y', 'Z', 'RPM', 'Throttle', 'RelativeDistance']


driver_telemetry[normalize_columns] = scaler.fit_transform(pd.DataFrame(driver_telemetry[normalize_columns]))



In [16]:
driver_telemetry['Brake'] = driver_telemetry['Brake'].astype(int)

In [17]:
tyre_ohe = OneHotEncoder()

tyre_ohe.fit(np.array(["SOFT", "MEDIUM", "HARD", "INTERMEDIATE", "WET", "-999"]).reshape(-1, 1))


encoded_tyres = tyre_ohe.transform(driver_telemetry['Compound'].astype(str).to_numpy().reshape(-1, 1)).toarray()

encoded_df = pd.DataFrame(
    encoded_tyres,
    columns=tyre_ohe.get_feature_names_out(['Compound']),
    index=driver_telemetry.index
)
driver_telemetry = pd.concat([driver_telemetry.drop(columns=['Compound']), encoded_df], axis=1)

In [18]:
laptime_scaler = MinMaxScaler()

lap_times_label = driver_telemetry.groupby(['Driver', 'LapNumber'], as_index=False)['Laptime'].max()

lap_times_label['Laptime'] = (
    lap_times_label
    .groupby('Driver')['Laptime']
    .transform(lambda x: MinMaxScaler().fit_transform(x.values.reshape(-1, 1)).flatten())
)

In [19]:
driver_encr = LabelEncoder()

driver_telemetry["Driver"] = driver_encr.fit_transform(driver_telemetry["Driver"])

normalized_test = driver_telemetry.copy()

In [20]:
normalized_test["Driver"].unique()

array([8, 5, 2, 7, 1, 3, 0, 6, 4])

In [21]:
pad_max = (0, 0)
avg = 0
count = 0

for driver in driver_telemetry['Driver'].unique():
    driver_df = driver_telemetry[driver_telemetry['Driver'] == driver]

    for x in range(1, int(driver_df['LapNumber'].max()) + 1):
        curr = driver_df[driver_df['LapNumber'] == x].shape[0]
        avg += curr
        count += 1
        if curr > pad_max[1]:
            pad_max = ((driver, x), curr)

print("Most telemetry points in 1 lap:", pad_max)
print("Average Telemetry Points per Lap:", avg // count)


Most telemetry points in 1 lap: ((np.int64(8), 34), 2913)
Average Telemetry Points per Lap: 1918


In [22]:
driver_telemetry = driver_telemetry.drop(columns=['Laptime'])

In [23]:
padded_data = []

for driver in driver_telemetry["Driver"].unique():

    driver_df = driver_telemetry[driver_telemetry["Driver"] == driver]

    for x in range(1,52):

        lap_data = driver_df[driver_df['LapNumber'] == x]
        telem_points = lap_data.shape[0]

        if telem_points < pad_max[1]:

            padding = pd.DataFrame(np.full((pad_max[1] - telem_points, lap_data.shape[1]), -999), columns= lap_data.columns)
            padding['LapNumber'] = x
            padding['Driver'] = driver

            lap_data = pd.concat([lap_data, padding], ignore_index=True)

        padded_data.append(lap_data)

driver_telemetry = pd.concat(padded_data, ignore_index= True)

padded_test = driver_telemetry.copy()

In [24]:
driver_telemetry.head()

Unnamed: 0,DistanceToDriverAhead,RPM,Speed,nGear,Throttle,Brake,DRS,Source,Distance,RelativeDistance,...,LapNumber,TyreLife,Driver,Year,Compound_-999,Compound_HARD,Compound_INTERMEDIATE,Compound_MEDIUM,Compound_SOFT,Compound_WET
0,0.0,0.689413,0.0,1,0.105769,1,1,pos,3.2e-05,3.1e-05,...,1.0,1.0,8,2022,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.671197,0.0,1,0.105769,1,1,pos,4.2e-05,4.2e-05,...,1.0,1.0,8,2022,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.665125,0.0,1,0.105769,0,1,car,6e-05,6e-05,...,1.0,1.0,8,2022,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.599056,0.011799,1,0.120192,0,1,pos,9.1e-05,9.3e-05,...,1.0,1.0,8,2022,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.532987,0.023599,1,0.134615,0,1,car,0.000135,0.00014,...,1.0,1.0,8,2022,0.0,0.0,0.0,0.0,1.0,0.0


In [25]:
driver_telemetry = driver_telemetry.drop(columns=['Source', 'Status'])

In [26]:
X = []

for lap_num, lap_df in driver_telemetry.groupby(["LapNumber", "Driver"]):
    lap_array = lap_df.values  # shape: (952, num_features)
    X.append(lap_array)

X = np.array(X)  # shape: (num_laps, 952, num_features)


y = np.array(lap_times_label['Laptime'])

In [None]:
X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.float32)

telemetry_idx = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
context_idx = [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]

telemetry = torch.tensor(X[:, :, telemetry_idx], dtype=torch.float32)
context = torch.tensor(X[:, 0, context_idx], dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

In [None]:
class SilverstoneModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.driver_embedding = nn.Embedding(20, 10)
        self.tyre_embedding = nn.Embedding(5, 3)
        self.year_embedding = nn.Embedding(3, 2)
        self.lstm1= nn.LSTM(12, 64)
        self.lstm2 = nn.LSTM(64, 128)
        self.sequential = nn.Sequential(
            nn.Linear(in_features = 128, out_features= 24, bias=True),
            nn.Linear(in_features=24, out_features=1, bias=True)
        )

    def forward(self, telemetry,l context):
        

SyntaxError: incomplete input (1463779478.py, line 14)