In [1]:
import fastf1
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import tensorflow as tf
import keras
from keras import layers
from IPython.display import display
from matplotlib import pyplot as plt

In [2]:
def get_driver_data(session, driver):
    driver_data = session.laps.pick_driver(driver)
    driver_data["Driver"] = driver
    return driver_data
    

In [3]:
def adjust_laptimes(dataframe):
    dataframe["Laptime"] = dataframe["LapTime"].astype(int) * 1e-9  # Converts LapTime to seconds
    dataframe["Position(s)"] = dataframe["Position"].astype(int)  # Converts Position to integer
    new_df = dataframe.drop(columns=["LapTime", "Position"])  # Drops LapTime and Position columns
    new_df = new_df.rename(columns={"Position(s)": "Position"})  # Renames Position(s) to Position
    return new_df

def get_common(dataframe):
    new_df = dataframe[["LapNumber", "TrackStatus"]]  # Selects only LapNumber and TrackStatus
    return new_df

def get_pitstop(dataframe):
    # Replace NaT or NaN with 0 for both columns
    dataframe["PitInTime"] = dataframe["PitInTime"].fillna(0)
    dataframe["PitOutTime"] = dataframe["PitOutTime"].fillna(0)
    
    # Convert Timedelta to seconds using total_seconds()
    dataframe["PitInTime"] = dataframe["PitInTime"].apply(lambda x: float(x.total_seconds()) if isinstance(x, pd.Timedelta) else x)
    dataframe["PitOutTime"] = dataframe["PitOutTime"].apply(lambda x: float(x.total_seconds()) if isinstance(x, pd.Timedelta) else x)
    
    # Create the PitstopTime column as the difference between PitOutTime and PitInTime
    
    return dataframe
    

In [4]:
def remove_extras(dataframe):
    new_df = dataframe[["Driver", "Laptime", "PitstopTime", "Position", "Compound", "Position", "Source"]]  # Drops unnecessary columns
    return new_df


In [5]:
def drop_unecessary_telem(dataframe):
    dropped_dataframe = dataframe.drop(columns=["DriverAhead", 'SessionTime', 'Date', 'Time'])
    guess_free_dataframe = dropped_dataframe[dropped_dataframe['Source'] != 'interpolation']
    return guess_free_dataframe

In [6]:
from concurrent.futures import ThreadPoolExecutor

In [7]:
drivers = ["VER", "PER", "LEC", "SAI", "HAM", "RUS", "NOR", "ALO", "OCO", "GAS"]
years = [2022, 2023, 2024]

retirements = [('RUS',2022), ('OCO', 2022), ('OCO', 2023), ('GAS', 2023), ("GAS", 2024), ('RUS', 2024)]

sessions = {}

for year in years:
    print(f"Loading {year} Silverstone Race...")
    session = fastf1.get_session(year, "Silverstone", "R")
    session.load(telemetry=True)
    sessions[year] = session




# def process_driver_telemetry(driver, year):

#     telemetry_frames = []

#     session = fastf1.get_session(year, 'Silverstone', 'R')
#     session.load(telemetry=True)
        
#     for driver in drivers:
#         driver_data = session.laps.pick_driver(driver)
#         driver_data["Driver"] = driver

#         for _, lap in driver_data.iterlaps():
#             telemetry = lap.get_telemetry()

#             # Add lap-level info to every telemetry sample in this lap
#             telemetry['LapNumber'] = lap['LapNumber']
#             telemetry['LapTime'] = lap['LapTime']
#             telemetry['Compound'] = lap['Compound']
#             telemetry['TyreLife'] = lap['TyreLife']
#             telemetry['Driver'] = driver
#             telemetry['Year'] = year

#             telemetry_frames.append(telemetry)

#     return pd.concat(telemetry_frames, ignore_index=True)

# tasks = [(d, y) for y in years for d in drivers]

# with ThreadPoolExecutor(max_workers=8) as executor:
#     results = list(executor.map(lambda args: process_driver_telemetry(*args), tasks))




Loading 2022 Silverstone Race...


core           INFO 	Loading data for British Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['55', '11', '44', '16', '14', '4', '1', '47', '5', '20', '18', '6', '3', '22', '31', '10', '77', '63', '24', '23']
core           INFO 	Loading data for British Grand Prix - Race

Loading 2023 Silverstone Race...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '4', '44', '81', '63', '11', '14', '23', '16', '55', '2', '77', '27', '18', '24', '22', '21', '10', '20', '31']
core           INFO 	Loading data for British Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info


Loading 2024 Silverstone Race...


req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '1', '4', '81', '55', '27', '18', '14', '23', '22', '2', '20', '3', '16', '77', '31', '11', '24', '63', '10']


In [9]:
telemetry_frames = []

for year, session in sessions.items():
    for driver in drivers:

        if (driver, year) in retirements:
            continue
        driver_data = session.laps.pick_driver(driver)
        driver_data["Driver"] = driver

        for _, lap in driver_data.iterlaps():
            telemetry = lap.get_telemetry()
            telemetry["LapNumber"] = lap["LapNumber"]
            telemetry["LapTime"] = lap["LapTime"]
            telemetry["Compound"] = lap["Compound"]
            telemetry["TyreLife"] = lap["TyreLife"]
            telemetry["Driver"] = driver
            telemetry["Year"] = year

            telemetry_frames.append(telemetry)

driver_telemetry = pd.concat(telemetry_frames, ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  driver_data["Driver"] = driver
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  driver_data["Driver"] = driver
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  driver_data["Driver"] = driver
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = val

In [10]:
driver_telemetry = pd.DataFrame(drop_unecessary_telem(driver_telemetry))

In [11]:
def adjust_laptimes(dataframe: pd.DataFrame):
    dataframe["Laptime"] = pd.to_timedelta(dataframe["LapTime"]).dt.total_seconds()
    dataframe.drop(columns=["LapTime"], inplace=True)  # Converts LapTime to seconds # Drops LapTime and Position columns
    return dataframe

In [12]:
test_laptimes = pd.DataFrame({
    'LapTime': [pd.Timedelta('1 days 2 hours 30 minutes'), 
                 pd.Timedelta('0 days 1 hour 15 minutes 45 seconds')]
})

adjust_laptimes(test_laptimes)

Unnamed: 0,Laptime
0,95400.0
1,4545.0


In [13]:
driver_telemetry = adjust_laptimes(driver_telemetry)

In [14]:
driver_telemetry

Unnamed: 0,DistanceToDriverAhead,RPM,Speed,nGear,Throttle,Brake,DRS,Source,Distance,RelativeDistance,Status,X,Y,Z,LapNumber,Compound,TyreLife,Driver,Year,Laptime
1,0.000000,9991.333333,0.000000,1,11.0,True,1,pos,-0.160919,-0.000029,OnTrack,-1085.000000,2095.000000,1972.000000,1.0,SOFT,1.0,VER,2022,141.969
2,0.000000,9798.333333,0.000000,1,11.0,True,1,pos,-0.100575,-0.000018,OnTrack,-1085.000000,2095.000000,1972.000000,1.0,SOFT,1.0,VER,2022,141.969
3,0.000000,9734.000000,0.000000,1,11.0,False,1,car,0.000000,0.000000,OnTrack,-1084.229877,2095.294048,1972.000211,1.0,SOFT,1.0,VER,2022,141.969
4,0.000000,9034.000000,4.000000,1,12.5,False,1,pos,0.182292,0.000033,OnTrack,-1083.000000,2096.000000,1972.000000,1.0,SOFT,1.0,VER,2022,141.969
5,0.000000,8334.000000,8.000000,1,14.0,False,1,car,0.444444,0.000080,OnTrack,-1082.039582,2097.183755,1971.998367,1.0,SOFT,1.0,VER,2022,141.969
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
917081,929.408056,11003.000000,243.000000,6,100.0,False,0,car,5774.756389,0.990306,OnTrack,-2006.423316,864.971467,1961.943569,50.0,MEDIUM,12.0,OCO,2024,91.608
917082,927.978056,11034.400000,243.900000,6,100.0,False,0,pos,5777.230282,0.990730,OnTrack,-1988.000000,893.000000,1962.000000,50.0,MEDIUM,12.0,OCO,2024,91.608
917083,915.108056,11317.000000,252.000000,6,100.0,False,0,car,5799.956389,0.994628,OnTrack,-1879.550764,1042.145161,1962.872545,50.0,MEDIUM,12.0,OCO,2024,91.608
917084,911.941389,11095.333333,252.316667,6,100.0,False,0,pos,5805.286001,0.995542,OnTrack,-1854.000000,1076.000000,1963.000000,50.0,MEDIUM,12.0,OCO,2024,91.608


### Min Max and Normalize

In [15]:
from sklearn.preprocessing import MinMaxScaler

In [16]:
scaler = MinMaxScaler()

normalize_columns = ['RPM', 'DistanceToDriverAhead', 'Speed', 'Distance', 'X', 'Y', 'Z', 'RPM', 'Throttle', 'RelativeDistance']


driver_telemetry[normalize_columns] = scaler.fit_transform(pd.DataFrame(driver_telemetry[normalize_columns]))



In [17]:
driver_telemetry['Brake'] = driver_telemetry['Brake'].astype(int)

In [18]:
tyre_ohe = OneHotEncoder()

tyre_ohe.fit(np.array(["SOFT", "MEDIUM", "HARD", "INTERMEDIATE", "WET", "-999"]).reshape(-1, 1))


encoded_tyres = tyre_ohe.transform(driver_telemetry['Compound'].astype(str).to_numpy().reshape(-1, 1)).toarray()

encoded_df = pd.DataFrame(
    encoded_tyres,
    columns=tyre_ohe.get_feature_names_out(['Compound']),
    index=driver_telemetry.index
)
driver_telemetry = pd.concat([driver_telemetry.drop(columns=['Compound']), encoded_df], axis=1)

In [20]:
laptime_scaler = MinMaxScaler()

lap_times_label = driver_telemetry.groupby('LapNumber')['Laptime'].max().reset_index()

lap_times_label["Laptime"] = laptime_scaler.fit_transform(pd.DataFrame(lap_times_label["Laptime"]))

In [21]:
lap_times_label

Unnamed: 0,LapNumber,Laptime
0,1.0,1.0
1,2.0,0.045946
2,3.0,0.741585
3,4.0,0.058143
4,5.0,0.060081
5,6.0,0.407184
6,7.0,0.048013
7,8.0,0.046334
8,9.0,0.046528
9,10.0,0.055869


In [22]:
pad_max = (0,0)
avg = 0
for x in range(1,52):
    curr = driver_telemetry[driver_telemetry['LapNumber'] == x].shape[0]
    
    avg += curr
    if curr > pad_max[1]:
        pad_max = (x, curr)
print("Most telemetry points in 1 lap:", pad_max)
print("Average Telemetry Points per Lap:", avg // 51)

Most telemetry points in 1 lap: (2, 22329)
Average Telemetry Points per Lap: 17661


In [23]:
driver_telemetry = driver_telemetry.drop(columns=['Laptime'])

In [24]:
padded_data = []
for x in range(1,52):
    lap_data = driver_telemetry[driver_telemetry['LapNumber'] == x]
    telem_points = driver_telemetry[driver_telemetry['LapNumber'] == x].shape[0]
    if telem_points < pad_max[1]:
           padding = pd.DataFrame(np.full((pad_max[1] - telem_points, driver_telemetry.shape[1]), -999), columns= driver_telemetry.columns)
           padding['LapNumber'] = x
           lap_data = pd.concat([driver_telemetry[driver_telemetry['LapNumber'] == x], padding])
    padded_data.append(lap_data)

driver_telemetry = pd.concat(padded_data, ignore_index= True)

In [27]:
driver_telemetry.head()

Unnamed: 0,DistanceToDriverAhead,RPM,Speed,nGear,Throttle,Brake,DRS,Source,Distance,RelativeDistance,...,LapNumber,TyreLife,Driver,Year,Compound_-999,Compound_HARD,Compound_INTERMEDIATE,Compound_MEDIUM,Compound_SOFT,Compound_WET
0,0.0,0.689413,0.0,1,0.105769,1,1,pos,3.2e-05,3.1e-05,...,1.0,1.0,VER,2022,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.671197,0.0,1,0.105769,1,1,pos,4.2e-05,4.2e-05,...,1.0,1.0,VER,2022,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.665125,0.0,1,0.105769,0,1,car,6e-05,6e-05,...,1.0,1.0,VER,2022,0.0,0.0,0.0,0.0,1.0,0.0
3,0.0,0.599056,0.011799,1,0.120192,0,1,pos,9.1e-05,9.3e-05,...,1.0,1.0,VER,2022,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.532987,0.023599,1,0.134615,0,1,car,0.000135,0.00014,...,1.0,1.0,VER,2022,0.0,0.0,0.0,0.0,1.0,0.0


In [64]:
model = keras.Sequential(
    [
        layers.Masking(mask_value= -999.0),
        
        layers.LSTM(64, return_sequences= False),

        layers.Dense(32, activation= 'relu'),
        layers.Dense(1),
    ]
)      

In [65]:
driver_telemetry = driver_telemetry.drop(columns=['Source', 'Status'])

In [66]:
X = []

for lap_num, lap_df in driver_telemetry.groupby("LapNumber"):
    lap_array = lap_df.values  # shape: (952, num_features)
    X.append(lap_array)

X = np.array(X)  # shape: (num_laps, 952, num_features)


y = np.array(lap_times_label['Laptime'])

In [None]:
print(X.shape)
print(y.shape)

In [None]:
X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.float32)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.70588235294, random_state=42)

In [None]:
model.compile(
    optimizer='adam',
    loss='mse',         # Mean Squared Error is standard for regression
    metrics=['mae']     # Tracks Mean Absolute Error during training
)


In [None]:
model.fit(X_train, y_train,
          epochs=20,
          batch_size=8,
          shuffle=False,
          validation_data=(X_test, y_test))

In [None]:
y_pred = laptime_scaler.inverse_transform(pd.DataFrame(model.predict(X_test)))

y_true = laptime_scaler.inverse_transform(pd.DataFrame(y_test))

In [None]:
plt.plot(y_pred, color = 'red')
plt.plot(y_true, color = 'blue')