In [1]:
from pathlib import Path

import fastf1 as f1
import pandas as pd

# FastF1 does not create the cache directory itself and refuses to enable a
# path that does not exist, so make sure it is there before turning the cache
# on. This keeps "Restart Kernel -> Run All" working on a fresh checkout.
Path('cache').mkdir(parents=True, exist_ok=True)
f1.Cache.enable_cache('cache')

In [2]:
# Championship season whose races are collected by this notebook.
year = 2023

# Season calendar without the testing events, narrowed to the two columns
# selected here: the event name and the event format.
event_schedule = f1.get_event_schedule(
    year,
    include_testing=False,
)[["EventName", "EventFormat"]]

In [3]:
# Build one lap-level table per race and stack them into `laps_df`.
# Each row is a single lap, made of the selected lap columns, the weather
# sample returned for that lap and summary statistics of the car telemetry
# recorded during that lap.
laps = []
for event_name in event_schedule["EventName"]:
    race_session = f1.get_event(year, event_name).get_race()
    race_session.load()

    # `drop` already returns a new frame, so no explicit copy is needed.
    event_laps = race_session.laps.drop(columns=[
        'Time', 'PitOutTime', 'PitInTime',
        'Sector1Time', 'Sector2Time', 'Sector3Time',
        'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime',
        'DriverNumber', 'Deleted', 'DeletedReason', 'FastF1Generated',
        'IsAccurate', 'LapStartTime', 'LapStartDate',
    ])

    weather_data = []
    car_data = []
    # Iterate in the same order as `race_session.laps` so the per-lap rows
    # collected here line up positionally with `event_laps` in the concat below.
    for _, lap in race_session.laps.iterlaps():
        weather_data.append(lap.get_weather_data())

        # One Series per lap, indexed by (channel, statistic).
        car_data_summary = lap.get_car_data().agg({
            'Speed': ['mean', 'median', 'min', 'max', 'std'],
            'RPM': ['mean', 'median', 'min', 'max', 'std'],
            'Throttle': ['mean', 'median', 'min', 'max', 'std'],
            'nGear': ['mean', 'median', 'min', 'max', 'std'],
            'Brake': ['mean', 'median']
        }).unstack()
        # Flatten (channel, statistic) into names such as "Speed_mean".
        car_data_summary.index = ["_".join(i) for i in car_data_summary.index]
        car_data.append(car_data_summary)

    weather_df = pd.DataFrame(weather_data).drop(columns=['Time'])
    # Only mean/median are requested for Brake; the aggregation still yields
    # placeholder Brake_min/max/std entries, which are removed here.
    car_df = pd.DataFrame(car_data).drop(columns=['Brake_min', 'Brake_max', "Brake_std"])

    # Indexes are reset so the three frames are joined row-by-row by position.
    event_laps = pd.concat(
        [
            event_laps.reset_index(drop=True),
            weather_df.reset_index(drop=True),
            car_df.reset_index(drop=True),
        ],
        axis=1,
    )

    laps.append(event_laps)

laps_df = pd.concat(laps, ignore_index=True)


core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.4.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '14', '55', '44', '18', '63', '77', '10', '23', '22', '2', '20', '21', '27', '24', '4', '31', '16', '81']
core           INFO 	Loading data for Saudi Arabian Grand Pri

In [4]:
# Inspect the column dtypes of the combined lap table.
laps_df.dtypes

Driver                      object
LapTime            timedelta64[ns]
LapNumber                  float64
Stint                      float64
SpeedI1                    float64
SpeedI2                    float64
SpeedFL                    float64
SpeedST                    float64
IsPersonalBest              object
Compound                    object
TyreLife                   float64
FreshTyre                     bool
Team                        object
TrackStatus                 object
Position                   float64
AirTemp                    float64
Humidity                   float64
Pressure                   float64
Rainfall                      bool
TrackTemp                  float64
WindDirection                int64
WindSpeed                  float64
Speed_mean                 float64
Speed_median               float64
Speed_min                  float64
Speed_max                  float64
Speed_std                  float64
RPM_mean                   float64
RPM_median          

In [5]:
# Give the terse FastF1 column names self-explanatory labels.
# NOTE: `rename` silently ignores keys that are not existing columns, so a
# misspelled key (previously 'FreshTrye') leaves the column unrenamed without
# raising any error.
laps_df = laps_df.rename(columns = {
    'SpeedI1': 'SpeedAfterSector1',
    'SpeedI2': 'SpeedAfterSector2',
    'SpeedFL': 'SpeedAtFinishLine',
    'SpeedST': 'SpeedAtLongestStraight',
    'TyreLife': 'LapsOnTyre',
    'FreshTyre': 'isFreshTyre',
    })

In [6]:
def clean(df):
    """Return a copy of ``df`` without rows that have a missing value.

    A row is dropped when any of its columns is null (NaN / None / NaT).
    The surviving rows keep their original index labels and the input frame
    is left unmodified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter.

    Returns
    -------
    pandas.DataFrame
        New frame containing only the fully populated rows.
    """
    # Filtering column by column on every column that contains a null is
    # exactly what `dropna` does in a single pass.
    return df.dropna()

# Keep only the laps for which every column has a value.
laps_df = clean(laps_df)

In [7]:
# Show any rows that are exact duplicates of an earlier row.
# `duplicated()` already returns a boolean mask, so it can index directly.
laps_df[laps_df.duplicated()]

Unnamed: 0,Driver,LapTime,LapNumber,Stint,SpeedAfterSector1,SpeedAfterSector2,SpeedAtFinishLine,SpeedAtLongestStraight,IsPersonalBest,Compound,...,Throttle_min,Throttle_max,Throttle_std,nGear_mean,nGear_median,nGear_min,nGear_max,nGear_std,Brake_mean,Brake_median


In [8]:
# Convert lap times from timedelta to float seconds.
# `assign` builds a new frame instead of writing a column into the filtered
# frame returned by `clean`, which can otherwise trigger pandas'
# SettingWithCopyWarning.
laps_df = laps_df.assign(LapTime=laps_df['LapTime'].dt.total_seconds())

In [9]:
def contains_track_status(row):
    """Add one boolean flag per track-status digit to ``row``.

    For each digit 1 through 7 the entry ``ContainsTrackStatus<digit>`` is
    set to whether that digit occurs in ``row['TrackStatus']``. The row is
    modified in place and returned, which makes the function usable with
    ``DataFrame.apply(..., axis=1)``.
    """
    status = row['TrackStatus']
    for code in map(str, range(1, 8)):
        row[f'ContainsTrackStatus{code}'] = code in status
    return row


In [10]:
# Expand TrackStatus into its per-digit flag columns, then discard the
# original column.
laps_df = (
    laps_df
    .apply(contains_track_status, axis=1)
    .drop(columns=['TrackStatus'])
)

In [11]:
def wind_direction_cat(row):
    """Attach a cardinal wind direction to ``row`` based on its bearing.

    ``row['WindDirection']`` is read as a compass bearing in degrees and
    mapped onto four 90-degree sectors centred on the cardinal points. Every
    sector includes its lower boundary and excludes its upper one:

    * North: [315, 360) and [0, 45)
    * East:  [45, 135)
    * South: [135, 225)
    * West:  [225, 315)

    The label is stored under ``row['DirectionOfWind']``; the row is modified
    in place and returned so the function can be used with
    ``DataFrame.apply(..., axis=1)``.
    """
    # Wrap onto 0-359 so that values such as 360 or negative bearings land in
    # the correct sector.
    bearing = row['WindDirection'] % 360
    # `>=` keeps 315 in the North sector, consistent with the other
    # boundaries (45, 135, 225), which all belong to the sector they open.
    if bearing < 45 or bearing >= 315:
        direction = 'North'
    elif bearing < 135:
        direction = 'East'
    elif bearing < 225:
        direction = 'South'
    else:
        direction = 'West'
    row['DirectionOfWind'] = direction
    return row

In [12]:
# Derive the categorical DirectionOfWind column, then discard the numeric
# bearing it was computed from.
laps_df = (
    laps_df
    .apply(wind_direction_cat, axis=1)
    .drop(columns=['WindDirection'])
)

In [13]:
# Persist the processed lap table twice: as CSV (without the index column)
# and as a pickle of the DataFrame.
laps_df.to_csv('../data/laps.csv', index=False)
laps_df.to_pickle('../data/laps.pkl')