In [None]:
# GOAL: build a decent dataset for training an XGBoost model to predict lap time

In [None]:
import fastf1

In [None]:
# Load a single race session to explore the data it exposes.
session = fastf1.get_session(2024, "Hungary", "R")  # year, grand prix, R=race
session.load()  # called with no arguments, so the library's default load options apply

# TODO: 24 races in the 2025 season — need a way to pull data for every race, not just this one.
# NOTE(review): the session loaded above is from 2024 — confirm which season is intended.

In [None]:
laps = session.laps  # pull the laps information for the loaded session
# Currently unused: list of driver abbreviations for this session.
# drivers = session.drivers  # get list of drivers
# drivers = [session.get_driver(driver)["Abbreviation"] for driver in drivers]

**Data Tables**
- track_features (track_name, lap_length_km, average_deg_rate, pit_loss_seconds, overtaking_difficulty)
- tire_compounds (compound, initial_grip, degradation_rate, cliff_lap)
- race_conditions (track_temp, air_temp, rain_probability, safety_car_probability)
- lap_times (lap_number, compound, tire_age (stint), lap_time)

In [None]:
# Inspect the available lap columns before choosing which ones to keep.
laps.columns

In [None]:
# lap times dataframe
# Resulting columns (in order): driver, team, lap_number, stint, compound, tyre_age,
# PitInTime, PitOutTime, lap_time_sec, is_inlap, is_outlap
#
# The selection is copied explicitly so the source `laps` frame is never touched.
# (The earlier bare `laps.copy()` statement discarded its result and did nothing.)
lap_times_df = (
    laps[
        [
            "Driver",
            "Team",
            "LapNumber",
            "Stint",
            "Compound",
            "TyreLife",
            "LapTime",
            "PitInTime",
            "PitOutTime",
        ]
    ]
    .copy()
    .rename(
        columns={
            "Driver": "driver",
            "Team": "team",
            "LapNumber": "lap_number",
            "Stint": "stint",
            "Compound": "compound",
            "TyreLife": "tyre_age",
            "LapTime": "lap_time",
        }
    )
    .assign(
        # convert lap time to seconds
        lap_time_sec=lambda df: df["lap_time"].dt.total_seconds(),
        # flag pit laps: True wherever the matching pit timestamp is present
        is_inlap=lambda df: df["PitInTime"].notna(),
        is_outlap=lambda df: df["PitOutTime"].notna(),
    )
    # the raw lap_time column is replaced by lap_time_sec
    .drop(columns=["lap_time"])
)
lap_times_df  # need to add: race_id, year, round, track


In [None]:
# Inspect the weather data attached to the loaded session.
session.weather_data

In [None]:
# races table — target columns:
#   race_id, year, round, race_name, track, country, total_laps, lap_length_km, pit_loss_sec
# Work on a copy of the event record (the copy is assigned; a bare `.copy()`
# statement would discard its result).
race_data_pull = session.event.copy()

# Event fields used so far: round, race_name, track (Location), country.
# The previous selection ended with an empty label "", which raises KeyError.
# "Country" fills the slot that the target-column list above calls `country`.
# NOTE(review): confirm `Country` is the intended field; race_id, year, total_laps,
# lap_length_km and pit_loss_sec still have to be added from elsewhere.
races_df = race_data_pull[["RoundNumber", "EventName", "Location", "Country"]]

In [None]:
# fastf1.Cache.enable_cache("./cache/fastf1")

In [None]:
import pandas as pd

# Loop over all races of one season and build a single combined lap table.
# For each event in the schedule: load the race session, tag its laps with
# year / round / track / race_id, and collect the per-race frames in `all_laps`.
# NOTE: the loop rebinds the module-level names `session` and `laps`; after this
# cell `laps` holds the concatenated season table.
year = 2024
all_laps = []
# include_testing=False keeps pre-season testing out of the schedule: those
# events cannot be loaded as a race ("R") session and would only ever end up
# in the "Failed" branch below.
schedule = fastf1.get_event_schedule(year, include_testing=False)

for _, event in schedule.iterrows():
    round_no = event["RoundNumber"]
    race_name = event["EventName"]

    try:
        session = fastf1.get_session(year, round_no, "R")
        # NOTE(review): only laps/weather are named here, so any other load
        # options keep their library defaults — confirm whether they are needed.
        session.load(laps=True, weather=True)

        laps = session.laps.copy()

        if laps.empty:
            print(f"No lap data for {race_name}")
            continue

        # All four tags are constant within a race, so they are assigned as scalars.
        # `track` currently stores the event name (there is no separate circuit field here).
        laps["year"] = year
        laps["round"] = round_no
        laps["track"] = race_name
        laps["race_id"] = f"{year}_{round_no}_{race_name}"

        all_laps.append(laps)

        print(f"Loaded {race_name}")

    except Exception as e:
        # Best effort: one race failing to load must not abort the whole season pull.
        print(f"Failed {race_name}: {e}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the actual problem instead.
if not all_laps:
    raise ValueError(f"No race laps could be loaded for {year}; see the messages printed above.")

laps = pd.concat(all_laps, ignore_index=True)

In [None]:
# NOTE: `all_laps` is the list of per-race frames; the concatenated season table is `laps`.
all_laps

In [None]:
import os

# Check whether the data directory exists (path is relative to the current working directory).
os.path.isdir("../data")