In [None]:
import pandas as pd
import fastf1
import os

In [None]:
# Read the four 2025 result tables in one pass. The order of the paths below
# matches the order of the names on the left-hand side.
qualified, race, sprint_qualified, sprint = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/F1_2025_Dataset/F1_2025_QualifyingResults.csv',
        '../data/F1_2025_Dataset/F1_2025_RaceResults.csv',
        '../data/F1_2025_Dataset/F1_2025_SprintQualifyingResults.csv',
        '../data/F1_2025_Dataset/F1_2025_SprintResults.csv',
    )
)

In [None]:
# Single definition of the FastF1 cache location, so the directory that gets
# created is always the one caching is enabled on.
FASTF1_CACHE_DIR = "../data/fastf1_cache"

# Create the directory before enabling the cache; exist_ok=True makes
# re-running this cell harmless.
os.makedirs(FASTF1_CACHE_DIR, exist_ok=True)
fastf1.Cache.enable_cache(FASTF1_CACHE_DIR)

In [None]:
# Fetch the 2021 Qatar session with identifier "Q", load its data, and keep a
# handle on the lap table for the cleaning step.
session_2021 = fastf1.get_session(year=2021, gp="Qatar", identifier="Q")
session_2021.load()

laps_2021 = session_2021.laps

In [None]:
# Keep laps that have a lap time, are flagged IsAccurate, and are not marked
# Deleted; then add the lap time expressed in seconds as QatarLap_2021.
clean_laps_2021 = (
    laps_2021
    .loc[
        laps_2021['LapTime'].notna()
        & laps_2021['IsAccurate'].eq(True)
        & laps_2021['Deleted'].eq(False)
    ]
    .assign(QatarLap_2021=lambda laps: laps['LapTime'].dt.total_seconds())
)

In [None]:
# Fastest valid 2021 lap per driver, ordered fastest-first.
#
# Sorting by lap time and keeping each driver's first row preserves the whole
# lap record. GroupBy.first() is avoided on purpose: it returns the first
# non-null value of every column independently, so a "best lap" row could end
# up combining values taken from different laps.
best_laps_2021 = (
    clean_laps_2021
    .sort_values('QatarLap_2021')
    .drop_duplicates(subset='Driver', keep='first')  # first row per driver == fastest lap
    .reset_index(drop=True)
)

In [None]:
# Same cache path as the earlier cache-setup cell; repeating the call here is
# redundant when the cells run in order.
fastf1.Cache.enable_cache("../data/fastf1_cache")

# Fetch the 2023 Qatar session with identifier "Q", load its data, and keep a
# handle on the lap table for the cleaning step.
session_2023 = fastf1.get_session(year=2023, gp="Qatar", identifier="Q")
session_2023.load()

laps_2023 = session_2023.laps

In [None]:
# Keep laps that have a lap time, are flagged IsAccurate, and are not marked
# Deleted; then add the lap time expressed in seconds as QatarLap_2023.
clean_laps_2023 = (
    laps_2023
    .loc[
        laps_2023['LapTime'].notna()
        & laps_2023['IsAccurate'].eq(True)
        & laps_2023['Deleted'].eq(False)
    ]
    .assign(QatarLap_2023=lambda laps: laps['LapTime'].dt.total_seconds())
)

In [None]:
# Fastest valid 2023 lap per driver, ordered fastest-first.
#
# Sorting by lap time and keeping each driver's first row preserves the whole
# lap record. GroupBy.first() is avoided on purpose: it returns the first
# non-null value of every column independently, so a "best lap" row could end
# up combining values taken from different laps.
best_laps_2023 = (
    clean_laps_2023
    .sort_values('QatarLap_2023')
    .drop_duplicates(subset='Driver', keep='first')  # first row per driver == fastest lap
    .reset_index(drop=True)
)

In [None]:
# Put both years' best laps side by side. The outer join keeps drivers who
# appear in only one of the two years; their missing year is NaN.
qatar_all_years = pd.merge(
    best_laps_2021[['Driver', 'QatarLap_2021']],
    best_laps_2023[['Driver', 'QatarLap_2023']],
    how="outer",
    on="Driver",
)

In [None]:
# The FastF1 "Driver" column becomes "DriverCode", the key used for the later
# merge into the 2025 feature table.
qatar_all_years = qatar_all_years.rename(
    {"Driver": "DriverCode"},
    axis="columns",
)

In [None]:
# Row-wise mean of the two yearly best laps. mean() skips NaN by default, so a
# driver with a time in only one year keeps that single time.
qatar_all_years["QatarLapTime"] = (
    qatar_all_years
    .loc[:, ["QatarLap_2021", "QatarLap_2023"]]
    .mean(axis="columns")
)

In [None]:
# Make "Position" numeric in every results table. Values that cannot be parsed
# as numbers become NaN (errors="coerce"), so the averages below skip them.
for results_df in (qualified, race, sprint, sprint_qualified):
    results_df["Position"] = pd.to_numeric(
        results_df["Position"],
        errors="coerce",
    )

In [None]:
# Mean qualifying position per driver, one row per driver.
avg_qualified = (
    qualified
    .groupby("Driver", as_index=False)["Position"]
    .mean()
    .rename(columns={"Position": "AvgQualiPos_2025"})
)

In [None]:
# Mean race finishing position per driver, one row per driver.
avg_race = (
    race
    .groupby("Driver", as_index=False)["Position"]
    .mean()
    .rename(columns={"Position": "AvgRacePos_2025"})
)

In [None]:
# Mean sprint finishing position per driver, one row per driver.
avg_sprint = (
    sprint
    .groupby("Driver", as_index=False)["Position"]
    .mean()
    .rename(columns={"Position": "AvgSprintPos_2025"})
)

In [None]:
# Mean sprint-qualifying position per driver, one row per driver.
avg_sprint_qualified = (
    sprint_qualified
    .groupby("Driver", as_index=False)["Position"]
    .mean()
    .rename(columns={"Position": "AvgSprintQualiPos_2025"})
)

In [None]:
# One row per driver, carrying the team from that driver's last row in the
# qualifying table.
#
# De-duplicating on (Driver, Team) pairs would emit a driver twice whenever
# they are listed under more than one team, and every later merge on "Driver"
# would carry that duplicate row into the exported feature table.
# NOTE(review): "last row" is the driver's most recent team only if the CSV is
# in chronological order — confirm against the dataset.
drivers_2025 = (
    qualified[["Driver", "Team"]]
    .drop_duplicates(subset="Driver", keep="last")
    .reset_index(drop=True)
)

In [None]:
# Attach each per-driver average to the driver/team roster. Left joins keep
# every roster row; a driver with no entry in a table gets NaN in that column.
season_features_2025 = (
    drivers_2025
    .merge(avg_qualified, on="Driver", how="left")
    .merge(avg_race, on="Driver", how="left")
    .merge(avg_sprint_qualified, on="Driver", how="left")
    .merge(avg_sprint, on="Driver", how="left")
)

In [None]:
# Full driver name -> three-letter driver code.
# Written as code -> accepted spellings and inverted, so alternate spellings of
# the same driver sit next to each other.
driver_map = {
    full_name: code
    for code, spellings in {
        "VER": ("Max Verstappen",),
        "PER": ("Sergio Pérez",),
        "HAM": ("Lewis Hamilton",),
        "RUS": ("George Russell",),
        "NOR": ("Lando Norris",),
        "PIA": ("Oscar Piastri",),
        "LEC": ("Charles Leclerc",),
        "SAI": ("Carlos Sainz",),
        "ALO": ("Fernando Alonso",),
        "STR": ("Lance Stroll",),
        "OCO": ("Esteban Ocon",),
        "GAS": ("Pierre Gasly",),
        "BOT": ("Valtteri Bottas",),
        "ZHO": ("Guanyu Zhou",),
        "TSU": ("Yuki Tsunoda",),
        "RIC": ("Daniel Ricciardo",),
        "MAG": ("Kevin Magnussen",),
        "HUL": ("Nico Hulkenberg", "Nico Hülkenberg"),
        "ALB": ("Alexander Albon", "Alex Albon"),
        "SAR": ("Logan Sargeant",),
        "HAD": ("Isack Hadjar",),
        "DOO": ("Jack Doohan",),
        "BOR": ("Gabriel Bortoleto",),
        "ANT": ("Kimi Antonelli",),
        "LAW": ("Liam Lawson",),
        "BEA": ("Oliver Bearman",),
        # NOTE(review): "Franco Colapintop" presumably mirrors a misspelling in
        # the source data — confirm before removing.
        "COL": ("Franco Colapinto", "Franco Colapintop"),
    }.items()
    for full_name in spellings
}

In [None]:
# Translate full names to driver codes. Names missing from driver_map come out
# as NaN and will not match any row in the Qatar lap-time merge.
season_features_2025 = season_features_2025.assign(
    DriverCode=season_features_2025["Driver"].map(driver_map)
)

In [None]:
# Bring the averaged Qatar lap time onto the 2025 table by driver code. The
# left join keeps every 2025 row; drivers without a Qatar time get NaN.
season_features_2025 = pd.merge(
    season_features_2025,
    qatar_all_years[["DriverCode", "QatarLapTime"]],
    how="left",
    on="DriverCode",
)

In [None]:
# Write the per-driver feature table to disk; index=False leaves the row index
# out of the file.
season_features_2025.to_csv(
    path_or_buf="../data/qatar2025_f1_data.csv",
    index=False,
)