In [1]:
import pandas as pd
import fastf1
import os

In [2]:
# Load the four 2025 season result tables, in this order:
# qualifying, race, sprint qualifying, sprint.
qualified, race, sprint_qualified, sprint = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/F1_2025_Dataset/F1_2025_QualifyingResults.csv',
        '../data/F1_2025_Dataset/F1_2025_RaceResults.csv',
        '../data/F1_2025_Dataset/F1_2025_SprintQualifyingResults.csv',
        '../data/F1_2025_Dataset/F1_2025_SprintResults.csv',
    )
)

In [3]:
# Make sure the on-disk FastF1 cache folder exists, then point FastF1 at it.
cache_dir = "../data/fastf1_cache"
os.makedirs(cache_dir, exist_ok=True)
fastf1.Cache.enable_cache(cache_dir)

In [4]:
# Qatar Grand Prix 2021, qualifying session ("Q").
session_2021 = fastf1.get_session(year=2021, gp="Qatar", identifier="Q")
session_2021.load()  # default load, no arguments
laps_2021 = session_2021.laps

core           INFO 	Loading data for Qatar Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '33', '77', '10', '14', '4', '55', '22', '31', '5', '11', '18', '16', '3', '63', '7', '6', '99', '47', '9']


In [5]:
# Keep only laps that have a recorded time, are flagged accurate and are not
# marked as deleted.
keep_2021 = (
    laps_2021['LapTime'].notna()
    & laps_2021['IsAccurate'].eq(True)
    & laps_2021['Deleted'].eq(False)
)
clean_laps_2021 = laps_2021.loc[keep_2021].copy()

# Lap time expressed as float seconds.
clean_laps_2021['QatarLap_2021'] = clean_laps_2021['LapTime'].dt.total_seconds()

In [6]:
# Fastest clean lap per driver, ordered fastest-first.
#
# Sorting by lap time and keeping each driver's first row returns that lap's
# complete record. groupby(...).first() instead takes the first non-null value
# of every column independently, so a "best lap" row could mix fields coming
# from different laps. The stable sort makes tie-breaking deterministic
# (earlier row wins).
best_laps_2021 = (
    clean_laps_2021
    .dropna(subset=['Driver'])  # groupby also ignored rows without a driver
    .sort_values('QatarLap_2021', kind='stable')
    .drop_duplicates(subset='Driver', keep='first')
    .reset_index(drop=True)
)

In [7]:
# Qatar Grand Prix 2023, qualifying session ("Q"), using the same cache
# directory as the 2021 load.
fastf1.Cache.enable_cache("../data/fastf1_cache")
session_2023 = fastf1.get_session(year=2023, gp="Qatar", identifier="Q")
session_2023.load()  # default load, no arguments
laps_2023 = session_2023.laps

core           INFO 	Loading data for Qatar Grand Prix - Qualifying [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '63', '44', '14', '16', '81', '10', '31', '77', '4', '22', '55', '11', '23', '27', '2', '18', '40', '20', '24']


In [8]:
# Keep only laps that have a recorded time, are flagged accurate and are not
# marked as deleted.
keep_2023 = (
    laps_2023['LapTime'].notna()
    & laps_2023['IsAccurate'].eq(True)
    & laps_2023['Deleted'].eq(False)
)
clean_laps_2023 = laps_2023.loc[keep_2023].copy()

# Lap time expressed as float seconds.
clean_laps_2023['QatarLap_2023'] = clean_laps_2023['LapTime'].dt.total_seconds()

In [9]:
# Fastest clean lap per driver, ordered fastest-first.
#
# Sorting by lap time and keeping each driver's first row returns that lap's
# complete record. groupby(...).first() instead takes the first non-null value
# of every column independently, so a "best lap" row could mix fields coming
# from different laps. The stable sort makes tie-breaking deterministic
# (earlier row wins).
best_laps_2023 = (
    clean_laps_2023
    .dropna(subset=['Driver'])  # groupby also ignored rows without a driver
    .sort_values('QatarLap_2023', kind='stable')
    .drop_duplicates(subset='Driver', keep='first')
    .reset_index(drop=True)
)

In [10]:
# One row per driver seen in either year; an outer join leaves the lap time of
# a year the driver did not take part in as NaN.
qatar_years = pd.merge(
    best_laps_2021[['Driver', 'QatarLap_2021']],
    best_laps_2023[['Driver', 'QatarLap_2023']],
    how="outer",
    on="Driver",
)

In [11]:
# Rename the key column to "DriverCode", the name used when this table is
# joined onto the 2025 data.
qatar_years = qatar_years.rename({"Driver": "DriverCode"}, axis="columns")

In [12]:
# Row-wise mean of the two yearly best laps. NaNs are skipped, so a driver with
# a single Qatar appearance simply keeps that year's time.
qatar_years["QatarLapTime"] = (
    qatar_years.loc[:, ["QatarLap_2021", "QatarLap_2023"]].mean(axis="columns")
)

In [13]:
# Clean every 2025 results table in place before anything is aggregated.
for df in [qualified, race, sprint, sprint_qualified]:
    # Values that cannot be parsed as numbers become NaN and are therefore
    # skipped by the per-driver means computed from "Position".
    df["Position"] = pd.to_numeric(df["Position"], errors="coerce")

    # Normalise driver names up front: the aggregations and merges are keyed on
    # "Driver", so a padded or misspelt name would split one driver's results
    # across several groups. "Franco Colapintop" is the misspelling this
    # notebook already special-cases when building the final table.
    df["Driver"] = (
        df["Driver"]
        .str.strip()
        .replace({"Franco Colapintop": "Franco Colapinto"})
    )

In [14]:
# Mean 2025 qualifying position per driver.
avg_qualified = (
    qualified
    .groupby("Driver", as_index=False)
    .agg(AvgQualiPos_2025=("Position", "mean"))
)

In [15]:
# Mean 2025 race finishing position per driver.
avg_race = (
    race
    .groupby("Driver", as_index=False)
    .agg(AvgRacePos_2025=("Position", "mean"))
)

In [16]:
# Mean 2025 sprint finishing position per driver.
avg_sprint = (
    sprint
    .groupby("Driver", as_index=False)
    .agg(AvgSprintPos_2025=("Position", "mean"))
)

In [17]:
# Mean 2025 sprint-qualifying position per driver.
avg_sprint_qualified = (
    sprint_qualified
    .groupby("Driver", as_index=False)
    .agg(AvgSprintQualiPos_2025=("Position", "mean"))
)

In [18]:
# Distinct (Driver, Team) pairs found in the qualifying results. A driver listed
# with more than one team contributes one row per team.
drivers_2025 = qualified.loc[:, ["Driver", "Team"]].drop_duplicates(ignore_index=True)

In [19]:
# Attach the four per-driver averages to the driver/team table. Left joins keep
# every driver row; an average a driver has no data for stays NaN.
data_2025 = (
    drivers_2025
    .merge(avg_qualified, how="left", on="Driver")
    .merge(avg_race, how="left", on="Driver")
    .merge(avg_sprint_qualified, how="left", on="Driver")
    .merge(avg_sprint, how="left", on="Driver")
)

In [20]:
# Three-letter driver code -> every spelling of the driver's full name that
# should resolve to it. Codes with several spellings cover name variants
# (with/without diacritics, short/long first name) and one known misspelling.
_names_by_code = {
    "VER": ("Max Verstappen",),
    "PER": ("Sergio Pérez",),
    "HAM": ("Lewis Hamilton",),
    "RUS": ("George Russell",),
    "NOR": ("Lando Norris",),
    "PIA": ("Oscar Piastri",),
    "LEC": ("Charles Leclerc",),
    "SAI": ("Carlos Sainz",),
    "ALO": ("Fernando Alonso",),
    "STR": ("Lance Stroll",),
    "OCO": ("Esteban Ocon",),
    "GAS": ("Pierre Gasly",),
    "BOT": ("Valtteri Bottas",),
    "ZHO": ("Guanyu Zhou",),
    "TSU": ("Yuki Tsunoda",),
    "RIC": ("Daniel Ricciardo",),
    "MAG": ("Kevin Magnussen",),
    "HUL": ("Nico Hulkenberg", "Nico Hülkenberg"),
    "ALB": ("Alexander Albon", "Alex Albon"),
    "SAR": ("Logan Sargeant",),
    "HAD": ("Isack Hadjar",),
    "DOO": ("Jack Doohan",),
    "BOR": ("Gabriel Bortoleto",),
    "ANT": ("Kimi Antonelli",),
    "LAW": ("Liam Lawson",),
    "BEA": ("Oliver Bearman",),
    "COL": ("Franco Colapinto", "Franco Colapintop"),
}

# Flatten into the lookup used for mapping: full name -> driver code.
driver_map = {
    full_name: code
    for code, spellings in _names_by_code.items()
    for full_name in spellings
}

In [21]:
# Translate full driver names into driver codes via driver_map.
# Whitespace is stripped for the lookup so that a padded name still finds its
# code; the "Driver" column itself is left untouched here. Names that are not
# in driver_map end up as NaN.
data_2025["DriverCode"] = data_2025["Driver"].str.strip().map(driver_map)

In [22]:
# Attach the historical Qatar lap time by driver code; drivers without a match
# keep NaN (left join).
# A "QatarLapTime" column left over from an earlier run of this cell is dropped
# first, otherwise re-running would yield QatarLapTime_x / QatarLapTime_y
# instead of a single column.
data_2025 = (
    data_2025
    .drop(columns="QatarLapTime", errors="ignore")
    .merge(
        qatar_years[["DriverCode", "QatarLapTime"]],
        on="DriverCode",
        how="left",
    )
)

In [23]:
# Trim leading/trailing whitespace from the driver names.
data_2025 = data_2025.assign(Driver=data_2025["Driver"].str.strip())

In [24]:
# Discard any row keyed by the misspelt name "Franco Colapintop".
data_2025 = data_2025.loc[data_2025["Driver"].ne("Franco Colapintop")]

In [25]:
# One row per driver: when a name occurs more than once, the first occurrence
# is kept (the default for drop_duplicates).
data_2025 = data_2025.drop_duplicates(subset=["Driver"])

In [26]:
# Write the final per-driver table without the index column.
# The output folder is created first because to_csv does not create missing
# directories; this mirrors how the FastF1 cache folder is prepared earlier.
os.makedirs("../results", exist_ok=True)
data_2025.to_csv("../results/qatar2025_f1_data.csv", index=False)