In [5]:
import pandas as pd

df= pd.read_csv("../data/qatar2025_f1_data.csv")

In [6]:
df["Driver"] = df["Driver"].replace({
    "Franco Colapintop": "Franco Colapinto"
})

In [7]:
df = df.sort_values(
    by=df.columns.tolist(), 
    ascending=False
).drop_duplicates(subset=["Driver"], keep="first").reset_index(drop=True)

In [8]:
clean_df = df.drop(columns=[
    "QatarLapTime_x",
    "QatarLapTime_y",
    "QatarLap_2021",
    "QatarLap_2023"
], errors="ignore")

In [9]:
columns = [
    "AvgQualiPos_2025",
    "AvgRacePos_2025",
    "AvgSprintQualiPos_2025",
    "AvgSprintPos_2025"
]

for col in columns:
    clean_df[col] = clean_df[col].fillna(clean_df[col].mean())

In [10]:
for col in columns:
    mean = clean_df[col].mean()
    std = clean_df[col].std()
    clean_df[col + "_z"] = (clean_df[col] - mean) / std

In [11]:
for col in columns:
    clean_df[col + "_z_inv"] = -clean_df[col + "_z"]

In [12]:
qatar_mean = clean_df["QatarLapTime"].mean()
qatar_std = clean_df["QatarLapTime"].std()

In [13]:
clean_df["QatarLapTime_z"] = (clean_df["QatarLapTime"] - qatar_mean) / qatar_std

In [14]:
clean_df["QatarPace_z_inv"] = -clean_df["QatarLapTime_z"]

In [15]:
w_quali = 1.0
w_race = 1.0
w_sprint_quali = 0.7
w_sprint = 0.7
w_qatar = 1.3 

In [16]:
clean_df["QatarPace_z_inv_filled"] = clean_df["QatarPace_z_inv"].fillna(0.0)

In [17]:
clean_df["PerformanceScore"] = (
    w_quali        * clean_df["AvgQualiPos_2025_z_inv"] +
    w_race         * clean_df["AvgRacePos_2025_z_inv"] +
    w_sprint_quali * clean_df["AvgSprintQualiPos_2025_z_inv"] +
    w_sprint       * clean_df["AvgSprintPos_2025_z_inv"] +
    w_qatar        * clean_df["QatarPace_z_inv_filled"]
)

In [22]:
results_df = clean_df[["Driver", "Team", "PerformanceScore"]].copy()

ranking = (
     results_df.sort_values("PerformanceScore", ascending=False)
      .reset_index(drop=True)
)

ranking.head(15)

Unnamed: 0,Driver,Team,PerformanceScore
0,Lando Norris,McLaren Mercedes,5.592266
1,Max Verstappen,Red Bull Racing honda RBPT,5.404963
2,Oscar Piastri,McLaren Mercedes,4.997976
3,Lewis Hamilton,Ferrari,4.771522
4,George Russell,Mercedes,4.414175
5,Charles Leclerc,Ferrari,4.133009
6,Kimi Antonelli,Mercedes,2.085439
7,Fernando Alonso,Aston Martin Aramco Mercedes,0.208979
8,Isack Hadjar,Racing bulls Honda RBPT,-0.159453
9,Yuki Tsunoda,Red Bull Racing Honda RBPT,-0.18367


In [23]:
team_view = (
    ranking[["Team", "Driver", "PerformanceScore"]]
    .sort_values(["Team", "PerformanceScore"], ascending=[True, False])
)

team_view["TeamRank"] = team_view.groupby("Team")["PerformanceScore"].rank(ascending=False, method="first")

team_view

Unnamed: 0,Team,Driver,PerformanceScore,TeamRank
10,Alpine Renault,Pierre Gasly,-0.219693,1.0
13,Alpine Renault,Franco Colapinto,-1.630622,2.0
17,Alpine Renault,Jack Doohan,-4.181812,3.0
7,Aston Martin Aramco Mercedes,Fernando Alonso,0.208979,1.0
15,Aston Martin Aramco Mercedes,Lance Stroll,-2.560539,2.0
3,Ferrari,Lewis Hamilton,4.771522,1.0
5,Ferrari,Charles Leclerc,4.133009,2.0
14,Haas Ferrari,Esteban Ocon,-1.932388,1.0
16,Haas Ferrari,Oliver Bearman,-3.321877,2.0
18,Kick Sauber Ferrari,Gabriel Bortoleto,-4.402847,1.0


In [24]:
ranking.to_csv("../results/qatar2025_driver_predictions.csv", index=False)
team_view.to_csv("../results/qatar2025_team_view.csv", index=False)

In [25]:
clean_df.to_csv("../data/clean_df.csv", index=False)