In [5]:
import pandas as pd

# Build the per-circuit median lookup table from push laps only and persist it
# as circuit_medians.csv for the prediction cell.
df = pd.read_csv("data.csv")
df = df[df["IsPushLap"] == 1]

GROUP_COLS = [
    "Event",
    "CircuitName",
    "Country",
    "TrackType",
    "LapSpeedClass"
]

NUMERIC_COLS = [
    "SpeedI1", "SpeedI2", "SpeedFL", "SpeedST",
    "TrackLength_m", "NumCorners", "CornerDensity",
    "AvgCornerSpacing_m", "AirTemp", "TrackTemp",
    "WindSpeed", "Altitude_m", "DRSZones"
]

# The prediction cell filters circuit_medians.csv on "QualiSegment" and reads
# "SessionMedianLap" from the matching row, so both columns have to survive
# the aggregation. They are only added when data.csv actually provides them,
# which leaves the output unchanged for inputs that lack them.
# NOTE(review): assumes SessionMedianLap is numeric in data.csv -- confirm.
LOOKUP_KEY_COLS = ["QualiSegment"]
LOOKUP_VALUE_COLS = ["SessionMedianLap"]

group_cols = GROUP_COLS + [col for col in LOOKUP_KEY_COLS if col in df.columns]
value_cols = NUMERIC_COLS + [col for col in LOOKUP_VALUE_COLS if col in df.columns]

missing_lookup_cols = [
    col for col in LOOKUP_KEY_COLS + LOOKUP_VALUE_COLS if col not in df.columns
]
if missing_lookup_cols:
    # Do not fail here: the CSV is still written exactly as before, but make
    # it visible that the downstream lookup will not find these columns.
    print(
        f"⚠️ data.csv has no {missing_lookup_cols} column(s); "
        "predict_quali_time will not be able to use this file"
    )

# One row per distinct combination of the grouping keys; each numeric column
# holds the median over that group's push laps.
circuit_medians = (
    df.groupby(group_cols)[value_cols]
      .median()
      .reset_index()
)

circuit_medians.to_csv("circuit_medians.csv", index=False)

print("✅ circuit_medians.csv rebuilt correctly")

✅ circuit_medians.csv rebuilt correctly


In [4]:
import pandas as pd
from catboost import CatBoostRegressor

# === Trained CatBoost regressor, restored from disk ===
model = CatBoostRegressor()
model.load_model("quali_q3_delta_model.cbm")

# === Median lookup table read from circuit_medians.csv ===
medians = pd.read_csv("circuit_medians.csv")

# === Model input columns ===
# The order is stated to match the order used at training time, so keep the
# two lists (and their concatenation) exactly as they are.
categorical_features = [
    "Driver",
    "Team",
    "Compound",
    "Event",
    "Session",
    "QualiSegment",
    "CircuitName",
    "Country",
    "TrackType",
    "LapSpeedClass",
    "Driver_Track",
    "Team_Track",
]

numeric_features = [
    "TyreLife",
    "SpeedI1",
    "SpeedI2",
    "SpeedFL",
    "SpeedST",
    "TrackLength_m",
    "NumCorners",
    "CornerDensity",
    "AvgCornerSpacing_m",
    "AirTemp",
    "TrackTemp",
    "WindSpeed",
    "Altitude_m",
    "DRSZones",
]

# Categorical columns first, numeric columns after.
features = [*categorical_features, *numeric_features]

def predict_quali_time(
    driver,
    team,
    event,
    quali_segment,
    compound="SOFT",
    session="Q",
    tyre_life=2
):
    """Predict a qualifying lap time for one driver at one event.

    Looks up the first row of the module-level ``medians`` table matching
    ``event`` and ``quali_segment``, combines its track/telemetry medians with
    the driver-specific inputs into a single-row frame ordered by the
    module-level ``features`` list, and adds the delta predicted by the
    module-level ``model`` to that row's ``SessionMedianLap``.

    Args:
        driver: Value for the "Driver" feature; also used to build
            "Driver_Track" as ``f"{driver}_{CircuitName}"``.
        team: Value for the "Team" feature; also used to build "Team_Track".
        event: Value matched against the "Event" column of ``medians``.
        quali_segment: Value matched against the "QualiSegment" column of
            ``medians``.
        compound: Value for the "Compound" feature.
        session: Value for the "Session" feature.
        tyre_life: Value for the "TyreLife" feature (default 2, the value
            that used to be hard-coded).

    Returns:
        ``SessionMedianLap + predicted delta`` rounded to 3 decimal places
        (presumably seconds -- confirm against the training target).

    Raises:
        KeyError: If ``medians`` lacks any column this function reads.
        ValueError: If no row of ``medians`` matches ``event`` and
            ``quali_segment``.
    """
    # Columns copied unchanged from the matching median row into the model input.
    track_cols = ("CircuitName", "Country", "TrackType", "LapSpeedClass")
    median_numeric_cols = (
        "SpeedI1", "SpeedI2", "SpeedFL", "SpeedST",
        "TrackLength_m", "NumCorners", "CornerDensity",
        "AvgCornerSpacing_m", "AirTemp", "TrackTemp",
        "WindSpeed", "Altitude_m", "DRSZones",
    )

    # Fail up-front with every missing column listed, instead of a bare
    # KeyError on whichever column happens to be accessed first.
    required_cols = (
        ("Event", "QualiSegment", "SessionMedianLap")
        + track_cols
        + median_numeric_cols
    )
    missing_cols = [col for col in required_cols if col not in medians.columns]
    if missing_cols:
        raise KeyError(
            f"medians table is missing required column(s): {missing_cols}"
        )

    # -----------------------------
    # Fetch median row
    # -----------------------------
    matches = medians[
        (medians["Event"] == event) &
        (medians["QualiSegment"] == quali_segment)
    ]

    if matches.empty:
        raise ValueError(f"No median data found for {event} {quali_segment}")

    # If several rows match, the first one is used.
    row = matches.iloc[0]
    session_median = row["SessionMedianLap"]

    # -----------------------------
    # Build input row
    # -----------------------------
    input_data = {
        "Driver": driver,
        "Team": team,
        "Compound": compound,
        "Event": event,
        "Session": session,
        "QualiSegment": quali_segment,
        "Driver_Track": f"{driver}_{row['CircuitName']}",
        "Team_Track": f"{team}_{row['CircuitName']}",
        "TyreLife": tyre_life,
    }
    input_data.update({col: row[col] for col in track_cols + median_numeric_cols})

    # Selecting by `features` enforces the column order the model expects,
    # regardless of the dict's insertion order.
    X = pd.DataFrame([input_data])[features]

    # -----------------------------
    # Predict
    # -----------------------------
    predicted_delta = model.predict(X)[0]
    predicted_lap_time = session_median + predicted_delta

    return round(predicted_lap_time, 3)

# === Smoke test: one Q3 prediction with default compound and session ===
sample_request = {
    "driver": "Max Verstappen",
    "team": "Red Bull Racing",
    "event": "Australian Grand Prix",
    "quali_segment": "Q3",
}
print(predict_quali_time(**sample_request))


79.522
