In [None]:
import pandas as pd
from catboost import CatBoostRegressor, Pool
from sklearn.metrics import mean_absolute_error

# -----------------------------
# Load data
# -----------------------------
# Read the lap table from data.csv (path is relative to the working directory).
df = pd.read_csv("data.csv")

# Keep only the rows whose IsPushLap flag equals 1. copy() gives an
# independent frame so later column assignments do not write into a view
# of the full table.
df = df.loc[df["IsPushLap"].eq(1)].copy()

# -----------------------------
# Create POLE TIME per session
# -----------------------------
# Columns that together identify one group of laps to compare.
group_cols = ["Year", "Event", "Session", "QualiSegment"]

# Smallest LapTime_sec inside each group, as a table with one row per group
# and the minimum stored under the name PoleTime_sec.
pole_times = (
    df.groupby(group_cols)["LapTime_sec"]
    .min()
    .rename("PoleTime_sec")
    .reset_index()
)

# Left join so every lap row is kept and gains its group's PoleTime_sec.
df = pd.merge(df, pole_times, how="left", on=group_cols)

# -----------------------------
# Target: delta to pole
# -----------------------------
# Gap between each lap and the fastest lap of its group; the fastest lap
# itself therefore gets 0.
df["delta_to_pole"] = df["LapTime_sec"].sub(df["PoleTime_sec"])

# -----------------------------
# Sort for rolling features
# -----------------------------
# Order rows by driver, then year, then event, so each driver's rows are
# contiguous before the shift/rolling features are built.
# NOTE(review): Event is ordered by its raw values; if it holds event names
# rather than round numbers this is alphabetical, not calendar order - confirm.
df = df.sort_values(by=["Driver", "Year", "Event"], ascending=True)

# -----------------------------
# Driver recent form (last 5)
# -----------------------------
# Step 1: for every row, the same driver's previous delta_to_pole (shifted
# inside the Driver group, so a driver's first row becomes NaN and the
# current row's own value is never included).
driver_prev_delta = df.groupby("Driver")["delta_to_pole"].shift(periods=1)

# Step 2: mean over five consecutive rows of that shifted column. This window
# slides over the whole column in row order, not per driver; it only stays
# inside one driver because the frame is sorted by Driver and the NaN on each
# driver's first row makes any window that spans two drivers evaluate to NaN
# (a window needs 5 non-missing values by default).
df["driver_avg_delta_last5"] = driver_prev_delta.rolling(window=5).mean()

# -----------------------------
# Team recent form (last 5)
# -----------------------------
# Mean of each team's five previous delta_to_pole values (current row
# excluded via shift(1); NaN until a team has five earlier rows).
#
# Both the shift and the rolling window run inside the Team group. Calling
# rolling() on the result of a grouped shift() slides over the whole column
# in row order, and because rows are ordered by Driver rather than Team,
# those windows would average values belonging to different teams.
#
# The rows are viewed in Year/Event order first so that "previous" follows
# that order within a team instead of the driver-major row order. The result
# keeps df's index labels, so the assignment re-aligns it to the current rows.
# NOTE(review): Event is ordered by its raw values here - confirm that this
# matches calendar order for the data in use.
df["team_avg_delta_last5"] = (
    df.sort_values(["Year", "Event"])
    .groupby("Team")["delta_to_pole"]
    .transform(lambda team_deltas: team_deltas.shift(1).rolling(5).mean())
)

# -----------------------------
# Driver vs teammate gap
# -----------------------------
df["team_session_avg"] = (
    df.groupby(["Year", "Event", "Session"]()
