In [1]:
import fastf1
import pandas as pd
from collections import defaultdict

# Enable cache
fastf1.Cache.enable_cache('cache')


In [22]:
REAL_2025_EVENTS = [
    "Australian Grand Prix",
    "Chinese Grand Prix",
    "Japanese Grand Prix",
    "Bahrain Grand Prix",
    "Saudi Arabian Grand Prix",
    "Miami Grand Prix",
    "Emilia Romagna Grand Prix",
    "Monaco Grand Prix",
    "Spanish Grand Prix",
    "Canadian Grand Prix",
    "Austrian Grand Prix",
    "British Grand Prix",
    "Belgian Grand Prix",
    "Hungarian Grand Prix",
    "Dutch Grand Prix",
    "Italian Grand Prix",
    "Azerbaijan Grand Prix",
    "Singapore Grand Prix",
    "Mexico City Grand Prix",
    "Las Vegas Grand Prix",
    "Abu Dhabi Grand Prix"
]

In [None]:
import fastf1
import pandas as pd
from collections import defaultdict

fastf1.Cache.enable_cache('cache')

driver_debut_years_2025 = {
    "VER": 2015, 
    "LAW": 2022,
    "LEC": 2018,
    "HAM": 2007,
    "RUS": 2019,
    "ANT": 2025,
    "NOR": 2019,
    "PIA": 2023,
    "ALO": 2001,
    "STR": 2017,
    "GAS": 2017,
    "DOO": 2025,
    "OCO": 2016,
    "BER": 2025,
    "HUL": 2010,
    "BOT": 2013,
    "ALB": 2015,
    "SAR": 2025,
    "TSU": 2021,
    "RIC": None,
    "HAD": 2025,
    "BOR": 2025 
}

def build_features_until_race(year: int, target_race: str, verbose: bool = False) -> pd.DataFrame:
    # load schedule and filter real races
    schedule = REAL_2025_EVENTS


    # accumulators
    driver_points = defaultdict(float)
    constructor_points = defaultdict(float)
    driver_wins = defaultdict(int)
    driver_podiums = defaultdict(int)
    driver_dnf = defaultdict(int)
    driver_quali_positions = defaultdict(list)
    driver_info = {}

    rows = []

    for idx, ev in schedule.iterrows():
        race_name = ev['EventName']
        if verbose:
            print(f"\nProcessing race: {race_name} (index {idx})")

        if race_name == target_race:
            # Build feature rows before this race
            if verbose:
                print("Reached target race. Building features before this race.")
            for dn, (abbr, team) in driver_info.items():
                races_so_far = len(driver_quali_positions[dn])
                win_rate = driver_wins[dn] / races_so_far if races_so_far > 0 else 0.0
                podium_rate = driver_podiums[dn] / races_so_far if races_so_far > 0 else 0.0
                avg_quali = sum(driver_quali_positions[dn]) / races_so_far if races_so_far > 0 else 0.0
                exp_years = year - driver_experience_years.get(abbr, year)
                dnf_count = driver_dnf[dn]

                # teammate comparison
                teammate = [d for d, (_, t) in driver_info.items() if t == team and d != dn]
                teammate_points = driver_points.get(teammate[0], 0.0) if teammate else 0.0
                teammate_comparison = driver_points.get(dn, 0.0) - teammate_points

                rows.append({
                    "Driver": abbr,
                    "Constructor": team,
                    "RaceName": race_name,
                    "DriverPointsSoFar": driver_points.get(dn, 0.0),
                    "ConstructorPointsSoFar": constructor_points.get(team, 0.0),
                    "NextRacePosition": None,
                    "DriverWinRateThisSeason": win_rate,
                    "DriverPodiumRate": podium_rate,
                    "AverageQualifyingPosition": avg_quali,
                    "DriverExperienceYears": exp_years,
                    "DNFCountThisSeason": dnf_count,
                    "TeammateComparison": teammate_comparison
                })
            break  # stop after target race

        # Else, update stats using this race
        # Load race results
        try:
            ses = fastf1.get_session(year, race_name, 'R')
            ses.load()
            res = ses.results.copy()
            res['Points'] = pd.to_numeric(res['Points'], errors='coerce').fillna(0.0)
        except Exception as e:
            if verbose:
                print(f"  Could not load race {race_name}: {e}")
            continue

        # Qualifying positions
        quali_positions = {}
        try:
            q = fastf1.get_session(year, race_name, 'Q')
            q.load()
            qres = q.results.copy()
            for _, row in qres.iterrows():
                dn = int(row['DriverNumber'])
                if not pd.isna(row['Position']):
                    quali_positions[dn] = int(row['Position'])
        except Exception:
            if verbose:
                print("  No qualifying session or error.")

        if verbose:
            print("  Quali positions:", quali_positions)

        for _, row in res.iterrows():
            dn = int(row['DriverNumber'])
            abbr = row['Abbreviation']
            team = row['TeamName']
            pts = float(row['Points'])
            pos = row['Position']
            status = str(row['Status']).lower()

            driver_info[dn] = (abbr, team)
            driver_points[dn] += pts
            constructor_points[team] += pts

            if pos == 1:
                driver_wins[dn] += 1
            if pos in [1, 2, 3]:
                driver_podiums[dn] += 1
            if status not in ["finished", "classified finish"]:
                driver_dnf[dn] += 1

            # add qualifying position if known
            if dn in quali_positions:
                driver_quali_positions[dn].append(quali_positions[dn])

        if verbose:
            print("  After race, driver_points:", {dn: driver_points[dn] for dn in driver_info.keys()})
            print("  DNF counts:", {dn: driver_dnf[dn] for dn in driver_info.keys()})

    df = pd.DataFrame(rows)
    if df.empty and verbose:
        print("Warning: got empty rows. Perhaps target_race name mismatch.")
    return df


if __name__ == "__main__":
    # Example: build features before “Singapore Grand Prix” in 2025
    df = build_features_until_race(2025, "Singapore Grand Prix", verbose=True)
    print(df.head(20))
    print("Total drivers:", len(df))


AttributeError: 'list' object has no attribute 'iterrows'

In [20]:
df

Unnamed: 0,Driver,Constructor,RaceName,DriverPointsSoFar,ConstructorPointsSoFar,NextRacePosition,DriverWinRateThisSeason,DriverPodiumRate,AverageQualifyingPosition,DriverExperienceYears,DNFCountThisSeason,TeammateComparison
0,NOR,McLaren,Singapore Grand Prix,230.0,458.0,,0.357143,0.714286,3.642857,6,2,2.0
1,VER,Red Bull Racing,Singapore Grand Prix,205.0,218.0,,0.285714,0.571429,3.214286,10,1,192.0
2,RUS,Mercedes,Singapore Grand Prix,162.0,220.0,,0.071429,0.357143,4.642857,6,1,104.0
3,ANT,Mercedes,Singapore Grand Prix,58.0,220.0,,0.0,0.071429,8.714286,0,6,-104.0
4,ALB,Williams,Singapore Grand Prix,46.0,71.0,,0.0,0.0,12.357143,6,5,21.0
5,STR,Aston Martin,Singapore Grand Prix,26.0,56.0,,0.0,0.0,15.692308,8,6,-4.0
6,HUL,Kick Sauber,Singapore Grand Prix,37.0,53.0,,0.0,0.071429,16.428571,15,6,21.0
7,LEC,Ferrari,Singapore Grand Prix,135.0,232.0,,0.0,0.285714,5.357143,7,1,38.0
8,PIA,McLaren,Singapore Grand Prix,228.0,458.0,,0.285714,0.785714,2.571429,2,1,-2.0
9,HAM,Ferrari,Singapore Grand Prix,97.0,232.0,,0.0,0.0,7.357143,18,2,-38.0
