In [None]:
import os
from collections import defaultdict

import fastf1
import pandas as pd

# Enable cache. FastF1 expects the cache directory to exist already, so create
# it first; otherwise this cell fails on a fresh checkout.
os.makedirs('cache', exist_ok=True)
fastf1.Cache.enable_cache('cache')


In [None]:
# Grand Prix names of the 2025 season in calendar order. The order matters:
# feature building walks this list and accumulates every race that comes
# before the target race, so a missing round silently drops its results.
REAL_2025_EVENTS = [
    "Australian Grand Prix",
    "Chinese Grand Prix",
    "Japanese Grand Prix",
    "Bahrain Grand Prix",
    "Saudi Arabian Grand Prix",
    "Miami Grand Prix",
    "Emilia Romagna Grand Prix",
    "Monaco Grand Prix",
    "Spanish Grand Prix",
    "Canadian Grand Prix",
    "Austrian Grand Prix",
    "British Grand Prix",
    "Belgian Grand Prix",
    "Hungarian Grand Prix",
    "Dutch Grand Prix",
    "Italian Grand Prix",
    "Azerbaijan Grand Prix",
    "Singapore Grand Prix",
    "United States Grand Prix",
    "Mexico City Grand Prix",
    "São Paulo Grand Prix",
    "Las Vegas Grand Prix",
    "Qatar Grand Prix",
    "Abu Dhabi Grand Prix",
]

In [23]:
import fastf1
import pandas as pd
from collections import defaultdict

# Enable the cache for this cell as well, so it can run on its own. FastF1
# expects the directory to exist already, so create it before enabling.
import os
os.makedirs('cache', exist_ok=True)
fastf1.Cache.enable_cache('cache')

# Year of each driver's first Grand Prix start, keyed by the three-letter
# abbreviation used in the race results. The feature builder subtracts this
# from the season year to get "DriverExperienceYears"; abbreviations missing
# here end up with 0 years, so every driver on the grid needs an entry.
driver_debut_years_2025 = {
    "VER": 2015,
    "LAW": 2023,
    "LEC": 2018,
    "HAM": 2007,
    "RUS": 2019,
    "ANT": 2025,
    "NOR": 2019,
    "PIA": 2023,
    "ALO": 2001,
    "STR": 2017,
    "GAS": 2017,
    "DOO": 2024,
    "OCO": 2016,
    "BEA": 2024,
    "BER": 2024,  # legacy misspelling of "BEA", kept so existing lookups still work
    "HUL": 2010,
    "BOT": 2013,
    "ALB": 2019,
    "SAR": 2023,
    "TSU": 2021,
    "RIC": 2011,
    "HAD": 2025,
    "BOR": 2025,
    "SAI": 2015,
    "COL": 2024,
}

def _is_classified_finish(status) -> bool:
    """Return True when a result status describes a driver who reached the flag."""
    text = str(status).strip().lower()
    # Lapped finishers show up as "+1 Lap", "+2 Laps", ... or "Lapped"
    # (presumably depending on the data source); they completed the race and
    # must not be counted as DNFs.
    return text in ("finished", "classified finish", "lapped") or text.startswith("+")


def _load_quali_positions(year, race_name) -> dict:
    """Return {driver number: qualifying position} for one event.

    Best effort: if the qualifying session cannot be loaded, the reason is
    printed and whatever was collected so far (possibly nothing) is returned.
    """
    positions = {}
    try:
        quali = fastf1.get_session(year, race_name, 'Q')
        quali.load()
        for _, row in quali.results.iterrows():
            if not pd.isna(row['Position']):
                positions[int(row['DriverNumber'])] = int(row['Position'])
    except Exception as e:
        print(f"No qualifying data for {race_name}: {e}")
    return positions


def build_features_until_race(year: int, target_race: str, events=None) -> pd.DataFrame:
    """Build one pre-race feature row per driver for ``target_race``.

    Race and qualifying results of every event listed before ``target_race``
    are loaded through FastF1 and accumulated into season-to-date statistics.

    Parameters
    ----------
    year : int
        Season passed to ``fastf1.get_session``; also the reference year for
        ``DriverExperienceYears``.
    target_race : str
        Event name to build features for. Only events listed before it
        contribute to the statistics.
    events : sequence of str, optional
        Event names in calendar order. Defaults to ``REAL_2025_EVENTS``.

    Returns
    -------
    pd.DataFrame
        One row per driver seen in any loaded race, with the columns
        Driver, Constructor, RaceName, DriverPointsSoFar,
        ConstructorPointsSoFar, NextRacePosition (always None),
        DriverWinRateThisSeason, DriverPodiumRate, AverageQualifyingPosition,
        DriverExperienceYears, DNFCountThisSeason and TeammateComparison.
        Empty when ``target_race`` is not in the event list or no race could
        be loaded.

    Notes
    -----
    Races that fail to load are reported and skipped. A driver's team is the
    one from the most recent loaded race.
    NOTE(review): when more than one other driver is recorded for the same
    team (e.g. after a mid-season swap), the teammate comparison uses the
    first one encountered - confirm this is the intended teammate.
    """
    schedule = list(REAL_2025_EVENTS if events is None else events)
    if target_race not in schedule:
        # Nothing can be built for an unknown target; bail out before
        # loading a whole season of sessions for no result.
        print(f"Unknown target race {target_race!r}: not in the event list, no features built.")
        return pd.DataFrame([])

    driver_points = defaultdict(float)
    constructor_points = defaultdict(float)
    driver_wins = defaultdict(int)
    driver_podiums = defaultdict(int)
    driver_dnf = defaultdict(int)
    driver_races = defaultdict(int)
    driver_quali_positions = defaultdict(list)
    driver_info = {}

    # Accumulate every event that comes before the target race.
    for race_name in schedule[:schedule.index(target_race)]:
        try:
            ses = fastf1.get_session(year, race_name, 'R')
            ses.load()
            res = ses.results.copy()
            res['Points'] = pd.to_numeric(res['Points'], errors='coerce').fillna(0.0)
        except Exception as e:
            print(f"Skipping {race_name} (no data): {e}")
            continue

        quali_positions = _load_quali_positions(year, race_name)

        for _, row in res.iterrows():
            dn = int(row['DriverNumber'])
            abbr = row['Abbreviation']
            team = row['TeamName']
            pts = float(row['Points'])
            pos = row['Position']

            driver_info[dn] = (abbr, team)
            driver_points[dn] += pts
            constructor_points[team] += pts
            # Count races independently of qualifying so the win/podium rates
            # stay correct when qualifying data is missing for an event.
            driver_races[dn] += 1

            if pos == 1:
                driver_wins[dn] += 1
            if pos in (1, 2, 3):
                driver_podiums[dn] += 1
            if not _is_classified_finish(row['Status']):
                driver_dnf[dn] += 1

            if dn in quali_positions:
                driver_quali_positions[dn].append(quali_positions[dn])

    rows = []
    for dn, (abbr, team) in driver_info.items():
        races_so_far = driver_races[dn]
        quali_history = driver_quali_positions.get(dn, [])

        win_rate = driver_wins[dn] / races_so_far if races_so_far > 0 else 0.0
        podium_rate = driver_podiums[dn] / races_so_far if races_so_far > 0 else 0.0
        avg_quali = sum(quali_history) / len(quali_history) if quali_history else 0.0

        # Unknown or unset debut years count as zero experience instead of
        # raising on ``year - None``.
        debut_year = driver_debut_years_2025.get(abbr)
        exp_years = year - debut_year if debut_year is not None else 0

        teammate = [d for d, (_, t) in driver_info.items() if t == team and d != dn]
        teammate_points = driver_points.get(teammate[0], 0.0) if teammate else 0.0
        teammate_comparison = driver_points.get(dn, 0.0) - teammate_points

        rows.append({
            "Driver": abbr,
            "Constructor": team,
            "RaceName": target_race,
            "DriverPointsSoFar": driver_points.get(dn, 0.0),
            "ConstructorPointsSoFar": constructor_points.get(team, 0.0),
            "NextRacePosition": None,  # target to be predicted
            "DriverWinRateThisSeason": win_rate,
            "DriverPodiumRate": podium_rate,
            "AverageQualifyingPosition": avg_quali,
            "DriverExperienceYears": exp_years,
            "DNFCountThisSeason": driver_dnf[dn],
            "TeammateComparison": teammate_comparison
        })

    return pd.DataFrame(rows)



# Build the pre-race feature table for the Singapore Grand Prix and show it,
# followed by the number of driver rows it contains.
df = build_features_until_race(2025, "Singapore Grand Prix")
print(df)
print(f"\nTotal drivers: {len(df)}")


core           INFO 	Loading data for Australian Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['4', '1', '63', '12', '23', '18', '27', '16', '81', '44', '10', '22', '31', '87', '30', '5', '14', '55', '7', '6']
core           INFO 	Loading data for Australian Grand Prix 

KeyboardInterrupt: 

In [None]:
# Drop the empty prediction target before exporting the model input.
# errors='ignore' keeps this cell re-runnable: a second run (or an empty
# feature table) no longer raises KeyError because the column is already gone.
df = df.drop(columns=['NextRacePosition'], errors='ignore')
df.to_csv("to_pred.csv", index=False)