In [43]:
import fastf1
from datetime import datetime
import pandas as pd
import numpy as np
import json
import os
from fastf1.ergast import Ergast
import requests

In [12]:
# FastF1 will not enable a cache in a directory that does not exist, so make
# sure it is there first; this keeps a fresh-kernel "Run All" from failing.
os.makedirs('cache', exist_ok=True)
fastf1.Cache.enable_cache('cache')

In [13]:
def to_native(obj):
    """Convert NumPy/Pandas values to native Python for JSON serialization.

    Conversions, in order:
      * ``np.ndarray``        -> list (via ``tolist()``)
      * scalar missing value  -> ``None`` (``None``, NaN, ``pd.NaT``, ``pd.NA``,
        including NaN held in a NumPy float)
      * ``np.bool_``          -> ``bool``
      * ``np.integer``        -> ``int``
      * ``np.floating``       -> ``float``

    Anything else is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()

    # pd.isna() returns an array-like for list-like input; only a plain boolean
    # answer means "this scalar is missing". Checking this before the NumPy
    # float branch ensures np.float64('nan') becomes None rather than nan.
    try:
        missing = pd.isna(obj)
    except (TypeError, ValueError):
        missing = False
    if isinstance(missing, (bool, np.bool_)) and missing:
        return None

    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    return obj

In [14]:
def safe_int(val):
    """Return ``val`` as an ``int``, or ``None`` when that is not possible.

    ``None`` is returned for missing values (as judged by ``pd.isna``) and for
    anything ``int()`` rejects, including infinities, which raise
    ``OverflowError`` rather than ``ValueError``.
    """
    try:
        if pd.isna(val):
            return None
        return int(val)
    except (ValueError, TypeError, OverflowError):
        # ValueError also covers list-like input, where the truth value of
        # pd.isna()'s array result is ambiguous.
        return None

In [None]:
def safe_float(val):
    """Return ``val`` as a ``float``, or ``None`` when that is not possible.

    Mirrors ``safe_int``: missing values (as judged by ``pd.isna``) and values
    that ``float()`` cannot convert both yield ``None`` instead of raising.
    """
    try:
        if pd.isna(val):
            return None
        return float(val)
    except (ValueError, TypeError):
        # ValueError also covers list-like input, where the truth value of
        # pd.isna()'s array result is ambiguous.
        return None

In [45]:
# Connectivity probe for the Ergast driver-standings endpoint.
# `requests` is already imported in the notebook's import cell.
try:
    # Without a timeout, requests waits indefinitely on an unresponsive host
    # and the cell would block the kernel.
    r = requests.get('https://ergast.com/api/f1/2023/driverStandings.json', timeout=10)
    r.raise_for_status()
    print("Success! Ergast API reachable.")
except requests.RequestException as e:
    # Covers connection errors, timeouts and the HTTPError from raise_for_status().
    print("Failed to connect:", e)


Failed to connect: HTTPSConnectionPool(host='ergast.com', port=443): Max retries exceeded with url: /api/f1/2023/driverStandings.json (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000001F3E8AE5A90>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))


In [46]:
def collect_gp_data(year, grand_prix, session_type="R"):
    """Collect per-driver features for one session into a JSON-ready dict.

    Loads the FastF1 session selected by the arguments and builds one record
    per entry in ``session.drivers`` from the session results, laps and car
    telemetry. Fields that need an external source or further calculation are
    emitted as ``None`` placeholders.

    Args:
        year: Season, passed to ``fastf1.get_session``.
        grand_prix: Event identifier, passed to ``fastf1.get_session``.
        session_type: Session identifier, passed to ``fastf1.get_session``.
            Defaults to ``"R"``.

    Returns:
        dict with keys ``year``, ``grand_prix``, ``session`` and ``drivers``;
        ``drivers`` is a list of dicts with the sections ``prerace``,
        ``qualifying``, ``pastLaps`` and ``telemetry``. Every driver record is
        round-tripped through ``json`` so it holds only native Python types.
    """
    session = fastf1.get_session(year, grand_prix, session_type)
    session.load()

    results = session.results
    gp_data = {
        "year": year,
        "grand_prix": grand_prix,
        "session": session_type,
        "drivers": []
    }

    for driver in session.drivers:
        drv = session.get_driver(driver)
        # pick_drivers() is the replacement for the deprecated Laps.pick_driver().
        laps = session.laps.pick_drivers(driver)
        tel = session.car_data.get(driver, None)

        grid_position = (
            safe_int(results.loc[driver, "GridPosition"])
            if driver in results.index else None
        )

        # Lap-derived values only exist when the driver has at least one lap.
        if not laps.empty:
            last_lap = laps.iloc[-1]
            # total_seconds() is called on the mean, so LapTime is expected to
            # hold timedeltas; safe_float maps a NaN result to None.
            best3_lap_avg = safe_float(
                laps.nsmallest(3, "LapTime")["LapTime"].mean().total_seconds()
            )
            avg_lap_time = safe_float(laps["LapTime"].mean().total_seconds())
            current_lap = safe_int(last_lap["LapNumber"])
            position = safe_int(last_lap["Position"])
        else:
            best3_lap_avg = avg_lap_time = current_lap = position = None

        num_pit_stops = (
            safe_int(laps["PitInTime"].count()) if "PitInTime" in laps else None
        )

        def _channel(name):
            """Return telemetry column ``name``, or None when it is unavailable."""
            return tel[name] if tel is not None and name in tel else None

        speed = _channel("Speed")
        throttle = _channel("Throttle")
        drs = _channel("DRS")

        driver_data = {
            "driverId": to_native(drv["DriverNumber"]),
            "teamId": to_native(drv["TeamName"]),
            # Collection time, not a session time.
            "timestamp": datetime.utcnow().isoformat(),

            # --- Prerace features ---
            "prerace": {
                "driverChampPoints": None,  # External source
                "constructorChampPoints": None,  # External source
                "trackType": None,  # External source
                "expectedWeather": None  # session.weather_data if available
            },

            # --- Qualifying features ---
            # NOTE(review): these are computed from the session loaded above,
            # so with the default session_type="R" best3LapAvg describes that
            # session's laps rather than a qualifying session - confirm intent.
            "qualifying": {
                "gridPosition": grid_position,
                "best3LapAvg": best3_lap_avg,
                "deltaToPole": None,  # Calculate from best lap
                "tireBestLap": None   # Compound from best lap
            },

            # --- Past laps features ---
            "pastLaps": {
                "currentLap": current_lap,
                "position": position,
                "avgLapTime": avg_lap_time,
                "numPitStops": num_pit_stops,
                "timeLostInPits": None,  # Calculate if needed
                "positionsGainedLost": None,
                "deltaToLeader": None,
                "gapAhead": None,
                "gapBehind": None
            },

            # --- Telemetry features ---
            "telemetry": {
                "avgSpeed": safe_float(speed.mean()) if speed is not None else None,
                "throttleAvg": safe_float(throttle.mean()) if throttle is not None else None,
                "brakeUsagePct": None,  # Needs calculation
                # NOTE(review): this sums the raw DRS channel values; if the
                # channel carries status codes rather than 0/1 flags the sum
                # is not a usage count - verify against the telemetry docs.
                "drsUsageFreq": safe_int(drs.sum()) if drs is not None else None,
                "gearShiftsPerLap": None  # Needs calculation
            }
        }

        # Convert everything to native types
        driver_data = json.loads(json.dumps(driver_data, default=to_native))
        gp_data["drivers"].append(driver_data)

    return gp_data

In [None]:
if __name__ == "__main__":

    def default_converter(o):
        """Fallback for objects ``json`` cannot encode on its own.

        Series/DataFrames become dicts, objects exposing ``item()`` are
        unwrapped through it, and anything else is stringified.
        """
        if isinstance(o, (pd.Series, pd.DataFrame)):
            return o.to_dict()
        return o.item() if hasattr(o, "item") else str(o)

    # Session to export.
    year, grand_prix = 2024, "Monaco"
    data = collect_gp_data(year, grand_prix)

    # Spaces in the event name are replaced so the file name has none.
    file_path = f"datasets/{year}_{grand_prix.replace(' ', '_')}_full.json"
    os.makedirs("datasets", exist_ok=True)

    with open(file_path, "w") as f:
        f.write(json.dumps(data, indent=2, default=default_converter))

    print(f"Saved GP data to {file_path}")

core           INFO 	Loading data for Monaco Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '81', '55', '4', '63', '1', '44', '22', '23', '10', '14', '3', '77', '18', '2', '24', '31', '11', '27', '20']


Saved GP data to datasets/2024_Monaco_full.json


  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
  "timestamp": datetime.utcnow().isoformat(),
