# F1 What-If Simulator: Data Exploration and Training

This notebook connects to the OpenF1 API to explore Formula 1 data and prepare it for model training.

In [None]:
# Imports
import pandas as pd
import httpx
import asyncio
from typing import Dict, List, Optional

In [None]:
# Base URL of the OpenF1 REST API (v1). Endpoint paths such as /meetings,
# /sessions, /laps, /pit and /drivers are appended to it by the fetch functions.
OPENF1_BASE_URL = "https://api.openf1.org/v1"

## Find a Race Session

Our goal is to programmatically find the `session_key` for a specific, complete race event. We will use the 2023 Bahrain Grand Prix as our target.

In [None]:
async def fetch_session_key() -> Optional[str]:
    """
    Fetch the session_key for the Bahrain Grand Prix 2023 Race session.

    Two requests are made with one client: the 2023 meetings are listed to
    locate the Bahrain meeting, then that meeting's sessions are listed to
    locate the session named "Race". Progress and errors are printed rather
    than raised.

    Returns:
        Optional[str]: The session_key if found, None otherwise. The value is
        passed through exactly as the API returns it.
        NOTE(review): OpenF1 documents keys as integers -- confirm whether
        the annotation should be int.
    """
    async with httpx.AsyncClient() as client:
        try:
            # Step 1: Find the meeting_key for Bahrain 2023
            print("Step 1: Finding Bahrain 2023 meeting...")
            meetings_response = await client.get(
                f"{OPENF1_BASE_URL}/meetings", params={"year": 2023}
            )
            meetings_response.raise_for_status()
            meetings_data = meetings_response.json()

            # Substring match instead of equality: OpenF1 documents meeting
            # names in the form "<Country> Grand Prix", so comparing the whole
            # name to 'bahrain' would never succeed. `or ''` guards against a
            # null meeting_name in the payload.
            bahrain_meeting = next(
                (
                    meeting
                    for meeting in meetings_data
                    if 'bahrain' in (meeting.get('meeting_name') or '').lower()
                ),
                None,
            )

            if not bahrain_meeting:
                print("Error: Bahrain meeting not found in 2023")
                return None

            meeting_key = bahrain_meeting['meeting_key']
            print(f"Found Bahrain meeting with key: {meeting_key}")

            # Step 2: Find the Race session for this meeting
            print("\nStep 2: Finding Race session...")
            sessions_response = await client.get(
                f"{OPENF1_BASE_URL}/sessions", params={"meeting_key": meeting_key}
            )
            sessions_response.raise_for_status()
            sessions_data = sessions_response.json()

            # Exact (case-insensitive) match on purpose, so that only the
            # session literally named "Race" is selected.
            race_session = next(
                (
                    session
                    for session in sessions_data
                    if (session.get('session_name') or '').lower() == 'race'
                ),
                None,
            )

            if not race_session:
                print("Error: Race session not found for Bahrain 2023")
                return None

            session_key = race_session['session_key']
            print(f"Found Race session with key: {session_key}")

            return session_key

        except httpx.HTTPStatusError as e:
            # Non-2xx response reported by raise_for_status().
            print(f"HTTP error occurred: {e}")
            return None
        except Exception as e:
            # Best-effort notebook helper: report anything else (connection
            # problems, unexpected payload shape) and signal failure with None.
            print(f"Error occurred: {e}")
            return None

# Execute the function
session_key = await fetch_session_key()
print(f"\nFinal session_key: {session_key}")

## Fetch Race Data

Now that we have the race `session_key`, let's fetch the essential data for our simulation: lap data, pit stop data, and driver information.

In [None]:
async def fetch_race_data(session_key: str) -> Dict[str, pd.DataFrame]:
    """
    Download the lap, pit stop and driver records of one session as DataFrames.

    The three endpoints are requested one after another through a single
    client. Any failure is printed and yields an empty dictionary, even when
    earlier requests in the sequence succeeded.

    Args:
        session_key (str): The session key for the race

    Returns:
        Dict[str, pd.DataFrame]: 'laps', 'pit' and 'drivers' DataFrames on
        success; an empty dict if any request fails
    """
    # Each entry is (endpoint path, which is also the result key; progress message).
    fetch_plan = (
        ('laps', "\nFetching lap data..."),
        ('pit', "Fetching pit stop data..."),
        ('drivers', "Fetching driver information..."),
    )

    async with httpx.AsyncClient() as client:
        try:
            print(f"Fetching data for session_key: {session_key}")

            frames = {}
            for endpoint, progress_message in fetch_plan:
                print(progress_message)
                response = await client.get(
                    f"{OPENF1_BASE_URL}/{endpoint}?session_key={session_key}"
                )
                response.raise_for_status()
                frames[endpoint] = pd.DataFrame(response.json())

            print("\nData fetching completed successfully!")
            return frames

        except httpx.HTTPStatusError as e:
            print(f"HTTP error occurred: {e}")
            return {}
        except Exception as e:
            print(f"Error occurred: {e}")
            return {}

# Execute the function
if session_key:
    race_data = await fetch_race_data(session_key)
    
    # Extract DataFrames
    laps_df = race_data.get('laps', pd.DataFrame())
    pit_df = race_data.get('pit', pd.DataFrame())
    drivers_df = race_data.get('drivers', pd.DataFrame())
else:
    print("Cannot fetch race data without a valid session_key")

## Initial Data Review

Let's examine the structure, columns, and data types of our three DataFrames.

In [None]:
# Review Lap Data: preview rows, column dtypes, shape and column names
print("=" * 50)
print("LAP DATA REVIEW")
print("=" * 50)
print("\nFirst 5 rows:")
print(laps_df.head())
print("\nDataFrame Info:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
laps_df.info()
print(f"\nShape: {laps_df.shape}")
print(f"Columns: {list(laps_df.columns)}")

In [None]:
# Review Pit Stop Data: preview rows, column dtypes, shape and column names
print("=" * 50)
print("PIT STOP DATA REVIEW")
print("=" * 50)
print("\nFirst 5 rows:")
print(pit_df.head())
print("\nDataFrame Info:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
pit_df.info()
print(f"\nShape: {pit_df.shape}")
print(f"Columns: {list(pit_df.columns)}")

In [None]:
# Review Driver Data: preview rows, column dtypes, shape and column names
print("=" * 50)
print("DRIVER DATA REVIEW")
print("=" * 50)
print("\nFirst 5 rows:")
print(drivers_df.head())
print("\nDataFrame Info:")
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
drivers_df.info()
print(f"\nShape: {drivers_df.shape}")
print(f"Columns: {list(drivers_df.columns)}")

## Summary

We have successfully:
1. Connected to the OpenF1 API
2. Found the session_key for the Bahrain Grand Prix 2023 Race
3. Fetched lap data, pit stop data, and driver information
4. Reviewed the structure and content of our datasets

This data will serve as the foundation for our F1 simulation model training.