# Script for Extracting Radios 

This script is for extracting the team radios of the 2023 season.

Races extracted:
* Belgium.
* Singapore.
* Spain.
* Monaco.
* Brazil.
* Netherlands.

### Flag for knowing if I needed to add more data due to eliminating post-race radios

I needed to add the last 3 GPs after eliminating post-race radios.

---

#### Getting meeting key
https://api.openf1.org/v1/meetings?year=2023&country_name=Spain
#### Getting session key
GET https://api.openf1.org/v1/sessions?meeting_key=1218&session_name=Race
#### Getting radios
https://api.openf1.org/v1/team_radio?session_key=9158&driver_number=11



In [18]:
"""
OpenF1 Team Radio Data Extraction Script - Multiple Grand Prix
------------------------------------------------------------------
Extracts all team radio data from the OpenF1 API for several GPs
and saves them in Parquet format as well as audio files.
"""

import pandas as pd
from pathlib import Path
import time
import requests

# Use current_dir based on **file** or cwd (useful in notebooks)
try:
    current_dir = Path(__file__).parent
except NameError:
    current_dir = Path.cwd()

# Navigate to project root from scripts/data_extraction
# First, go up to scripts folder
scripts_dir = current_dir.parent
# Then go up again to reach project root
project_root = scripts_dir.parent

# Destination directory for the Parquet files (f1-strategy/data/raw)
output_dir = project_root / "f1-strategy" / "data" / "raw"
output_dir.mkdir(parents=True, exist_ok=True)

# Base directory for the audio files
audio_base_dir = project_root / "f1-strategy" / "data" / "audio"
audio_base_dir.mkdir(parents=True, exist_ok=True)

# Define the session_keys and names of the GPs we want to extract
gp_data = [
    {"name": "Spain", "session_key": 9158, "year": 2023},
    {"name": "Singapore", "session_key": 9165, "year": 2023},
    {"name": "Belgium", "session_key": 9141, "year": 2023},
    {"name": "Monaco", "session_key": 9094, "year": 2023},
    {"name": "Brazil", "session_key": 9205, "year": 2023},  # Fixed typo in key name
    {"name": "Netherlands", "session_key": 9149, "year": 2023},
]



In [19]:
def fetch_team_radio(session_key, gp_name):
    """
    Extracts ALL team radio messages for a specific race,
    without filtering by driver.
    """
    # The driver_number parameter is omitted to retrieve all radio messages
    url = f"https://api.openf1.org/v1/team_radio?session_key={session_key}"
    print(f"Fetching: {url}")
    
    try:
        response = requests.get(url)
        response.raise_for_status()
        data = response.json()
        print(f"✓ Found {len(data)} records for GP {gp_name}")
        df = pd.DataFrame(data)
        if 'date' in df.columns:
            df['date'] = pd.to_datetime(df['date'], errors='coerce')
        
        # Add a column with the GP name for identification
        df['gp_name'] = gp_name
        
        return df
    except Exception as e:
        print(f"Error retrieving data for GP {gp_name}: {e}")
        return pd.DataFrame()


In [20]:
def download_radio_files(df, gp_name):
    """
    Downloads audio files for a specific team radio DataFrame.
    """
    print(f"\nDownloading audio files for the {gp_name} GP...")

    # Group by driver
    grouped = df.groupby(["driver_number"])

    total_downloads = 0
    for driver_number, group in grouped:
        folder_name = f"driver_{driver_number}"
        driver_folder = audio_base_dir / folder_name
        driver_folder.mkdir(parents=True, exist_ok=True)

        # Download and save audio files
        for i, row in group.iterrows():
            url = row["recording_url"]
            if pd.isna(url):
                continue

            # Create a filename including the GP name
            filename = f"driver_{driver_number}_{gp_name.lower()}_radio_{i}.mp3"
            output_path = driver_folder / filename

            # Check if the file already exists to avoid duplicate downloads
            if output_path.exists():
                #print(f"File already exists: {output_path}")
                continue

            # Download the file
            try:
                response = requests.get(url)
                response.raise_for_status()
                with open(output_path, "wb") as f:
                    f.write(response.content)
                print(f"Downloaded: {output_path}")
                total_downloads += 1

                # Small delay to avoid overloading the server
                time.sleep(0.5)
            except Exception as e:
                print(f"Error downloading {url}: {e}")

    return total_downloads


In [21]:
# Extract data and download audio files for all defined GPs
all_dfs = []
total_files_downloaded = 0

for gp in gp_data:
    print(f"\n--- Processing {gp['name']} GP {gp['year']} ---")

    # Extract team radio data
    df_team_radio = fetch_team_radio(gp['session_key'], gp['name'])

    if not df_team_radio.empty:
        # Save as Parquet
        parquet_path = output_dir / f"{gp['name']}_{gp['year']}_openf1_team_radio.parquet"
        df_team_radio.to_parquet(parquet_path)
        print(f"Data saved to {parquet_path}")

        # Download audio files
        files_downloaded = download_radio_files(df_team_radio, gp['name'])
        total_files_downloaded += files_downloaded

        # Add to the combined DataFrame
        all_dfs.append(df_team_radio)
    else:
        print(f"No data found for {gp['name']} GP")



--- Processing Spain GP 2023 ---
Fetching: https://api.openf1.org/v1/team_radio?session_key=9158
✓ Found 29 records for GP Spain
Data saved to c:\Users\victo\Desktop\Documents\Tercer año\Segundo Cuatrimestre\Finales\f1-strategy\data\raw\Spain_2023_openf1_team_radio.parquet

Downloading audio files for the Spain GP...
Downloaded: c:\Users\victo\Desktop\Documents\Tercer año\Segundo Cuatrimestre\Finales\f1-strategy\data\audio\driver_(1,)\driver_(1,)_spain_radio_5.mp3
Downloaded: c:\Users\victo\Desktop\Documents\Tercer año\Segundo Cuatrimestre\Finales\f1-strategy\data\audio\driver_(1,)\driver_(1,)_spain_radio_9.mp3
Downloaded: c:\Users\victo\Desktop\Documents\Tercer año\Segundo Cuatrimestre\Finales\f1-strategy\data\audio\driver_(1,)\driver_(1,)_spain_radio_14.mp3
Downloaded: c:\Users\victo\Desktop\Documents\Tercer año\Segundo Cuatrimestre\Finales\f1-strategy\data\audio\driver_(4,)\driver_(4,)_spain_radio_11.mp3
Downloaded: c:\Users\victo\Desktop\Documents\Tercer año\Segundo Cuatrimestre\F

In [22]:
# Combine all DataFrames into a single one
if all_dfs:
    combined_df = pd.concat(all_dfs, ignore_index=True)
    combined_path = output_dir / "Combined_2023_openf1_team_radio.parquet"
    combined_df.to_parquet(combined_path)
    print(f"\n✓ All combined data saved to {combined_path}")

    # Display final statistics
    print(f"\n--- FINAL SUMMARY ---")
    print(f"Total GPs processed: {len(all_dfs)} out of {len(gp_data)}")
    print(f"Total team radio records: {len(combined_df)}")
    print(f"Total audio files downloaded: {total_files_downloaded}")

    # Show communication count per driver
    print("\nDistribution of communications per driver:")
    driver_counts = combined_df['driver_number'].value_counts().sort_index()
    for driver_num, count in driver_counts.items():
        print(f"  • Driver #{driver_num}: {count} communications")
else:
    print("No data found for any GP. Check your connection or session_keys.")



✓ All combined data saved to c:\Users\victo\Desktop\Documents\Tercer año\Segundo Cuatrimestre\Finales\f1-strategy\data\raw\Combined_2023_openf1_team_radio.parquet

--- FINAL SUMMARY ---
Total GPs processed: 6 out of 6
Total team radio records: 684
Total audio files downloaded: 684

Distribution of communications per driver:
  • Driver #1: 59 communications
  • Driver #2: 17 communications
  • Driver #3: 8 communications
  • Driver #4: 36 communications
  • Driver #10: 25 communications
  • Driver #11: 38 communications
  • Driver #14: 40 communications
  • Driver #16: 27 communications
  • Driver #18: 20 communications
  • Driver #20: 27 communications
  • Driver #21: 7 communications
  • Driver #22: 17 communications
  • Driver #23: 27 communications
  • Driver #24: 27 communications
  • Driver #27: 39 communications
  • Driver #31: 39 communications
  • Driver #40: 7 communications
  • Driver #44: 49 communications
  • Driver #55: 67 communications
  • Driver #63: 63 communications
