In [2]:
import requests
import pandas as pd
import numpy as np
import os
import json
import time


In [3]:
# Test the Jolpica API with 2025 Round 1
url = "https://api.jolpi.ca/ergast/f1/2025/1/results.json"
response = requests.get(url)
data = response.json()

race = data['MRData']['RaceTable']['Races'][0]
print(f"Race: {race['raceName']}")
print(f"Date: {race['date']}")
print(f"Circuit: {race['Circuit']['circuitName']}")
print(f"Winner: {race['Results'][0]['Driver']['familyName']}")
print(f"\nStatus code: {response.status_code}")

Race: Australian Grand Prix
Date: 2025-03-16
Circuit: Albert Park Grand Prix Circuit
Winner: Norris

Status code: 200


In [4]:
# Get the full 2025 race schedule
url = "https://api.jolpi.ca/ergast/f1/2025.json"
response = requests.get(url)
data = response.json()

races = data['MRData']['RaceTable']['Races']
print(f"Total 2025 races available: {len(races)}")
print()
for race in races:
    print(f"Round {race['round']}: {race['raceName']} ‚Äî {race['date']}")

Total 2025 races available: 24

Round 1: Australian Grand Prix ‚Äî 2025-03-16
Round 2: Chinese Grand Prix ‚Äî 2025-03-23
Round 3: Japanese Grand Prix ‚Äî 2025-04-06
Round 4: Bahrain Grand Prix ‚Äî 2025-04-13
Round 5: Saudi Arabian Grand Prix ‚Äî 2025-04-20
Round 6: Miami Grand Prix ‚Äî 2025-05-04
Round 7: Emilia Romagna Grand Prix ‚Äî 2025-05-18
Round 8: Monaco Grand Prix ‚Äî 2025-05-25
Round 9: Spanish Grand Prix ‚Äî 2025-06-01
Round 10: Canadian Grand Prix ‚Äî 2025-06-15
Round 11: Austrian Grand Prix ‚Äî 2025-06-29
Round 12: British Grand Prix ‚Äî 2025-07-06
Round 13: Belgian Grand Prix ‚Äî 2025-07-27
Round 14: Hungarian Grand Prix ‚Äî 2025-08-03
Round 15: Dutch Grand Prix ‚Äî 2025-08-31
Round 16: Italian Grand Prix ‚Äî 2025-09-07
Round 17: Azerbaijan Grand Prix ‚Äî 2025-09-21
Round 18: Singapore Grand Prix ‚Äî 2025-10-05
Round 19: United States Grand Prix ‚Äî 2025-10-19
Round 20: Mexico City Grand Prix ‚Äî 2025-10-26
Round 21: S√£o Paulo Grand Prix ‚Äî 2025-11-09
Round 22: Las Vegas

In [5]:
def get_race_results(year, round_num):
    """Get race results for a specific round"""
    url = f"https://api.jolpi.ca/ergast/f1/{year}/{round_num}/results.json"
    response = requests.get(url)
    data = response.json()
    
    races = data['MRData']['RaceTable']['Races']
    if not races:
        return None
    
    race = races[0]
    results = []
    
    for result in race['Results']:
        results.append({
            'year': year,
            'round': int(race['round']),
            'race_name': race['raceName'],
            'circuit': race['Circuit']['circuitName'],
            'date': race['date'],
            'driver': result['Driver']['code'],
            'driver_name': f"{result['Driver']['givenName']} {result['Driver']['familyName']}",
            'team': result['Constructor']['name'],
            'grid_position': int(result['grid']),
            'finish_position': int(result['position']),
            'points': float(result['points']),
            'status': result['status'],
            'laps_completed': int(result['laps']),
            'fastest_lap_rank': result.get('FastestLap', {}).get('rank', None)
        })
    
    return results

# Test it on Round 1
test = get_race_results(2025, 1)
print(f"Collected {len(test)} driver results")
print(f"First entry: {test[0]}")

Collected 20 driver results
First entry: {'year': 2025, 'round': 1, 'race_name': 'Australian Grand Prix', 'circuit': 'Albert Park Grand Prix Circuit', 'date': '2025-03-16', 'driver': 'NOR', 'driver_name': 'Lando Norris', 'team': 'McLaren', 'grid_position': 1, 'finish_position': 1, 'points': 25.0, 'status': 'Finished', 'laps_completed': 57, 'fastest_lap_rank': '1'}


In [6]:
def get_qualifying_results(year, round_num):
    """Get qualifying results for a specific round"""
    url = f"https://api.jolpi.ca/ergast/f1/{year}/{round_num}/qualifying.json"
    response = requests.get(url)
    data = response.json()
    
    races = data['MRData']['RaceTable']['Races']
    if not races:
        return None
    
    race = races[0]
    results = []
    
    for result in race['QualifyingResults']:
        results.append({
            'year': year,
            'round': int(race['round']),
            'race_name': race['raceName'],
            'driver': result['Driver']['code'],
            'driver_name': f"{result['Driver']['givenName']} {result['Driver']['familyName']}",
            'team': result['Constructor']['name'],
            'quali_position': int(result['position']),
            'q1_time': result.get('Q1', None),
            'q2_time': result.get('Q2', None),
            'q3_time': result.get('Q3', None)
        })
    
    return results

# Test it on Round 1
test_quali = get_qualifying_results(2025, 1)
print(f"Collected {len(test_quali)} qualifying results")
print(f"First entry: {test_quali[0]}")

Collected 20 qualifying results
First entry: {'year': 2025, 'round': 1, 'race_name': 'Australian Grand Prix', 'driver': 'NOR', 'driver_name': 'Lando Norris', 'team': 'McLaren', 'quali_position': 1, 'q1_time': '1:15.912', 'q2_time': '1:15.415', 'q3_time': '1:15.096'}


In [8]:
def get_pit_stops(year, round_num):
    """Get pit stop data for a specific round"""
    url = f"https://api.jolpi.ca/ergast/f1/{year}/{round_num}/pitstops.json"
    response = requests.get(url)
    data = response.json()
    
    races = data['MRData']['RaceTable']['Races']
    if not races:
        return None
    
    race = races[0]
    results = []
    
    for stop in race['PitStops']:
        results.append({
            'year': year,
            'round': int(race['round']),
            'race_name': race['raceName'],
            'driver': stop['driverId'],
            'stop_number': int(stop['stop']),
            'lap': int(stop['lap']),
            'duration': stop['duration']
        })
    
    return results

# Test it on Round 1
test_pits = get_pit_stops(2025, 1)
print(f"Collected {len(test_pits)} pit stops")
print(f"First entry: {test_pits[0]}")

Collected 30 pit stops
First entry: {'year': 2025, 'round': 1, 'race_name': 'Australian Grand Prix', 'driver': 'norris', 'stop_number': 1, 'lap': 2, 'duration': '13.341'}


In [9]:
# Collect ALL 2025 data
all_results = []
all_qualifying = []
all_pitstops = []

print("Starting 2025 data collection...")
print("="*50)

for round_num in range(1, 25):
    print(f"Collecting Round {round_num}/24...", end=" ")
    
    # Race results
    results = get_race_results(2025, round_num)
    if results:
        all_results.extend(results)
    
    # Qualifying
    quali = get_qualifying_results(2025, round_num)
    if quali:
        all_qualifying.extend(quali)
    
    # Pit stops
    pits = get_pit_stops(2025, round_num)
    if pits:
        all_pitstops.extend(pits)
    
    print("‚úÖ")
    time.sleep(1)  # be polite to the API

print("="*50)
print(f"\nüèÅ Collection complete!")
print(f"Race results: {len(all_results)} rows")
print(f"Qualifying: {len(all_qualifying)} rows")
print(f"Pit stops: {len(all_pitstops)} rows")

Starting 2025 data collection...
Collecting Round 1/24... ‚úÖ
Collecting Round 2/24... ‚úÖ
Collecting Round 3/24... ‚úÖ
Collecting Round 4/24... ‚úÖ
Collecting Round 5/24... ‚úÖ
Collecting Round 6/24... ‚úÖ
Collecting Round 7/24... ‚úÖ
Collecting Round 8/24... ‚úÖ
Collecting Round 9/24... ‚úÖ
Collecting Round 10/24... ‚úÖ
Collecting Round 11/24... ‚úÖ
Collecting Round 12/24... ‚úÖ
Collecting Round 13/24... ‚úÖ
Collecting Round 14/24... ‚úÖ
Collecting Round 15/24... ‚úÖ
Collecting Round 16/24... ‚úÖ
Collecting Round 17/24... ‚úÖ
Collecting Round 18/24... ‚úÖ
Collecting Round 19/24... ‚úÖ
Collecting Round 20/24... ‚úÖ
Collecting Round 21/24... ‚úÖ
Collecting Round 22/24... ‚úÖ
Collecting Round 23/24... ‚úÖ
Collecting Round 24/24... ‚úÖ

üèÅ Collection complete!
Race results: 479 rows
Qualifying: 479 rows
Pit stops: 629 rows


In [11]:
# Convert to DataFrames
df_results = pd.DataFrame(all_results)
df_qualifying = pd.DataFrame(all_qualifying)
df_pitstops = pd.DataFrame(all_pitstops)

# Save to CSV
df_results.to_csv('../data/race_results_2025.csv', index=False)
df_qualifying.to_csv('../data/qualifying_2025.csv', index=False)
df_pitstops.to_csv('../data/pitstops_2025.csv', index=False)

print("Files saved!")
print(f"\nrace_results_2025.csv: {df_results.shape[0]} rows x {df_results.shape[1]} columns")
print(f"qualifying_2025.csv: {df_qualifying.shape[0]} rows x {df_qualifying.shape[1]} columns")
print(f"pitstops_2025.csv: {df_pitstops.shape[0]} rows x {df_pitstops.shape[1]} columns")

# Check file sizes
import os
for f in ['race_results_2025.csv', 'qualifying_2025.csv', 'pitstops_2025.csv']:
    size = os.path.getsize(f'../data/{f}')
    print(f"{f}: {size/1024:.1f} KB")

Files saved!

race_results_2025.csv: 479 rows x 14 columns
qualifying_2025.csv: 479 rows x 10 columns
pitstops_2025.csv: 629 rows x 7 columns
race_results_2025.csv: 55.1 KB
qualifying_2025.csv: 38.2 KB
pitstops_2025.csv: 30.2 KB


In [12]:
# Quick look at all three datasets
print("=== RACE RESULTS ===")
print(df_results.head(3).to_string())

print("\n=== QUALIFYING ===")
print(df_qualifying.head(3).to_string())

print("\n=== PIT STOPS ===")
print(df_pitstops.head(3).to_string())

# Check for any missing values
print("\n=== MISSING VALUES ===")
print("Race Results:")
print(df_results.isnull().sum())
print("\nQualifying:")
print(df_qualifying.isnull().sum())
print("\nPit Stops:")
print(df_pitstops.isnull().sum())

=== RACE RESULTS ===
   year  round              race_name                         circuit        date driver     driver_name      team  grid_position  finish_position  points    status  laps_completed fastest_lap_rank
0  2025      1  Australian Grand Prix  Albert Park Grand Prix Circuit  2025-03-16    NOR    Lando Norris   McLaren              1                1    25.0  Finished              57                1
1  2025      1  Australian Grand Prix  Albert Park Grand Prix Circuit  2025-03-16    VER  Max Verstappen  Red Bull              3                2    18.0  Finished              57                3
2  2025      1  Australian Grand Prix  Albert Park Grand Prix Circuit  2025-03-16    RUS  George Russell  Mercedes              4                3    15.0  Finished              57               11

=== QUALIFYING ===
   year  round              race_name driver     driver_name      team  quali_position   q1_time   q2_time   q3_time
0  2025      1  Australian Grand Prix    NOR    La