In [2]:
# Import libraries
import requests
import json
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

In [3]:
def get_season_list(start_year=2014):
    """Return the F1 seasons from ``start_year`` onwards as a DataFrame.

    Sends one GET request to the Ergast ``seasons.json`` endpoint (with
    ``limit=1000`` in the query string) and keeps the entries whose
    ``season`` value, converted to ``int``, is at least ``start_year``.

    Parameters
    ----------
    start_year : int, default 2014
        Earliest season (inclusive) to keep.

    Returns
    -------
    pandas.DataFrame
        One row per season, re-indexed from 0. The columns are whatever
        keys the API returns for each season entry; ``season`` is kept as
        returned (it is only cast to ``int`` for the comparison).

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    url = 'http://ergast.com/api/f1/seasons.json?limit=1000'
    # A timeout stops a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of trying to parse an error body.
    response.raise_for_status()
    seasons = response.json()['MRData']['SeasonTable']['Seasons']

    if not seasons:
        # An empty listing would build a column-less frame and make the
        # 'season' lookup below raise KeyError.
        return pd.DataFrame(columns=['season'])

    df_seasons = pd.DataFrame(seasons)
    is_recent = df_seasons['season'].astype(int) >= start_year
    return df_seasons[is_recent].reset_index(drop=True)

# Fetch the season list with the default start_year (2014 onwards).
# Running this cell performs an HTTP request.
df_season_list = get_season_list()

In [4]:
def get_race_schedule(start_year=2014, end_year=2023):
    """Download the race calendar for every season in a range.

    Sends one GET request per season to ``http://ergast.com/api/f1/{year}.json``
    and flattens each race entry (including its nested ``Circuit`` and
    ``Location`` objects) into a single row.

    Parameters
    ----------
    start_year : int, default 2014
        First season to fetch (inclusive).
    end_year : int, default 2023
        Last season to fetch (inclusive).

    Returns
    -------
    pandas.DataFrame
        One row per race with the columns ``season``, ``round``,
        ``raceName``, ``date``, ``time``, ``circuitId``, ``circuitName``,
        ``location-lat``, ``location-long``, ``location-locality`` and
        ``location-country``. Values are stored as returned by the API
        (no type conversion). ``time`` is ``None`` when an entry has no
        start time.

    Raises
    ------
    requests.HTTPError
        If any season request answers with an error status code.
    """
    all_races = []
    for year in range(start_year, end_year + 1):
        url = f'http://ergast.com/api/f1/{year}.json'
        # A timeout stops a stalled connection from hanging the notebook.
        response = requests.get(url, timeout=30)
        # Fail loudly on an HTTP error instead of parsing an error body.
        response.raise_for_status()

        for race in response.json()['MRData']['RaceTable']['Races']:
            circuit = race['Circuit']
            location = circuit['Location']
            all_races.append({
                'season': race['season'],
                'round': race['round'],
                'raceName': race['raceName'],
                'date': race['date'],
                # Treated as optional: an entry without a start time yields
                # None rather than a KeyError that aborts the whole download.
                'time': race.get('time'),
                'circuitId': circuit['circuitId'],
                'circuitName': circuit['circuitName'],
                'location-lat': location['lat'],
                'location-long': location['long'],
                'location-locality': location['locality'],
                'location-country': location['country'],
            })

    return pd.DataFrame(all_races)

# Build the schedule for the default range (2014 through 2023, inclusive).
# This issues one HTTP request per season.
df_race_schedule = get_race_schedule()

In [21]:
def get_rounds_in_season(year):
    """Return how many races the schedule endpoint lists for ``year``.

    Parameters
    ----------
    year : int
        Season to look up; interpolated into the request URL.

    Returns
    -------
    int
        Length of the ``Races`` list in the API response.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    url = f'http://ergast.com/api/f1/{year}.json'
    # A timeout stops a stalled connection from blocking the caller forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error body.
    response.raise_for_status()
    return len(response.json()['MRData']['RaceTable']['Races'])

def fetch_race_results(year, round_num):
    """Fetch the classified results of a single race.

    Parameters
    ----------
    year : int
        Season of the race; interpolated into the request URL.
    round_num : int
        Round number within the season; interpolated into the request URL.

    Returns
    -------
    list[dict]
        One dict per entry in the race's ``Results`` list, with the keys
        ``season``, ``round``, ``raceName``, ``date``, ``driverId``,
        ``constructorId``, ``grid``, ``position``, ``status``, ``points``,
        ``time-ms``, ``time-hrmins``, ``Max Avg Speed Unit`` and
        ``Max Avg Speed``. Values are passed through as returned by the API.
        The last four are ``None`` when the corresponding ``Time`` or
        ``FastestLap``/``AverageSpeed`` data is missing. An empty list is
        returned when the API reports no race for this season/round.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    url = f'http://ergast.com/api/f1/{year}/{round_num}/results.json?limit=1000'
    # A timeout stops one stalled request from pinning a worker thread forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error body.
    response.raise_for_status()
    races = response.json()['MRData']['RaceTable']['Races']

    if not races:
        # No data for this round: return a falsy value the caller can skip
        # instead of raising IndexError on races[0].
        return []

    race = races[0]
    race_results = []
    for result in race['Results']:
        # Both blocks are optional per result; fall back to empty dicts so the
        # lookups below yield None instead of raising KeyError.
        finish_time = result.get('Time') or {}
        avg_speed = (result.get('FastestLap') or {}).get('AverageSpeed') or {}

        race_results.append({
            'season': race['season'],
            'round': race['round'],
            'raceName': race['raceName'],
            'date': race['date'],
            'driverId': result['Driver']['driverId'],
            'constructorId': result['Constructor']['constructorId'],
            'grid': result['grid'],
            'position': result['position'],
            'status': result['status'],
            'points': result['points'],
            'time-ms': finish_time.get('millis'),
            'time-hrmins': finish_time.get('time'),
            # Despite the column names, these come from the driver's
            # FastestLap -> AverageSpeed entry.
            'Max Avg Speed Unit': avg_speed.get('units'),
            'Max Avg Speed': avg_speed.get('speed'),
        })
    return race_results

def get_race_results(start_year, end_year):
    """Collect race results for every round of every season in a range.

    For each season from ``start_year`` to ``end_year`` (both inclusive) the
    number of rounds is looked up with ``get_rounds_in_season`` and one
    ``fetch_race_results`` task per round is submitted to a pool of 12 worker
    threads. Results are gathered in submission order, so rows are ordered by
    season and then round regardless of which request finishes first.

    Parameters
    ----------
    start_year : int
        First season to fetch (inclusive).
    end_year : int
        Last season to fetch (inclusive).

    Returns
    -------
    pandas.DataFrame
        One row per result dict returned by the fetch tasks. Tasks that
        return a falsy value contribute no rows.
    """
    with ThreadPoolExecutor(max_workers=12) as pool:
        # The round count is evaluated once per season, right before that
        # season's tasks are submitted.
        pending = [
            pool.submit(fetch_race_results, season, rnd)
            for season in range(start_year, end_year + 1)
            for rnd in range(1, get_rounds_in_season(season) + 1)
        ]
        # .result() blocks until the task is done and re-raises its exception.
        rows = [row for job in pending for row in (job.result() or [])]

    return pd.DataFrame(rows)

# Race results are downloaded for the 2022 and 2023 seasons only.
# NOTE(review): the qualifying download in this notebook spans 2014-2023;
# confirm the narrower range here is intentional.
df_race_results = get_race_results(start_year=2022, end_year=2023)

In [25]:
def fetch_qualifying_results(year, round_num):
    """Fetch the qualifying classification of a single race weekend.

    Parameters
    ----------
    year : int
        Season of the race; interpolated into the request URL.
    round_num : int
        Round number within the season; interpolated into the request URL.

    Returns
    -------
    list[dict]
        One dict per entry in the race's ``QualifyingResults`` list, with the
        keys ``season``, ``round``, ``raceName``, ``driverId``,
        ``constructorId``, ``Q1``, ``Q2`` and ``Q3``. Values are passed
        through as returned by the API. A session key is ``None`` when the
        entry has no value for it. An empty list is returned when the API
        reports no race for this season/round.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    url = f'http://ergast.com/api/f1/{year}/{round_num}/qualifying.json?limit=1000'
    # A timeout stops one stalled request from pinning a worker thread forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of parsing an error body.
    response.raise_for_status()
    races = response.json()['MRData']['RaceTable']['Races']

    if not races:
        # No qualifying data for this round: return a falsy value the caller
        # can skip instead of raising IndexError on races[0].
        return []

    race = races[0]
    return [
        {
            'season': race['season'],
            'round': race['round'],
            'raceName': race['raceName'],
            'driverId': quali['Driver']['driverId'],
            'constructorId': quali['Constructor']['constructorId'],
            'Q1': quali.get('Q1'),
            'Q2': quali.get('Q2'),
            'Q3': quali.get('Q3'),
        }
        for quali in race['QualifyingResults']
    ]

def get_qualifying_results(start_year, end_year):
    """Collect qualifying results for every round of every season in a range.

    Walks the seasons from ``start_year`` to ``end_year`` (both inclusive),
    asks ``get_rounds_in_season`` for the number of rounds in each, and
    schedules one ``fetch_qualifying_results`` call per round on a pool of 24
    worker threads. The futures are drained in the order they were submitted,
    so the rows come out sorted by season and then round.

    Parameters
    ----------
    start_year : int
        First season to fetch (inclusive).
    end_year : int
        Last season to fetch (inclusive).

    Returns
    -------
    pandas.DataFrame
        One row per qualifying dict returned by the fetch tasks. Tasks that
        return a falsy value contribute no rows.
    """
    collected = []

    with ThreadPoolExecutor(max_workers=24) as pool:
        jobs = []
        for season in range(start_year, end_year + 1):
            last_round = get_rounds_in_season(season)
            jobs += [
                pool.submit(fetch_qualifying_results, season, rnd)
                for rnd in range(1, last_round + 1)
            ]

        for job in jobs:
            # .result() blocks until the task is done and re-raises its exception.
            batch = job.result()
            if not batch:
                continue
            collected.extend(batch)

    return pd.DataFrame(collected)

# Qualifying results are downloaded for every season from 2014 through 2023:
# one request per season for the round count, plus one per round.
df_quali_results = get_qualifying_results(start_year=2014, end_year=2023)