In [None]:
# Import libraries
import requests
import json
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

In [None]:
def get_rounds_in_season(year, timeout=30):
    """Return how many races the Ergast schedule endpoint lists for a season.

    Args:
        year: Season to look up; interpolated into the request URL.
        timeout: Seconds to wait for the server before giving up. Defaults
            to 30 so that a stalled connection cannot block the notebook
            (or a worker thread) indefinitely.

    Returns:
        int: Length of ``MRData.RaceTable.Races`` in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    url = f'http://ergast.com/api/f1/{year}.json'
    response = requests.get(url, timeout=timeout)
    # Surface HTTP failures as such instead of a JSON/KeyError further down.
    response.raise_for_status()
    return len(response.json()['MRData']['RaceTable']['Races'])

In [None]:
def get_constructor_standings(start_year=2017, end_year=2024):
    """Collect the constructor standings for every season in a year range.

    One request is made per season (both bounds inclusive). Seasons whose
    response contains no standings list contribute no rows.

    Args:
        start_year: First season to fetch.
        end_year: Last season to fetch.

    Returns:
        pandas.DataFrame: One row per constructor per season with columns
        season, position, points, wins, constructorId, constructorName and
        nationality. Values other than ``season`` are stored exactly as
        they appear in the response.
    """
    rows = []
    for season in range(start_year, end_year + 1):
        endpoint = f'http://ergast.com/api/f1/{season}/constructorStandings.json?limit=1000'
        standings_lists = requests.get(endpoint).json()['MRData']['StandingsTable']['StandingsLists']
        if not standings_lists:
            # Nothing published for this season; move on to the next one.
            continue
        rows.extend(
            {
                'season': season,
                'position': entry['position'],
                'points': entry['points'],
                'wins': entry['wins'],
                'constructorId': entry['Constructor']['constructorId'],
                'constructorName': entry['Constructor']['name'],
                'nationality': entry['Constructor']['nationality']
            }
            for entry in standings_lists[0]['ConstructorStandings']
        )
    return pd.DataFrame(rows)

# Runs when this cell executes: fetches the 2017-2024 constructor standings
# (one HTTP request per season) into a module-level DataFrame.
df_constructor_standings = get_constructor_standings(2017, 2024)

In [None]:
def get_driver_info():
    """Download the full driver list, following the API's offset pagination.

    Pages of 100 drivers are requested at increasing offsets until a page
    comes back empty.

    Returns:
        pandas.DataFrame: One row per driver record as returned by the API,
        with the ``dateOfBirth`` column converted to datetimes.
    """
    page_size = 100

    def _fetch_page(start):
        # One page of driver records beginning at offset `start`.
        endpoint = f'http://ergast.com/api/f1/drivers.json?limit={page_size}&offset={start}'
        return requests.get(endpoint).json()['MRData']['DriverTable']['Drivers']

    collected = []
    start = 0
    batch = _fetch_page(start)
    while batch:
        collected += batch
        start += page_size
        batch = _fetch_page(start)

    frame = pd.DataFrame(collected)
    frame['dateOfBirth'] = pd.to_datetime(frame['dateOfBirth'])
    return frame

In [None]:
def get_driver_standings(start_year=2017, end_year=2024):
    """Collect the driver standings for every season in a year range.

    One request is made per season (both bounds inclusive). Seasons whose
    response contains no standings list contribute no rows.

    Args:
        start_year: First season to fetch.
        end_year: Last season to fetch.

    Returns:
        pandas.DataFrame: One row per driver per season with columns season,
        position, points, wins, driverId, driverName, constructorId and
        constructorName. The constructor columns come from the first entry
        of the driver's ``Constructors`` list.
    """
    def _as_row(season, entry):
        # Flatten one standings entry into a single record.
        return {
            'season': season,
            'position': entry['position'],
            'points': entry['points'],
            'wins': entry['wins'],
            'driverId': entry['Driver']['driverId'],
            'driverName': f"{entry['Driver']['givenName']} {entry['Driver']['familyName']}",
            'constructorId': entry['Constructors'][0]['constructorId'],
            'constructorName': entry['Constructors'][0]['name']
        }

    rows = []
    for season in range(start_year, end_year + 1):
        endpoint = f'http://ergast.com/api/f1/{season}/driverStandings.json?limit=1000'
        payload = requests.get(endpoint).json()
        standings_lists = payload['MRData']['StandingsTable']['StandingsLists']
        if not standings_lists:
            continue
        for entry in standings_lists[0]['DriverStandings']:
            rows.append(_as_row(season, entry))
    return pd.DataFrame(rows)

In [None]:
def get_finishing_status():
    """Fetch the table of finishing statuses in a single request.

    Returns:
        pandas.DataFrame: One row per entry of ``MRData.StatusTable.Status``
        in the JSON response, with the response's own field names as columns.
    """
    payload = requests.get('http://ergast.com/api/f1/status.json?limit=1000').json()
    return pd.DataFrame(payload['MRData']['StatusTable']['Status'])

In [None]:
def get_lap_times(start_year=2017, end_year=2024):
    """Download per-lap timing data for every race in a range of seasons.

    For each season the number of rounds is obtained from
    ``get_rounds_in_season``; each race is then walked lap by lap, one
    request per lap, until the response has no race or the race has no laps.

    Args:
        start_year: First season to fetch (inclusive).
        end_year: Last season to fetch (inclusive).

    Returns:
        pandas.DataFrame: One row per driver per lap with columns season,
        round, lap, position (converted to int), driverId and time.
    """
    def _iter_race_laps(season, rnd):
        # Yield one record per timing entry, lap after lap, for a single race.
        lap_no = 1
        while True:
            endpoint = f'http://ergast.com/api/f1/{season}/{rnd}/laps/{lap_no}.json'
            races = requests.get(endpoint).json()['MRData']['RaceTable']['Races']
            # A missing race and a race without laps both end the walk.
            lap_entries = races[0].get('Laps', []) if races else []
            if not lap_entries:
                return
            for timing in lap_entries[0]['Timings']:
                yield {
                    'season': season,
                    'round': rnd,
                    'lap': lap_no,
                    'position': int(timing['position']),
                    'driverId': timing['driverId'],
                    'time': timing['time']
                }
            lap_no += 1

    rows = []
    for season in range(start_year, end_year + 1):
        for rnd in range(1, get_rounds_in_season(season) + 1):
            rows.extend(_iter_race_laps(season, rnd))
    return pd.DataFrame(rows)

In [None]:
def fetch_pit_results(year, round_num, timeout=30):
    """Fetch the pit stops recorded for a single race.

    Args:
        year: Season of the race; stored in each record's ``season`` field.
        round_num: Round within the season; stored in each record's
            ``round`` field.
        timeout: Seconds to wait for the server before giving up. This
            function is run on worker threads, where a request without a
            timeout could stall the whole pool.

    Returns:
        list[dict]: One record per pit stop with keys season, round,
        driverId, stop, lap, time and duration (missing optional fields
        are ``None``). An empty list is returned when the response holds
        no race or the race has no ``PitStops`` entry, so callers always
        receive a list.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = f'http://ergast.com/api/f1/{year}/{round_num}/pitstops.json?limit=1000'
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    data = response.json()['MRData']['RaceTable']['Races']
    if not data:
        return []
    return [
        {
            'season': year,
            'round': round_num,
            'driverId': pit['driverId'],
            'stop': pit.get('stop', None),
            'lap': pit.get('lap', None),
            'time': pit.get('time', None),
            'duration': pit.get('duration', None)
        }
        # A race entry without pit-stop data yields no records instead of a KeyError.
        for pit in data[0].get('PitStops', [])
    ]

def get_pit_results(start_year=2017, end_year=2024, max_workers=24):
    """Download pit-stop records for every race in a range of seasons.

    The number of rounds per season is looked up sequentially with
    ``get_rounds_in_season``; the per-race downloads
    (``fetch_pit_results``) run on a thread pool and are collected in
    submission order.

    Args:
        start_year: First season to fetch (inclusive).
        end_year: Last season to fetch (inclusive).
        max_workers: Size of the thread pool used for the per-race requests.

    Returns:
        pandas.DataFrame: One row per pit stop. When a ``duration`` column
        is present it is converted with ``pd.to_numeric(errors='coerce')``,
        so any value that is not a plain number becomes NaN. If no pit
        stops were collected at all, an empty DataFrame is returned.
    """
    all_results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for year in range(start_year, end_year + 1):
            num_rounds = get_rounds_in_season(year)
            for round_num in range(1, num_rounds + 1):
                futures.append(executor.submit(fetch_pit_results, year, round_num))

        for future in futures:
            # Races without data come back as None or an empty list; skip both.
            result = future.result()
            if result:
                all_results.extend(result)
    df_pit_results = pd.DataFrame(all_results)
    # An empty result set produces a frame with no columns; indexing
    # 'duration' unconditionally would raise KeyError in that case.
    if 'duration' in df_pit_results.columns:
        df_pit_results['duration'] = pd.to_numeric(df_pit_results['duration'], errors='coerce')
    return df_pit_results

In [None]:
def fetch_race_results(year, round_num, timeout=30):
    """Fetch the classification of a single race.

    Args:
        year: Season of the race; used to build the request URL.
        round_num: Round within the season; used to build the request URL.
        timeout: Seconds to wait for the server before giving up. This
            function is run on worker threads, where a request without a
            timeout could stall the whole pool.

    Returns:
        list[dict]: One record per entry of the race's ``Results`` list with
        keys season, round, raceName, date, driverId, constructorId, grid,
        position, status, points, time-ms, time-hrmins,
        'Max Avg Speed Unit' and 'Max Avg Speed'. ``season`` and ``round``
        are copied from the response as-is rather than taken from the
        arguments. The timing and fastest-lap fields are ``None`` whenever
        the corresponding part of the response is absent. An empty list is
        returned when the response holds no race.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = f'http://ergast.com/api/f1/{year}/{round_num}/results.json?limit=1000'
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    data = response.json()['MRData']['RaceTable']['Races']
    if not data:
        return []

    race = data[0]
    race_results = []
    for result in race['Results']:
        # 'Time' and 'FastestLap' are optional, and so are their nested
        # members; fall back to empty dicts so partial entries yield None
        # instead of raising KeyError.
        finish_time = result.get('Time') or {}
        average_speed = (result.get('FastestLap') or {}).get('AverageSpeed') or {}
        race_results.append({
            'season': race['season'],
            'round': race['round'],
            'raceName': race['raceName'],
            'date': race['date'],
            'driverId': result['Driver']['driverId'],
            'constructorId': result['Constructor']['constructorId'],
            'grid': result['grid'],
            'position': result.get('position', None),
            'status': result['status'],
            'points': result['points'],
            'time-ms': finish_time.get('millis'),
            'time-hrmins': finish_time.get('time'),
            'Max Avg Speed Unit': average_speed.get('units'),
            'Max Avg Speed': average_speed.get('speed')
        })
    return race_results

def get_race_results(start_year=2017, end_year=2024, max_workers=12):
    """Download race classifications for every race in a range of seasons.

    The number of rounds per season is looked up sequentially with
    ``get_rounds_in_season``; the per-race downloads
    (``fetch_race_results``) run on a thread pool and are collected in
    submission order.

    Args:
        start_year: First season to fetch (inclusive).
        end_year: Last season to fetch (inclusive).
        max_workers: Size of the thread pool used for the per-race requests.

    Returns:
        pandas.DataFrame: One row per classified entry. When a ``date``
        column is present it is converted to datetimes. If no results were
        collected at all, an empty DataFrame is returned.
    """
    all_results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for year in range(start_year, end_year + 1):
            num_rounds = get_rounds_in_season(year)
            for round_num in range(1, num_rounds + 1):
                futures.append(executor.submit(fetch_race_results, year, round_num))

        for future in futures:
            result = future.result()
            if result:
                all_results.extend(result)

    df_race_results = pd.DataFrame(all_results)
    # An empty result set produces a frame with no columns; indexing 'date'
    # unconditionally would raise KeyError in that case.
    if 'date' in df_race_results.columns:
        df_race_results['date'] = pd.to_datetime(df_race_results['date'])
    return df_race_results

In [None]:
def get_race_schedule(start_year=2017, end_year=2024, timeout=30):
    """Download the race calendar for every season in a year range.

    One request is made per season (both bounds inclusive).

    Args:
        start_year: First season to fetch.
        end_year: Last season to fetch.
        timeout: Seconds to wait for each response before giving up.

    Returns:
        pandas.DataFrame: One row per race with columns season, round,
        raceName, date, time, circuitId, circuitName, location-locality,
        location-country, location-lat and location-long. ``time`` is
        ``None`` for races whose entry carries no start time. When a
        ``date`` column is present it is converted to datetimes; if no
        races were returned at all, an empty DataFrame is returned.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    all_races = []
    for year in range(start_year, end_year + 1):
        url = f'http://ergast.com/api/f1/{year}.json'
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        races = response.json()['MRData']['RaceTable']['Races']
        for race in races:
            circuit = race['Circuit']
            location = circuit['Location']
            all_races.append({
                'season': race['season'],
                'round': race['round'],
                'raceName': race['raceName'],
                'date': race['date'],
                # Not every race entry carries a start time; keep the row anyway.
                'time': race.get('time'),
                'circuitId': circuit['circuitId'],
                'circuitName': circuit['circuitName'],
                'location-locality': location['locality'],
                'location-country': location['country'],
                'location-lat': location['lat'],
                'location-long': location['long'],
            })
    df_race_schedule = pd.DataFrame(all_races)
    # An empty schedule produces a frame with no columns; guard the conversion.
    if 'date' in df_race_schedule.columns:
        df_race_schedule['date'] = pd.to_datetime(df_race_schedule['date'])
    return df_race_schedule