In [18]:
import json
import os

import requests
from bs4 import BeautifulSoup as bs

In [2]:
# Root endpoint of the Ergast F1 API; a season year is appended to it to
# request that season's race list.
base_url = "https://ergast.com/api/f1/"

In [105]:
# Collect one [season, race_date, race_name] entry per race for every
# season in the range below. Spaces in race names are replaced by
# underscores because the names are later embedded in live-timing URLs.
races = []

for y in range(2018, 2024):
    season_url = base_url + str(y)
    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() surfaces HTTP errors instead of silently
    # producing a season with no races.
    response = requests.get(season_url, timeout=30)
    response.raise_for_status()
    soup = bs(response.content, "xml")
    for race in soup.find_all("Race"):
        race_name = race.find("RaceName").text.replace(' ', '_')
        race_date = race.find("Date").text
        # The season is the part of the race date before the first '-'.
        races.append([race_date.split('-')[0], race_date, race_name])

In [71]:
def createLiveTimingUrl(race_info):
    """Build the live-timing URL of a race's RaceControlMessages.json.

    race_info must unpack into exactly three strings: the season year,
    the race date and the underscore-separated race name.

    Returns the tuple (url, year, race_name).
    """
    season, race_date, event_name = race_info
    # The path has three segments below the season folder root:
    # <date>_<race name> / <date>_Race / RaceControlMessages.json
    event_dir = race_date + "_" + event_name
    session_file = race_date + "_Race/RaceControlMessages.json"
    lt_url = "/".join(("https://livetiming.formula1.com/static/" + season,
                       event_dir, session_file))
    return lt_url, season, event_name

In [106]:
# One (url, year, race_name) tuple per collected race, in the same order
# as `races`.
race_data = [createLiveTimingUrl(race_info) for race_info in races]

In [107]:
def retrieveMessages(lt_url, timeout=30):
    """Download a JSON document and return it parsed.

    Parameters
    ----------
    lt_url : str
        URL of the JSON file to fetch.
    timeout : float, optional
        Seconds to wait for the server before the request is abandoned
        (default 30).

    Returns
    -------
    The object produced by json.loads for the response body.

    Raises
    ------
    json.JSONDecodeError
        If the body is not valid JSON. The HTTP status is deliberately
        not checked here: parseData relies on a non-JSON body surfacing
        as this exception.
    requests.exceptions.RequestException
        If the request itself fails (connection error, timeout, ...).
    """
    response = requests.get(lt_url, timeout=timeout)
    # Decode the raw bytes rather than response.text: .text depends on the
    # encoding requests guesses, and a wrong guess both garbles non-ASCII
    # characters and turns the UTF-8 byte-order mark into ordinary
    # characters that 'utf-8-sig' can no longer strip. errors='replace'
    # keeps a non-UTF-8 body from raising UnicodeDecodeError, so the only
    # decoding-related failure callers see is still JSONDecodeError.
    data_str = response.content.decode('utf-8-sig', errors='replace')
    return json.loads(data_str)


def parseData(lt_url):
    """Fetch race-control messages and group them by lap.

    Parameters
    ----------
    lt_url : str
        URL passed on to retrieveMessages.

    Returns
    -------
    dict or None
        None when the download is not valid JSON. Otherwise a dict with
        one key per lap, "1" up to the highest lap number found, each
        mapping to the list of that lap's messages in their original
        order (an empty list for laps without messages). Every message is
        copied without its 'Utc' and 'Lap' entries and with its remaining
        keys lower-cased. The dict is empty when no message carries a lap
        number.
    """
    try:
        data_json = retrieveMessages(lt_url)
    except json.JSONDecodeError:
        return None

    raw_messages = data_json['Messages']
    # Only messages that carry a 'Lap' can be filed under a lap. Looking
    # the key up unconditionally would raise KeyError on the first message
    # without it, which previously emptied every lap (or crashed when it
    # was the last message).
    lap_numbers = [msg['Lap'] for msg in raw_messages if 'Lap' in msg]
    if not lap_numbers:
        return {}

    # Pre-create a bucket for every lap so laps without messages still
    # appear in the result, then fill the buckets in a single pass.
    by_lap = {lap: [] for lap in range(1, max(lap_numbers) + 1)}
    for msg in raw_messages:
        bucket = by_lap.get(msg.get('Lap'))
        if bucket is None:
            # No lap number, or one outside 1..highest lap.
            continue
        bucket.append({key.lower(): value
                       for key, value in msg.items()
                       if key not in ('Utc', 'Lap')})
    return {str(lap): entries for lap, entries in by_lap.items()}

In [108]:
# Download, regroup and save the race-control messages of every race.
# Races for which parseData returns nothing have their URL printed and
# are skipped.
output_dir = './LiveTiming_Data/'
# Create the target folder up front so the first open() does not fail
# with FileNotFoundError when the folder is missing.
os.makedirs(output_dir, exist_ok=True)

for lt_url, year, rn in race_data:
    messages = parseData(lt_url)
    if not messages:
        print(lt_url)
        continue
    # File name: <year>_<race name without the "_Grand_Prix" suffix>.json
    fn = output_dir + year + '_' +\
         rn.replace('_Grand_Prix', '').lower() + '.json'
    with open(fn, 'w') as f:
        json.dump(messages, f)

https://livetiming.formula1.com/static/2023/2023-10-29_Mexico_City_Grand_Prix/2023-10-29_Race/RaceControlMessages.json
https://livetiming.formula1.com/static/2023/2023-11-05_São_Paulo_Grand_Prix/2023-11-05_Race/RaceControlMessages.json
https://livetiming.formula1.com/static/2023/2023-11-19_Las_Vegas_Grand_Prix/2023-11-19_Race/RaceControlMessages.json
https://livetiming.formula1.com/static/2023/2023-11-26_Abu_Dhabi_Grand_Prix/2023-11-26_Race/RaceControlMessages.json
