In [None]:
import requests
from dotenv import load_dotenv
import os
import mysql.connector
import json
import csv
import time

In [44]:
load_dotenv()

db_host = os.getenv('DB_HOST')
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')
db_name = os.getenv('DB_NAME')

In [None]:

api_calls = 0
def api_increment():
    global api_calls
    api_calls = api_calls + 1

In [None]:
#Finds the number of rounds in a given season

def get_number_of_rounds(year):
    url = f"http://localhost:8000/ergast/f1/{year}.json"
    api_increment()
    response = requests.get(url)
    data = response.json()
    mr_data = data["MRData"]
    number_of_rounds = mr_data["total"]
    return int(number_of_rounds)
    

In [None]:

#Finds the race name for the given round
def get_race_name(round_number, year):
    url = f"http://localhost:8000/ergast/f1/{year}/{round_number}.json"
    api_increment()
    response = requests.get(url)
    data = response.json()
    print(data)
    race_data = data["MRData"]["RaceTable"]["Races"]
    print(race_data)
    race_name = race_data[0]["raceName"]
    print(race_name)
    return(race_name)

In [48]:
#Converts the name from api output to the format that the database needs
def api_name_to_db_name(race_name):
    race_name_map = {
        "Bahrain Grand Prix": "bahrain",
        "Saudi Arabian Grand Prix": "saudi-arabia",
        "Australian Grand Prix": "australia",
        "Japanese Grand Prix": "japan",
        "Chinese Grand Prix": "china",
        "Miami Grand Prix": "miami",
        "Emilia Romagna Grand Prix": "emilia-romagna",
        "Monaco Grand Prix": "monaco",
        "Canadian Grand Prix": "canada",
        "Spanish Grand Prix": "spain",
        "Austrian Grand Prix": "austria",
        "British Grand Prix": "great-britain",
        "Hungarian Grand Prix": "hungary",
        "Belgian Grand Prix": "belgium",
        "Dutch Grand Prix": "netherlands",
        "Italian Grand Prix": "italy",
        "Azerbaijan Grand Prix": "azerbaijan",
        "Singapore Grand Prix": "singapore",
        "United States Grand Prix": "united-states",
        "Mexico City Grand Prix": "mexico",
        "São Paulo Grand Prix": "sao-paulo",
        "Las Vegas Grand Prix": "las-vegas",
        "Qatar Grand Prix": "qatar",
        "Abu Dhabi Grand Prix": "abu-dhabi",
    }
    return race_name_map.get(race_name, f"Error: '{race_name}' not found.")

In [49]:
def db_driver_name_to_api_name(name):
    driver_name_map = {    
    "carlos-sainz-jr": "sainz",
    "alexander-albon": "albon",
    "charles-leclerc": "leclerc",
    "daniel-ricciardo": "ricciardo",
    "fernando-alonso": "alonso",
    "franco-colapinto": "colapinto",
    "george-russell": "russell",
    "guanyu-zhou": "zhou",
    "kevin-magnussen": "magnussen",
    "lance-stroll": "stroll",
    "lando-norris": "norris",
    "lewis-hamilton": "hamilton",
    "liam-lawson": "lawson",
    "logan-sargeant": "sargeant",
    "max-verstappen": "max_verstappen",
    "nico-hulkenberg": "hulkenberg",
    "oscar-piastri": "piastri",
    "pierre-gasly": "gasly",
    "sergio-perez": "perez",
    "valtteri-bottas": "bottas",
    "yuki-tsunoda": "tsunoda",
    }
    return driver_name_map.get(name, f"Error: '{name}' not found.")

In [50]:
#Finds the number of laps for a specific race
def get_number_laps(race, year):
    connection = mysql.connector.connect(
        host=db_host,
        user =db_user,
        password = db_password,
        database = db_name,
        use_pure=True,
        ssl_disabled=True
    )

    mycursor = connection.cursor()

    mycursor.execute(f"select laps from race where grand_prix_id = '{race}' and year = '{year}'")
    tuple_laps = mycursor.fetchall()
    laps = tuple_laps[0][0]
    return laps

In [51]:
#Collects the distance and the course length for a specific race
def get_race_distance(race, year):
    connection = mysql.connector.connect(
        host=db_host,
        user =db_user,
        password = db_password,
        database = db_name,
        use_pure=True,
        ssl_disabled=True
    )

    mycursor = connection.cursor()

    mycursor.execute(f"select course_length, distance from race where grand_prix_id = '{race}' and year = '{year}'")
    tuple_length = mycursor.fetchall()
    course_length = tuple_length[0][0]
    distance = tuple_length[0][1]
    return course_length, distance

In [52]:
#Retrieves the Qualifying time for a specific driver at a specific race
def get_quali_time(year, race, driver):
    connection = mysql.connector.connect(
        host=db_host,
        user =db_user,
        password = db_password,
        database = db_name,
        use_pure=True,
        ssl_disabled=True
    )

    mycursor = connection.cursor()

    mycursor.execute(f"select id from race where grand_prix_id = '{race}' and year = '{year}'")
    race_id_tuple = mycursor.fetchall()
    race_id = race_id_tuple[0][0]



    mycursor.execute(f"select q1_millis, q2_millis, q3_millis from qualifying_result where race_id = '{race_id}' and driver_id = '{driver}'")
    quali_tuple = mycursor.fetchall()
    if not quali_tuple:
        print(f"No quali data found for {driver} in {race}, {year}")
        return None
    q1_time = quali_tuple[0][0]
    q2_time = quali_tuple[0][1]
    q3_time = quali_tuple[0][2]
    if q2_time == None:
        return q1_time/1000
    elif q3_time == None:
        return q2_time/1000
    else:
        return q3_time/1000


In [53]:
#Converts time from minutes to seconds
#Used for the lap time
def time_to_seconds(t):
    minutes, seconds = t.split(':')
    seconds = float(seconds)
    return int(minutes) * 60 + seconds

In [None]:
#Gets the lap data for a driver on a specific race
def get_lap_data(race, year, laps):
    offset=0
    race_data = []
    for i in range(1, (laps//5)+1):
        print(f"Lap {i}")
        url = f"http://localhost:8000/ergast/f1/{year}/{race}/laps.json?offset={offset}&limit=100"
        response = requests.get(url)
        
        if response.status_code != 200:
            print(f"Error: Failed to retrieve data for lap {i}")
            continue

        data = response.json()
        print(data)
        try:
            races = data["MRData"]["RaceTable"]["Races"]
            grand_prix_name = races[0]["raceName"]
            print(grand_prix_name)
            for lap in races[0]["Laps"]:
                lap_number = lap["number"]
                for timing in lap["Timings"]:
                    driver_id = timing["driverId"]
                    position = timing["position"]
                    lap_time = time_to_seconds(timing["time"])
                    
                    race_data.append({
                        "year": year,
                        "race_name": grand_prix_name,
                        "lap_number": lap_number,
                        "driver_id": driver_id,
                        "position": position,
                        "lap_time": lap_time
                    })
        except (KeyError, IndexError) as e:
            print(e)
        offset = offset + 100
        #The method must sleep to avoid Error 429.
        time.sleep(5)
    return race_data

In [56]:
#Writes all the lap data to a csv file for better storage
def write_to_csv(race_data):
    if race_data:     
        with open('all_race_data.csv', 'a', newline='') as csvfile:
            fieldnames = race_data[0].keys()
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            #writer.writeheader()
            writer.writerows(race_data)
        print("Data written")
    else:
        print("No data available")
        

In [None]:
#Method that gets and writes all the needed data to the csv file
def get_data(round, year):
    race_name = get_race_name(round, year)
    print(race_name, year)
    db_race_name = api_name_to_db_name(race_name)
    num_laps = get_number_laps(db_race_name, year)
    print(num_laps)
    race_data = get_lap_data(round, year, num_laps)
    write_to_csv(race_data)

In [58]:
#Converts time from seconds to minutes
def seconds_to_minutes(s):
    minutes = s // 60
    remaining_seconds = s % 60
    return f"{minutes}:{remaining_seconds}"


In [None]:
# year = 2024
# #for year in YEARS:
# rounds = get_number_of_rounds(year)
# for round in range(1, rounds+1):
#     print(f"Processing round: {round} for year: {year}")
#     print(round)
#     for driver in DRIVER_NAMES:
#         get_data(driver, round, year)
#         time.sleep(5)
#     time.sleep(10)
# time.sleep(30)

# print("done")

In [60]:
url = "http://localhost:8000/ergast/f1/2024/17//laps.json?limit=100"
response = requests.get(url)
data = response.json()
print(data)

{'MRData': {'xmlns': '', 'series': 'f1', 'url': 'http://localhost:8000/ergast/f1/2024/17//laps.json', 'limit': '100', 'offset': '0', 'total': '971', 'RaceTable': {'season': '2024', 'round': '17', 'Races': [{'season': '2024', 'round': '17', 'url': 'https://en.wikipedia.org/wiki/2024_Azerbaijan_Grand_Prix', 'raceName': 'Azerbaijan Grand Prix', 'Circuit': {'circuitId': 'baku', 'url': 'http://en.wikipedia.org/wiki/Baku_City_Circuit', 'circuitName': 'Baku City Circuit', 'Location': {'lat': '40.3725', 'long': '49.8533', 'locality': 'Baku', 'country': 'Azerbaijan'}}, 'date': '2024-09-15', 'time': '11:00:00Z', 'Laps': [{'number': '1', 'Timings': [{'driverId': 'leclerc', 'position': '1', 'time': '1:52.106'}, {'driverId': 'piastri', 'position': '2', 'time': '1:52.903'}, {'driverId': 'perez', 'position': '3', 'time': '1:53.923'}, {'driverId': 'sainz', 'position': '4', 'time': '1:54.797'}, {'driverId': 'max_verstappen', 'position': '5', 'time': '1:55.490'}, {'driverId': 'russell', 'position': '6',

In [61]:
get_data(22, 2024)

{'MRData': {'xmlns': '', 'series': 'f1', 'url': 'http://localhost:8000/ergast/f1/2024/22.json', 'limit': '30', 'offset': '0', 'total': '1', 'RaceTable': {'season': '2024', 'round': '22', 'Races': [{'season': '2024', 'round': '22', 'url': 'https://en.wikipedia.org/wiki/2024_Las_Vegas_Grand_Prix', 'raceName': 'Las Vegas Grand Prix', 'Circuit': {'circuitId': 'vegas', 'url': 'https://en.wikipedia.org/wiki/Las_Vegas_Grand_Prix#Circuit', 'circuitName': 'Las Vegas Strip Street Circuit', 'Location': {'lat': '36.1147', 'long': '-115.173', 'locality': 'Las Vegas', 'country': 'United States'}}, 'date': '2024-11-23', 'time': '06:00:00Z', 'FirstPractice': {'date': '2024-11-21', 'time': '02:30:00Z'}, 'SecondPractice': {'date': '2024-11-21', 'time': '06:00:00Z'}, 'ThirdPractice': {'date': '2024-11-22', 'time': '02:30:00Z'}, 'Qualifying': {'date': '2024-11-22', 'time': '06:00:00Z'}}]}}}
[{'season': '2024', 'round': '22', 'url': 'https://en.wikipedia.org/wiki/2024_Las_Vegas_Grand_Prix', 'raceName': 'La

ConnectionError: HTTPConnectionPool(host='api.lopi.ca', port=80): Max retries exceeded with url: /ergast/f1/2024/22/laps.json?offset=0&limit=100 (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x7f6b67355e80>: Failed to resolve 'api.lopi.ca' ([Errno -2] Name or service not known)"))