In [None]:
import requests
from dotenv import load_dotenv
import os
import mysql.connector
import csv
import time
from constants import YEARS
from decimal import Decimal

In [244]:
load_dotenv()

db_host = os.getenv('DB_HOST')
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')
db_name = os.getenv('DB_NAME')

In [245]:
def get_number_of_rounds(year):
    time.sleep(1)
    url = f"http://localhost:8000/ergast/f1/{year}.json"
    response = requests.get(url)
    data = response.json()
    mr_data = data["MRData"]
    number_of_rounds = mr_data["total"]
    return int(number_of_rounds)

In [None]:
def get_race_name(round_number, year):
    time.sleep(1)
    url = f"http://localhost:8000/ergast/f1/{year}/{round_number}.json"
    response = requests.get(url)
    data = response.json()
    race_data = data["MRData"]["RaceTable"]["Races"]
    race_name = race_data[0]["raceName"]
    return(race_name)

In [None]:

def api_name_to_db_name(race_name):
    race_name_map = {
        "Bahrain Grand Prix": "bahrain",
        "Saudi Arabian Grand Prix": "saudi-arabia",
        "Australian Grand Prix": "australia",
        "Japanese Grand Prix": "japan",
        "Chinese Grand Prix": "china",
        "Miami Grand Prix": "miami",
        "Emilia Romagna Grand Prix": "emilia-romagna",
        "Monaco Grand Prix": "monaco",
        "Canadian Grand Prix": "canada",
        "Spanish Grand Prix": "spain",
        "Austrian Grand Prix": "austria",
        "British Grand Prix": "great-britain",
        "Hungarian Grand Prix": "hungary",
        "Belgian Grand Prix": "belgium",
        "Dutch Grand Prix": "netherlands",
        "Italian Grand Prix": "italy",
        "Azerbaijan Grand Prix": "azerbaijan",
        "Singapore Grand Prix": "singapore",
        "United States Grand Prix": "united-states",
        "Mexico City Grand Prix": "mexico",
        "São Paulo Grand Prix": "sao-paulo",
        "Las Vegas Grand Prix": "las-vegas",
        "Qatar Grand Prix": "qatar",
        "Abu Dhabi Grand Prix": "abu-dhabi",
        "French Grand Prix" : "france"
    }
    return race_name_map.get(race_name, f"Error: '{race_name}' not found.")

In [248]:
#Collects the distance and the course length for a specific race
def get_race_distance(race, year):
    connection = mysql.connector.connect(
        host=db_host,
        user =db_user,
        password = db_password,
        database = db_name,
        use_pure=True,
        ssl_disabled=True
    )

    mycursor = connection.cursor()

    mycursor.execute(f"select course_length, distance from race where grand_prix_id = '{race}' and year = '{year}'")
    tuple_length = mycursor.fetchall()
    course_length = tuple_length[0][0]
    distance = tuple_length[0][1]
    return course_length, distance

In [249]:
#Finds the number of laps for a specific race
def get_number_laps(race, year):
    connection = mysql.connector.connect(
        host=db_host,
        user =db_user,
        password = db_password,
        database = db_name,
        use_pure=True,
        ssl_disabled=True
    )

    mycursor = connection.cursor()

    mycursor.execute(f"select laps from race where grand_prix_id = '{race}' and year = '{year}'")
    tuple_laps = mycursor.fetchall()
    laps = tuple_laps[0][0]
    return laps

In [250]:
def get_circuit_info(race, year):
    circuit_data = []
    connection = mysql.connector.connect(
        host=db_host,
        user =db_user,
        password = db_password,
        database = db_name,
        use_pure=True,
        ssl_disabled=True
    )

    mycursor = connection.cursor()
    mycursor.execute(f"select laps from race where grand_prix_id = '{race}' and year = '{year}'")
    tuple_laps = mycursor.fetchall()
    laps = tuple_laps[0][0]
    mycursor.execute(f"select course_length, distance from race where grand_prix_id = '{race}' and year = '{year}'")
    tuple_length = mycursor.fetchall()
    course_length = tuple_length[0][0]
    distance = tuple_length[0][1]
    circuit_data.append({
        "Year" : year,
        "race_name" : race,
        "Laps" : laps,
        "Course_length": course_length,
        "distance" : distance
    })
    return circuit_data
    

In [251]:
#Converts time from minutes to seconds
#Used for the lap time
def time_to_seconds(t):
    minutes, seconds = t.split(':')
    seconds = float(seconds)
    return int(minutes) * 60 + seconds

In [252]:
#Retrieves the Qualifying time for a specific driver at a specific race
def get_quali_time(year, race):
    quali_data = []

    connection = mysql.connector.connect(
        host=db_host,
        user =db_user,
        password = db_password,
        database = db_name,
        use_pure=True,
        ssl_disabled=True
    )

    mycursor = connection.cursor()

    mycursor.execute(f"select id from race where grand_prix_id = '{race}' and year = '{year}'")
    race_id_tuple = mycursor.fetchall()
    race_id = race_id_tuple[0][0]



    mycursor.execute(f"select q1_millis, q2_millis, q3_millis, driver_id from qualifying_result where race_id = '{race_id}' ")
    quali_tuple = mycursor.fetchall()
    if not quali_tuple:
        print(f"No quali data found in {race}, {year}")
        return None
    for i in range(len(quali_tuple)):
        q1_time = quali_tuple[i][0]
        q2_time = quali_tuple[i][1]
        q3_time = quali_tuple[i][2]
        driver = quali_tuple[i][3]
        if q1_time == None:
            print("No qualifying data found")
            continue

        elif q2_time == None:
            quali_data.append({
                "year": year,
                "race_name": race,
                "driver" : driver,
                "Qualifying_time" : q1_time/1000
            })
        elif q3_time == None:
            quali_data.append({
                "year": year,
                "race_name": race,
                "driver" : driver,
                "Qualifying_time" : q2_time/1000
            })
        else:
            quali_data.append({
                "year": year,
                "race_name": race,
                "driver" : driver,
                "Qualifying_time" : q3_time/1000
            })
    return quali_data

In [None]:
def write_to_csv(race_data):
    if race_data:     
        with open('all_quali_data.csv', 'a', newline='') as csvfile:
            fieldnames = race_data[0].keys()
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writerows(race_data)
        print("Data written")
    else:
        print("No data available")

In [254]:
def write_quali_to_csv(quali_data):
    if quali_data:
        file_exists = os.path.isfile('all_quali_data.csv') and os.path.getsize('all_quali_data.csv') > 0
        with open('all_quali_data.csv', 'a', newline='') as csvfile:
            fieldnames = quali_data[0].keys()
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            if not file_exists:
                writer.writeheader()
            writer.writerows(quali_data)
        print("Data written")
    else:
        print("No Data Available")

In [255]:
def write_circuit_data_csv(circuit_data):
    if circuit_data:
        for entry in circuit_data:
            for key, value in entry.items():
                if isinstance(value, Decimal):
                    #Convert the decimals to a float
                    entry[key] = float(value)
        file_exists = os.path.isfile('all_circuit_data.csv') and os.path.getsize('all_circuit_data.csv') > 0
        with open('all_circuit_data.csv', 'a', newline='') as csvfile:
            fieldnames = circuit_data[0].keys()
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            if not file_exists:
                writer.writeheader()
            print(circuit_data)
            writer.writerows(circuit_data)
        print("Data Written")
    else:
        print("No Data Available")


In [None]:
#Method that gets all the data involving qualifying sessions
#As well as getting the circuti information like track distance and number of laps
def get_quali_data(round, year):
    race_name = get_race_name(round, year)
    db_race_name = api_name_to_db_name(race_name)
    circuit_data = get_circuit_info(db_race_name, year)
    quali_data = get_quali_time(year, db_race_name)
    write_circuit_data_csv(circuit_data)

In [257]:
for year in YEARS:
    for r in range(1, get_number_of_rounds(year)+1):
        get_quali_data(r, year)
        

[{'Year': 2022, 'race_name': 'bahrain', 'Laps': 57, 'Course_length': 5.412, 'distance': 308.238}]
Data Written
No qualifying data found
[{'Year': 2022, 'race_name': 'saudi-arabia', 'Laps': 50, 'Course_length': 6.174, 'distance': 308.45}]
Data Written
No qualifying data found
[{'Year': 2022, 'race_name': 'australia', 'Laps': 58, 'Course_length': 5.303, 'distance': 307.574}]
Data Written
No qualifying data found
[{'Year': 2022, 'race_name': 'emilia-romagna', 'Laps': 63, 'Course_length': 4.909, 'distance': 309.049}]
Data Written
[{'Year': 2022, 'race_name': 'miami', 'Laps': 57, 'Course_length': 5.41, 'distance': 308.37}]
Data Written
[{'Year': 2022, 'race_name': 'spain', 'Laps': 66, 'Course_length': 4.675, 'distance': 308.424}]
Data Written
[{'Year': 2022, 'race_name': 'monaco', 'Laps': 64, 'Course_length': 3.337, 'distance': 213.568}]
Data Written
[{'Year': 2022, 'race_name': 'azerbaijan', 'Laps': 51, 'Course_length': 6.003, 'distance': 306.049}]
Data Written
[{'Year': 2022, 'race_name':