In [32]:
import pandas as pd
import numpy as np

In [3]:
# Read the qualifying results from CSV and order the rows by driver, then race,
# then qualifying position. The row labels assigned at load time are kept.
qualifying_data = pd.read_csv('qualifying.csv').sort_values(
    by=['driverId', 'raceId', 'position']
)

In [4]:
# Preview the first rows of the sorted frame to check the load and the column layout.
qualifying_data.head()

Unnamed: 0,qualifyId,raceId,driverId,constructorId,number,position,q1,q2,q3
2998,3000,1,1,1,1,15,1:26.454,\N,\N
3016,3018,2,1,1,1,13,1:35.280,1:34.905,\N
3032,3034,3,1,1,1,9,1:35.776,1:35.740,1:38.595
3048,3050,4,1,1,1,5,1:32.851,1:32.877,1:34.196
3077,3079,5,1,1,1,14,1:20.991,1:20.805,\N


In [17]:
# List the column names of the frame.
# NOTE(review): the recorded output of this cell includes 'best_qualy_time', a
# column that is only added by a later cell. The output therefore appears to come
# from an out-of-order run; re-check it after Restart Kernel -> Run All.
print(qualifying_data.columns)


Index(['qualifyId', 'raceId', 'driverId', 'constructorId', 'number',
       'position', 'q1', 'q2', 'q3', 'best_qualy_time'],
      dtype='object')


In [38]:

# Define a function to extract the qualifying time for each driver
def get_qualifying_time(driver_id, race_id, data=None):
    """Return the time a driver set in the last qualifying round they took part in.

    The rounds are checked from Q3 down to Q1; the first one that holds a time
    for the driver decides which column the result is taken from.

    Parameters
    ----------
    driver_id
        Value matched against the ``driverId`` column.
    race_id
        Value matched against the ``raceId`` column.
    data : pandas.DataFrame, optional
        Frame with ``driverId``, ``raceId``, ``q1``, ``q2`` and ``q3`` columns.
        When omitted, the notebook-level ``qualifying_data`` frame is used, so
        existing two-argument calls keep working.

    Returns
    -------
    str or int
        The time string from the last round with a recorded time, or ``0`` when
        there is no row for this driver/race or no round holds a time.
    """
    if data is None:
        data = qualifying_data

    driver_data = data[(data['driverId'] == driver_id) & (data['raceId'] == race_id)]

    # No entry for this driver/race pair: report "no time" instead of failing
    # on the positional lookup below.
    if driver_data.empty:
        return 0

    first_entry = driver_data.iloc[0]

    # Walk the rounds from last to first and stop at the first one with a time.
    for round_number in (3, 2, 1):
        column = f'q{round_number}'
        value = first_entry[column]

        # A round was not run when the cell holds the dataset's '\N' marker or
        # is empty (read by pandas as NaN).
        if pd.isna(value) or value == '\\N':
            continue

        # Normally exactly one row matches. If several do, ignore their missing
        # markers and keep the smallest entry (a string comparison, since the
        # times are stored as text).
        round_times = driver_data[column]
        round_times = round_times[round_times.notna() & (round_times != '\\N')]
        return round_times.min()

    return 0

# Add a 'best_qualy_time' column: for every row, the time from the last qualifying
# round (Q3, then Q2, then Q1) in which a time was recorded, or 0 when the row has
# no Q1 time.
#
# This is computed column-wise rather than row by row. A per-row loop that filters
# the whole frame on every iteration is quadratic in the number of rows, and
# filling an integer column with strings one cell at a time relies on dtype
# upcasting that recent pandas versions deprecate.
# NOTE(review): the result is taken per row, which assumes one row per
# (driverId, raceId) pair - confirm against the dataset.
round_times = qualifying_data[['q1', 'q2', 'q3']]

# A round holds a time when its cell is neither empty (NaN) nor the '\N' marker.
has_time = round_times.notna() & (round_times != '\\N')

# Start from Q1 and let each later round override it wherever that round has a time.
# The object dtype lets the column hold both time strings and the integer 0.
best_qualy_time = round_times['q1'].astype(object)
for later_round in ('q2', 'q3'):
    best_qualy_time = best_qualy_time.where(~has_time[later_round], round_times[later_round])

# Rows without a Q1 time get 0, whatever the later rounds contain.
qualifying_data['best_qualy_time'] = best_qualy_time.where(has_time['q1'], 0)

# Show the first rows of the updated data frame
qualifying_data.head()



Unnamed: 0,qualifyId,raceId,driverId,constructorId,number,position,q1,q2,q3,best_qualy_time
2998,3000,1,1,1,1,15,1:26.454,\N,\N,1:26.454
3016,3018,2,1,1,1,13,1:35.280,1:34.905,\N,1:34.905
3032,3034,3,1,1,1,9,1:35.776,1:35.740,1:38.595,1:38.595
3048,3050,4,1,1,1,5,1:32.851,1:32.877,1:34.196,1:34.196
3077,3079,5,1,1,1,14,1:20.991,1:20.805,\N,1:20.805


In [35]:
# Count the rows whose best_qualy_time equals 0 and report the total.
num_zero_qualy_times = int((qualifying_data['best_qualy_time'] == 0).sum())
print(f'Number of rows with 0 best qualifying time: {num_zero_qualy_times}')


Number of rows with 0 best qualifying time: 139
