# **Actual Cut-off Times**

- This notebook will work out what the cut-off times were for Q1 and Q2 for all historical data - it will use the non-fuel adjusted lap data so that it shows what the actual cut-off was

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import os
import itertools
from collections import defaultdict
import matplotlib.pyplot as plt

In [16]:
## Getting a list of all event files in the right format for the read data function ##

files = os.listdir('/Users/chanelbrown/Desktop/Notebooks/All CSV Files - without fuel adjusted lap times/')

events = []

for file in files:
    events.append(file[0:16])

events.remove('.DS_Store')

# Same event track will confuse the code later on so run seperately for these tracks:
events.remove('07_12A1r_Qu_2020')
events.remove('08_09Sil_Qu_2020')

#events.remove('07_05A1r_Qu_2020')
#events.remove('08_02Sil_Qu_2020')

In [17]:
## Function reads data into a Pandas DataFrame from a CSV file ##

def read_data(filename):
    
    df = pd.read_csv('/Users/chanelbrown/Desktop/Notebooks/All CSV Files - without fuel adjusted lap times/%s.csv' % (filename))
    
    # select required features
    d = df[['Season', 'Circuit', 'Session', 'Driver Short Name', 'Team', 'Outing Number','Lap number',
                   'Sector 1', 'Sector 2', 'Sector 3', 'Full Lap (no fuel adjustment)']].copy()
    
    ''' some sectors have missing values (probably where car has been taken off, 
    convert the white space to NaN objects and drop them'''
    
    d.replace(' ', np.nan, inplace = True) 
                                              
    data = d.dropna()
    
    return data

In [18]:
## Function finds which drivers made it into each sub-session ##

def find_drivers(data, session):
    
    session_data = data[data.loc[:,'Session'] == session]
    drivers = list(np.unique(session_data['Driver Short Name']))
    
    return drivers

In [19]:
## Function finds the fastest actual lap for each driver ## 

def fastest_actual_lap(data, driver, session):
    
    driver_sesh = data.loc[(data.loc[:, 'Driver Short Name'] == driver) & (data.Session == session), :]
    
    fastest = np.min(driver_sesh['Full Lap (no fuel adjustment)'])
    
    #print(f'The fastest theoretical lap for {team} in {session} is {fastest} seconds')
    
    season = data.loc[1, 'Season'] # finds the season of the fastest theoretical lap
    circuit = data.loc[1, 'Circuit'] # finds the circuit of the fastest theoretical lap
    
    team = driver_sesh.loc[driver_sesh.loc[:, 'Full Lap (no fuel adjustment)'] == fastest].iloc[0, 4] # finds the team of the fastest theoretical lap
    
    return season, circuit, team, driver, session, fastest

In [20]:
## Function puts the fastest actual lap per team into a dictionary where the qualifying session is the key ##

def create_qualifier_dict(data, sessions):
    
    qualifiers = {} # {'Q1' : df, 'Q2': df, 'Q3': df}
    
    for s in sessions:

        drivers = find_drivers(data, s)

        df = pd.DataFrame(columns = ['Season', 'Circuit', 'Team', 'Driver', 'Session', 'Fastest Actual Lap Time'])

        for d in drivers:

            season, circuit, team, driver, session, fastest = fastest_actual_lap(data, driver = d, session = s) 
            
            to_append = [season, circuit, team, driver, session, fastest] # values to append to empty dataframe
            a_series = pd.Series(to_append, index = df.columns) # convert into a series first
            df = df.append(a_series, ignore_index = True) 

        df = df.sort_values(by = ['Fastest Actual Lap Time'])

        qualifiers[s] = df # adds each set of results into a dictionary that has {qualifying round: data frame of fastest laps}
       
        
    return qualifiers, circuit

In [21]:
## Runs through all events available and works out actual laps for each qualifying session specified ##

sessions = ['Q1', 'Q2']

q = []
c = []

for event in events:
    
    data = read_data(event)
   
    qualifiers, circuit = create_qualifier_dict(data, sessions)
    
    q.append(qualifiers)
    c.append(circuit)

assert len(q) == len(c)

circuit_fastest_laps = defaultdict(list) # returns a dictionary where the keys are circuits and the values are qualifying dictionaries for each year

for i, j in zip(c, q):
    circuit_fastest_laps[i].append(j)

print(circuit_fastest_laps.keys())

dict_keys(['Barcelona', 'Bahrain', 'Sochi', 'Monza', 'Suzuka', 'A1-Ring', 'Melbourne', 'Sepang', 'Hockenheim', 'Mexico City', 'Montreal', 'Spa', 'Baku', 'Monaco', 'Austin', 'Interlagos', 'Silverstone', 'Budapest', 'Nurburgring', 'Shanghai', 'Abu Dhabi', 'Mugello', 'Singapore', 'Istanbul', 'Paul Ricard', 'Sakhir', 'Portimao', 'Imola'])


In [22]:
## Returns dictionary containing the laps for Q1 and Q2 when track and year specified ##

def return_dict(year, track, res):
    
    if res[track][0]['Q1'].loc[0, 'Season'] == year:
        return {'Q1': res[track][0]['Q1'], 'Q2': res[track][0]['Q2']}
    
    else:
        pass
    
    if res[track][1]['Q1'].loc[0, 'Season'] == year:
        return {'Q1': res[track][1]['Q1'], 'Q2': res[track][1]['Q2']}
    
    else:
        pass
    
    if res[track][2]['Q1'].loc[0, 'Season'] == year:
        return {'Q1': res[track][2]['Q1'], 'Q2': res[track][2]['Q2']}
    
    else:
        pass
    
    if res[track][3]['Q1'].loc[0, 'Season'] == year:
        return {'Q1': res[track][3]['Q1'], 'Q2': res[track][3]['Q2']}
    
    else:
        print('Enter valid arguments: year is not a string, track is a string')

In [23]:
## Finds cut-off points ##

def find_cutoff(input_data):

    Q1 = input_data['Q1']
    Q2 = input_data['Q2']
    
    Q1_cut = Q1.iloc[:, -1].nsmallest(16).iloc[-1] # 16th place is the score to beat, base it on below the 8th
    Q2_cut = Q2.iloc[:, -1].nsmallest(11).iloc[-1] # 11th place is the score to beat, base it on below the 5th
  
    #print(f'Q1 cut-off is {round(Q1_cut, 3)} seconds')
    #print(f'Q2 cut-off is {round(Q2_cut, 3)} seconds')
    
    return Q1_cut, Q2_cut

In [24]:
## Find the Q1 and Q2 cut-off points and the speed ratios for all years for all tracks - return info in DF ##


Q1_cutoff = pd.DataFrame(columns = ['Circuit', 'Cut-off 2017', 'Cut-off 2018', 'Cut-off 2019', 'Cut-off 2020'])
Q2_cutoff = pd.DataFrame(columns = ['Circuit', 'Cut-off 2017', 'Cut-off 2018', 'Cut-off 2019', 'Cut-off 2020'])


for track in circuit_fastest_laps.keys():
    
    try:
        event_2017 = return_dict(2017, track, circuit_fastest_laps)
        Q1_cut_2017, Q2_cut_2017 = find_cutoff(event_2017)
        
    except:
        Q1_cut_2017, Q2_cut_2017 = None, None
        
    
    try:
        event_2018 = return_dict(2018, track, circuit_fastest_laps)
        Q1_cut_2018, Q2_cut_2018 = find_cutoff(event_2018)
    
    except:
        Q1_cut_2018, Q2_cut_2018 = None, None
        
        
    try:
        event_2019 = return_dict(2019, track, circuit_fastest_laps)
        Q1_cut_2019, Q2_cut_2019 = find_cutoff(event_2019)
    
    except:
        Q1_cut_2019, Q2_cut_2019 = None, None
    
    
    try:
        event_2020 = return_dict(2020, track, circuit_fastest_laps)
        Q1_cut_2020, Q2_cut_2020 = find_cutoff(event_2020)
    
    except:
        Q1_cut_2020, Q2_cut_2020 = None, None

   
    to_append = [track, Q1_cut_2017, Q1_cut_2018, Q1_cut_2019, Q1_cut_2020] # values to append to empty dataframe
    series = pd.Series(to_append, index = Q1_cutoff.columns) # convert into a series first
    Q1_cutoff = Q1_cutoff.append(series, ignore_index = True) 

    
    to_append = [track, Q2_cut_2017, Q2_cut_2018, Q2_cut_2019, Q2_cut_2020] # values to append to empty dataframe
    series = pd.Series(to_append, index = Q2_cutoff.columns) # convert into a series first
    Q2_cutoff = Q2_cutoff.append(series, ignore_index = True) 
    

In [11]:
# DO NOT rerun without changing the events list above
'''
sil_a1r_second_Q1 = Q1_cutoff.loc[(2, 16), :]
sil_a1r_second_Q2 = Q2_cutoff.loc[(2, 16), :]'''

In [25]:
Q1_cutoff

Unnamed: 0,Circuit,Cut-off 2017,Cut-off 2018,Cut-off 2019,Cut-off 2020
0,Barcelona,82.332,78.923,78.404,77.908
1,Bahrain,92.118,90.53,90.026,89.491
2,Sochi,96.462,95.037,94.84,94.592
3,Monza,100.489,81.888,80.784,
4,Suzuka,90.849,90.361,89.822,
5,A1-Ring,66.345,65.271,64.789,65.164
6,Melbourne,86.419,84.532,83.017,
7,Sepang,93.308,,,
8,Hockenheim,,73.72,73.333,
9,Mexico City,79.176,76.911,78.065,


In [13]:
sil_a1r_second_Q1 # DO NOT rerun without changing the events list above

Unnamed: 0,Circuit,Cut-off 2017,Cut-off 2018,Cut-off 2019,Cut-off 2020
2,A1-Ring,66.345,65.271,64.789,81.372
16,Silverstone,102.573,88.456,86.662,87.882


In [26]:
Q2_cutoff

Unnamed: 0,Circuit,Cut-off 2017,Cut-off 2018,Cut-off 2019,Cut-off 2020
0,Barcelona,81.329,78.323,77.338,77.168
1,Bahrain,90.923,90.105,89.488,89.149
2,Sochi,95.948,93.995,93.95,93.239
3,Monza,97.582,81.669,80.517,
4,Suzuka,89.778,89.864,89.254,
5,A1-Ring,65.597,64.845,64.49,64.206
6,Melbourne,85.081,83.692,82.562,
7,Sepang,92.034,,,
8,Hockenheim,,73.657,72.786,
9,Mexico City,78.099,76.844,76.687,


In [15]:
sil_a1r_second_Q2 # DO NOT rerun without changing the events list above

Unnamed: 0,Circuit,Cut-off 2017,Cut-off 2018,Cut-off 2019,Cut-off 2020
2,A1-Ring,65.597,64.845,64.49,79.628
16,Silverstone,90.193,87.901,86.519,87.011


In [32]:
sil_a1r_second_Q2.to_csv('sil_a1r_second_Q2.csv')