# Driver History Tables

In [2]:
import pandas as pd
import copy
import warnings
import datetime as dt

warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

In [4]:
# Import all the data
drivers_df = pd.read_csv("./f1db_csv/drivers.csv").drop(columns = "url")
results_df = pd.read_csv("./f1db_csv/results.csv")
constructors_df = pd.read_csv("./f1db_csv/constructors.csv")
races_df = pd.read_csv("./f1db_csv/races.csv")
qualifying_df = pd.read_csv("./f1db_csv/qualifying.csv")
d_standings_df = pd.read_csv("./f1db_csv/driver_standings.csv")

# Clean some names and create new variables
# drivers_df
drivers_df["number"] = drivers_df["number"].replace({r"\N": None})
drivers_df["driverName"] = drivers_df["forename"].str.cat(drivers_df["surname"],sep = " ")
drivers_df = drivers_df.drop(columns = ["forename", "surname"])

# results_df
results_df["position"] = results_df["position"].replace({r"\N": None})

# Merging to a full dataset for driver history
df_1 = pd.merge(drivers_df[["driverId", "driverName", "number", "nationality"]], results_df[["driverId", "raceId", "constructorId", "position", "fastestLapTime"]], on = "driverId")
df_2 = pd.merge(df_1, constructors_df[["constructorId", "name"]], on = "constructorId")
df_2 = df_2.rename({"position" : "racePosition"}, axis = "columns")
df_2 = df_2.rename({"name" : "constructorName"}, axis = "columns")
df_3 = pd.merge(df_2, d_standings_df[["driverId", "raceId", "points", "position", "wins"]], on = ["driverId", "raceId"])
df_3 = df_3.rename({"position" : "driverStanding"}, axis = "columns")
df_4 = pd.merge(df_3, races_df[["raceId", "year", "name", "date"]], on = "raceId")
df_5 = pd.merge(df_4, qualifying_df[["raceId", "driverId", "position", "q1", "q2", "q3"]], on = ["driverId", "raceId"])

for i in range(len(df_5['q1'])):
    nan_series = df_5.q1.isna()[i]
    if (df_5['q1'][i] == r"\N") | (nan_series == True):
        df_5['q1'][i] = None
        i += 1
    elif df_5['q1'][i] != 0:
        df_5['q1'][i] = float(str(df_5['q1'][i]).split(':')[1]) + (60 * float(str(df_5['q1'][i]).split(':')[0]))
        i += 1
    else:
        df_5['q1'][i] = None
        i += 1

for i in range(len(df_5['q2'])):
    nan_series = df_5.q2.isna()[i]
    if (df_5['q2'][i] == r"\N") | (nan_series == True):
        df_5['q2'][i] = None
        i += 1
    elif df_5['q2'][i] != 0:
        df_5['q2'][i] = float(str(df_5['q2'][i]).split(':')[1]) + (60 * float(str(df_5['q2'][i]).split(':')[0]))
        i += 1
    else:
        df_5['q2'][i] = None
        i += 1

for i in range(len(df_5['q3'])):
    nan_series = df_5.q3.isna()[i]
    if (df_5['q3'][i] == r"\N") | (nan_series == True):
        df_5['q3'][i] = None
        i += 1
    elif df_5['q3'][i] != 0:
        df_5['q3'][i] = float(str(df_5['q3'][i]).split(':')[1]) + (60 * float(str(df_5['q3'][i]).split(':')[0]))
        i += 1
    else:
        df_5['q3'][i] = None
        i += 1
        
for i in range(len(df_5['fastestLapTime'])):
    nan_series = df_5.fastestLapTime.isna()[i]
    if (df_5['fastestLapTime'][i] == r"\N") | (nan_series == True):
        df_5['fastestLapTime'][i] = None
        i += 1
    elif df_5['fastestLapTime'][i] != 0:
        df_5['fastestLapTime'][i] = float(str(df_5['fastestLapTime'][i]).split(':')[1]) + (60 * float(str(df_5['fastestLapTime'][i]).split(':')[0]))
        i += 1
    else:
        df_5['fastestLapTime'][i] = None
        i += 1

df_5["minQualifyingTime"] = df_5[["q1", "q2", "q3"]].min(skipna = True, axis = 1)
df_5 = df_5.drop(columns = ["q1", "q2", "q3"])
df_5.head(10)

Unnamed: 0,driverId,driverName,number,nationality,raceId,constructorId,racePosition,fastestLapTime,constructorName,points,driverStanding,wins,year,name,date,position,minQualifyingTime
0,1,Lewis Hamilton,44.0,British,18,1,1,87.452,McLaren,10.0,1,1,2008,Australian Grand Prix,2008-03-16,1,85.187
1,5,Heikki Kovalainen,,Finnish,18,1,5,87.418,McLaren,4.0,5,0,2008,Australian Grand Prix,2008-03-16,3,85.452
2,2,Nick Heidfeld,,German,18,2,2,87.739,BMW Sauber,8.0,2,0,2008,Australian Grand Prix,2008-03-16,5,85.518
3,3,Nico Rosberg,6.0,German,18,3,3,88.09,Williams,6.0,3,0,2008,Australian Grand Prix,2008-03-16,7,86.059
4,6,Kazuki Nakajima,,Japanese,18,3,6,89.639,Williams,3.0,6,0,2008,Australian Grand Prix,2008-03-16,14,86.413
5,4,Fernando Alonso,14.0,Spanish,18,4,4,88.603,Renault,5.0,4,0,2008,Australian Grand Prix,2008-03-16,12,86.188
6,8,Kimi Räikkönen,7.0,Finnish,18,6,8,87.903,Ferrari,1.0,8,0,2008,Australian Grand Prix,2008-03-16,16,86.14
7,7,Sébastien Bourdais,,French,18,5,7,89.534,Toro Rosso,2.0,7,0,2008,Australian Grand Prix,2008-03-16,18,87.446
8,1,Lewis Hamilton,44.0,British,19,1,5,95.462,McLaren,14.0,1,1,2008,Malaysian Grand Prix,2008-03-23,4,94.627
9,5,Heikki Kovalainen,,Finnish,19,1,3,95.922,McLaren,10.0,4,0,2008,Malaysian Grand Prix,2008-03-23,3,94.759


In [23]:
df_minlap = pd.read_csv("./f1db_csv/min_laps.csv")
df_5 = df_5.merge(df_minlap,on='raceId')
df_5.head()

Unnamed: 0.1,driverId,driverName,number,nationality,raceId,constructorId,racePosition,fastestLapTime,constructorName,points,driverStanding,wins,year,name,date,position,minQualifyingTime,Unnamed: 0,minOverallRaceLap,minOverallQualiLap
0,1,Lewis Hamilton,44.0,British,18,1,1,87.452,McLaren,10.0,1,1,2008,Australian Grand Prix,2008-03-16,1,85.187,17,87.418,85.187
1,5,Heikki Kovalainen,,Finnish,18,1,5,87.418,McLaren,4.0,5,0,2008,Australian Grand Prix,2008-03-16,3,85.452,17,87.418,85.187
2,2,Nick Heidfeld,,German,18,2,2,87.739,BMW Sauber,8.0,2,0,2008,Australian Grand Prix,2008-03-16,5,85.518,17,87.418,85.187
3,3,Nico Rosberg,6.0,German,18,3,3,88.09,Williams,6.0,3,0,2008,Australian Grand Prix,2008-03-16,7,86.059,17,87.418,85.187
4,6,Kazuki Nakajima,,Japanese,18,3,6,89.639,Williams,3.0,6,0,2008,Australian Grand Prix,2008-03-16,14,86.413,17,87.418,85.187


In [26]:
df_5['race_lap_ratio'] = df_5['fastestLapTime']/df_5['minOverallRaceLap']
df_5['quali_lap_ratio'] = df_5['minQualifyingTime']/df_5['minOverallQualiLap']
df_5.head()

Unnamed: 0.1,driverId,driverName,number,nationality,raceId,constructorId,racePosition,fastestLapTime,constructorName,points,driverStanding,wins,year,name,date,position,minQualifyingTime,Unnamed: 0,minOverallRaceLap,minOverallQualiLap,race_lap_ratio,quali_lap_ratio
0,1,Lewis Hamilton,44.0,British,18,1,1,87.452,McLaren,10.0,1,1,2008,Australian Grand Prix,2008-03-16,1,85.187,17,87.418,85.187,1.00039,1.0
1,5,Heikki Kovalainen,,Finnish,18,1,5,87.418,McLaren,4.0,5,0,2008,Australian Grand Prix,2008-03-16,3,85.452,17,87.418,85.187,1.0,1.003111
2,2,Nick Heidfeld,,German,18,2,2,87.739,BMW Sauber,8.0,2,0,2008,Australian Grand Prix,2008-03-16,5,85.518,17,87.418,85.187,1.00367,1.003886
3,3,Nico Rosberg,6.0,German,18,3,3,88.09,Williams,6.0,3,0,2008,Australian Grand Prix,2008-03-16,7,86.059,17,87.418,85.187,1.00769,1.010236
4,6,Kazuki Nakajima,,Japanese,18,3,6,89.639,Williams,3.0,6,0,2008,Australian Grand Prix,2008-03-16,14,86.413,17,87.418,85.187,1.02541,1.014392


In [13]:
# Clean this dataset: drop variables and rearrange
df = copy.deepcopy(df_5)
df = df.drop(columns = ["driverId", "constructorId"])
df = df[["driverName", "number", "nationality", "year", "name", "date", "constructorName", "position", "minQualifyingTime", "racePosition", "fastestLapTime", "wins", "points", "driverStanding"]]

# Turn date into datetime
df["date"] = pd.to_datetime(df["date"])

# Save it into a csv
df.to_csv("./f1db_csv/driver_history.csv")

PermissionError: [Errno 13] Permission denied: './f1db_csv/driver_history.csv'

In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8181 entries, 0 to 8180
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   driverName         8181 non-null   object        
 1   number             4295 non-null   object        
 2   nationality        8181 non-null   object        
 3   year               8181 non-null   int64         
 4   name               8181 non-null   object        
 5   date               8181 non-null   datetime64[ns]
 6   constructorName    8181 non-null   object        
 7   position           8181 non-null   int64         
 8   minQualifyingTime  8052 non-null   float64       
 9   racePosition       6249 non-null   object        
 10  fastestLapTime     6220 non-null   object        
 11  wins               8181 non-null   int64         
 12  points             8181 non-null   float64       
 13  driverStanding     8181 non-null   int64         
dtypes: datet

In [28]:
# Function that chooses race history for a particular driver
def driver_history_selection(driver_name):
    driver_table = df[df.driverName == driver_name]
    driver_table = driver_table.sort_values("date", ascending = False)
    return driver_table

driver_history_selection("Alexander Albon")

Unnamed: 0,driverName,number,nationality,year,name,date,constructorName,position,minQualifyingTime,racePosition,fastestLapTime,wins,points,driverStanding,minOverallRaceLap,minOverallQualiLap,race_lap_ratio,quali_lap_ratio
7383,Alexander Albon,23,Thai,2020,British Grand Prix,2020-08-02,Red Bull,12,86.545,8.0,88.689,0,26.0,6,87.097,84.303,1.01828,1.026595
7363,Alexander Albon,23,Thai,2020,Hungarian Grand Prix,2020-07-19,Red Bull,13,75.715,5.0,79.44,0,22.0,5,76.627,73.447,1.03671,1.030879
7343,Alexander Albon,23,Thai,2020,Styrian Grand Prix,2020-07-12,Red Bull,7,79.014,4.0,67.299,0,12.0,8,65.619,77.825,1.0256,1.015278
7323,Alexander Albon,23,Thai,2020,Austrian Grand Prix,2020-07-05,Red Bull,5,63.746,13.0,68.432,0,0.0,13,67.475,62.939,1.01418,1.012822
7305,Alexander Albon,23,Thai,2019,Abu Dhabi Grand Prix,2019-12-01,Red Bull,6,95.682,6.0,102.219,0,92.0,8,99.283,94.779,1.02957,1.009527
7285,Alexander Albon,23,Thai,2019,Brazilian Grand Prix,2019-11-17,Red Bull,6,67.935,14.0,71.087,0,84.0,8,70.698,67.503,1.0055,1.0064
7265,Alexander Albon,23,Thai,2019,United States Grand Prix,2019-11-03,Red Bull,6,92.548,5.0,98.029,0,84.0,6,96.169,92.029,1.01934,1.00564
7245,Alexander Albon,23,Thai,2019,Mexican Grand Prix,2019-10-27,Red Bull,5,75.336,5.0,79.325,0,74.0,8,79.232,74.758,1.00117,1.007732
7225,Alexander Albon,23,Thai,2019,Japanese Grand Prix,2019-10-13,Red Bull,6,87.851,4.0,92.775,0,64.0,8,90.983,87.064,1.0197,1.009039
7205,Alexander Albon,23,Thai,2019,Russian Grand Prix,2019-09-29,Red Bull,19,99.197,5.0,96.762,0,52.0,8,95.761,91.628,1.01045,1.082606


In [54]:
import plotly.express as px
df_6 = df_5[df_5.year==2020]
df_grouped = [y for x,y in df_6.groupby('driverName',as_index=False)]
for i in range(len(df_grouped)):
    df_grouped[i]['rolling_quali_lap_ratio']=df_grouped[i]['quali_lap_ratio'].rolling(5,min_periods=1,win_type='triang').mean()
df_6 = pd.concat(df_grouped)
df_6.head(10)

Unnamed: 0.1,driverId,driverName,number,nationality,raceId,constructorId,racePosition,fastestLapTime,constructorName,points,driverStanding,wins,year,name,date,position,minQualifyingTime,Unnamed: 0,minOverallRaceLap,minOverallQualiLap,race_lap_ratio,quali_lap_ratio,rolling_quali_lap_ratio
7323,848,Alexander Albon,23,Thai,1031,9,13,68.432,Red Bull,0.0,13,0,2020,Austrian Grand Prix,2020-07-05,5,63.746,355,67.475,62.939,1.01418,1.012822,1.012822
7343,848,Alexander Albon,23,Thai,1032,9,4,67.299,Red Bull,12.0,8,0,2020,Styrian Grand Prix,2020-07-12,7,79.014,356,65.619,77.825,1.0256,1.015278,1.013641
7363,848,Alexander Albon,23,Thai,1033,9,5,79.44,Red Bull,22.0,5,0,2020,Hungarian Grand Prix,2020-07-19,13,75.715,357,76.627,73.447,1.03671,1.030879,1.01665
7383,848,Alexander Albon,23,Thai,1034,9,8,88.689,Red Bull,26.0,6,0,2020,British Grand Prix,2020-08-02,12,86.545,358,87.097,84.303,1.01828,1.026595,1.019979
7321,841,Antonio Giovinazzi,99,Italian,1031,51,9,68.796,Alfa Romeo,2.0,9,0,2020,Austrian Grand Prix,2020-07-05,18,65.175,355,67.475,62.939,1.01958,1.035526,1.035526
7341,841,Antonio Giovinazzi,99,Italian,1032,51,14,68.512,Alfa Romeo,2.0,13,0,2020,Styrian Grand Prix,2020-07-12,19,81.831,356,65.619,77.825,1.04409,1.051474,1.040842
7361,841,Antonio Giovinazzi,99,Italian,1033,51,17,80.096,Alfa Romeo,2.0,14,0,2020,Hungarian Grand Prix,2020-07-19,19,76.506,357,76.627,73.447,1.04527,1.041649,1.041863
7381,841,Antonio Giovinazzi,99,Italian,1034,51,14,90.977,Alfa Romeo,2.0,14,0,2020,British Grand Prix,2020-08-02,17,87.164,358,87.097,84.303,1.04455,1.033937,1.042839
7310,832,Carlos Sainz,55,Spanish,1031,1,5,67.974,McLaren,10.0,5,0,2020,Austrian Grand Prix,2020-07-05,8,63.971,355,67.475,62.939,1.0074,1.016397,1.016397
7330,832,Carlos Sainz,55,Spanish,1032,1,9,65.619,McLaren,13.0,7,0,2020,Styrian Grand Prix,2020-07-12,3,78.59,356,65.619,77.825,1.0,1.00983,1.014208


In [55]:
px.line(df_6,x='raceId',y='rolling_quali_lap_ratio',color='driverName')