In [1]:
# Notebook-wide IPython setup.
# Turn on the "greedy" mode of the tab completer.
%config IPCompleter.greedy=True
# Render matplotlib figures inside the notebook output area.
%matplotlib inline
# Load the autoreload extension and use mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os, sys, re, math, datetime as dt, pandas as pd, numpy as np, time
import logging
import matplotlib.pyplot as plt
from string import Template
from IPython.display import display, HTML

# Root logger: INFO level, each record prefixed with timestamp, logger name,
# line number, function name and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(name)s:%(lineno)d:%(funcName)s] [%(levelname)s] %(message)s',
)

# Raise the pandas display limits so frames are shown without row/column
# elision, cell truncation or line wrapping (all four limits set to 5000).
pd.options.display.max_rows = 5000
pd.options.display.max_columns = 5000
pd.options.display.max_colwidth = 5000
pd.options.display.width = 5000

def display_df(df, n=4):
    """Preview a DataFrame: rich-display its first rows, then print its shape.

    Args:
        df: Object to preview; it must provide ``head(n)`` and ``shape``
            (e.g. a pandas DataFrame).
        n: Number of leading rows to display. Defaults to 4, which is the
            fixed value this helper used before the parameter existed.

    Returns:
        None. The rows go through ``display`` and the shape tuple is
        written to stdout with ``print``.
    """
    display(df.head(n))
    print(df.shape)

In [4]:
# Read the driver standings table from the local F1 data folder and show
# its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='../f1_data/driver_standings.csv')
df.head(n=5)


Unnamed: 0,driverStandingsId,raceId,driverId,points,position,positionText,wins
0,1,18,1,10.0,1,1,1
1,2,18,2,8.0,2,2,0
2,3,18,3,6.0,3,3,0
3,4,18,4,5.0,4,4,0
4,5,18,5,4.0,5,5,0


In [5]:
# Read the per-race results table and show its first five rows.
# The file is read with pandas' default settings, so placeholder strings
# such as '\N' stay in the frame as literal text.
results = pd.read_csv(filepath_or_buffer='../f1_data/results.csv')
results.head(n=5)

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId
0,1,18,1,1,22,1,1,1,1,10.0,58,1:34:50.616,5690616,39,2,1:27.452,218.3,1
1,2,18,2,2,3,5,2,2,2,8.0,58,+5.478,5696094,41,3,1:27.739,217.586,1
2,3,18,3,3,7,7,3,3,3,6.0,58,+8.163,5698779,41,5,1:28.090,216.719,1
3,4,18,4,4,5,11,4,4,4,5.0,58,+17.181,5707797,58,7,1:28.603,215.464,1
4,5,18,5,1,23,3,5,5,5,4.0,58,+18.014,5708630,43,1,1:27.418,218.385,1


In [6]:
# Read the race calendar table (one row per race) and show its first five rows.
races = pd.read_csv(filepath_or_buffer='../f1_data/races.csv')
races.head(n=5)

Unnamed: 0,raceId,year,round,circuitId,name,date,time,url,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time
0,1,2009,1,1,Australian Grand Prix,2009-03-29,06:00:00,http://en.wikipedia.org/wiki/2009_Australian_Grand_Prix,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
1,2,2009,2,2,Malaysian Grand Prix,2009-04-05,09:00:00,http://en.wikipedia.org/wiki/2009_Malaysian_Grand_Prix,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
2,3,2009,3,17,Chinese Grand Prix,2009-04-19,07:00:00,http://en.wikipedia.org/wiki/2009_Chinese_Grand_Prix,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
3,4,2009,4,3,Bahrain Grand Prix,2009-04-26,12:00:00,http://en.wikipedia.org/wiki/2009_Bahrain_Grand_Prix,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N
4,5,2009,5,4,Spanish Grand Prix,2009-05-10,12:00:00,http://en.wikipedia.org/wiki/2009_Spanish_Grand_Prix,\N,\N,\N,\N,\N,\N,\N,\N,\N,\N


In [8]:
# List the raceId of every race whose `year` is 2023.
races.loc[races['year'] == 2023, 'raceId'].to_list()

[1098,
 1099,
 1100,
 1101,
 1102,
 1104,
 1105,
 1106,
 1107,
 1108,
 1109,
 1110,
 1111,
 1112,
 1113,
 1114,
 1115,
 1116,
 1117,
 1118,
 1119,
 1120]

In [9]:
# Show the dtype pandas inferred for each column of `results`; columns
# reported as `object` hold Python strings rather than numbers.
results.dtypes

resultId             int64
raceId               int64
driverId             int64
constructorId        int64
number              object
grid                 int64
position            object
positionText        object
positionOrder        int64
points             float64
laps                 int64
time                object
milliseconds        object
fastestLap          object
rank                object
fastestLapTime      object
fastestLapSpeed     object
statusId             int64
dtype: object

In [16]:
# Results of race 1089 ordered by total race time, fastest first.
# `milliseconds` is an object (string) column in `results`, so sorting it
# directly compares values character by character. The sort key converts the
# values to numbers first; anything non-numeric (presumably the '\N'
# placeholder also seen in `time`) becomes NaN and is placed last.
(
    results[results.raceId == 1089]
    .sort_values(
        by='milliseconds',
        ascending=True,
        key=lambda col: pd.to_numeric(col, errors='coerce'),
    )
)

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId
25700,25706,1089,830,9,1,7,1,1,1,25.0,53,1:20:27.511,4827511,38,6,1:24.745,246.088,1
25701,25707,1089,844,6,16,1,2,2,2,18.0,53,+2.446,4829957,38,2,1:24.336,247.282,1
25702,25708,1089,847,131,63,2,3,3,3,15.0,53,+3.405,4830916,40,7,1:25.288,244.522,1
25703,25709,1089,832,6,55,18,4,4,4,12.0,53,+5.061,4832572,41,4,1:24.446,246.96,1
25704,25710,1089,1,131,44,19,5,5,5,10.0,53,+5.380,4832891,43,3,1:24.434,246.995,1
25705,25711,1089,815,9,11,13,6,6,6,9.0,53,+6.091,4833602,46,1,1:24.030,248.182,1
25706,25712,1089,846,1,4,3,7,7,7,6.0,53,+6.207,4833718,43,5,1:24.718,246.167,1
25707,25713,1089,842,213,10,5,8,8,8,4.0,53,+6.396,4833907,30,14,1:26.718,240.489,1
25708,25714,1089,856,3,45,8,9,9,9,2.0,53,+7.122,4834633,41,13,1:26.624,240.75,1
25709,25715,1089,855,51,24,9,10,10,10,1.0,53,+7.910,4835421,41,10,1:26.361,241.484,1


In [11]:
# Peek at the raw `time` strings to see which formats occur (absolute
# 'H:MM:SS.mmm' values, '+gap' offsets and the '\N' placeholder) without
# dumping the entire column into the notebook output.
results['time'].head(100).to_list()

['1:34:50.616',
 '+5.478',
 '+8.163',
 '+17.181',
 '+18.014',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '1:31:18.555',
 '+19.570',
 '+38.450',
 '+45.832',
 '+46.548',
 '+49.833',
 '+1:08.130',
 '+1:10.041',
 '+1:16.220',
 '+1:26.214',
 '+1:32.202',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '1:31:06.970',
 '+3.339',
 '+4.998',
 '+8.409',
 '+26.789',
 '+41.314',
 '+45.473',
 '+55.889',
 '+1:09.500',
 '+1:17.181',
 '+1:17.862',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '1:38:19.051',
 '+3.228',
 '+4.187',
 '+5.694',
 '+35.938',
 '+53.010',
 '+58.244',
 '+59.435',
 '+1:03.073',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '\\N',
 '1:26:49.451',
 '+3.779',
 '+4.271',
 '+21.945',
 '+38.741',
 '+53.724',
 '+1:04.229',
 '+1:11.406',
 '+1:15.270',
 '+1:16.34