In [2]:
%config IPCompleter.greedy=True
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [3]:
import os, sys, re, math, datetime as dt, pandas as pd, numpy as np, time
import logging
import matplotlib.pyplot as plt
from string import Template
from IPython.display import display, HTML

logging.basicConfig(format='%(asctime)s [%(name)s:%(lineno)d:%(funcName)s] [%(levelname)s] %(message)s', level=logging.INFO)

pd.set_option('display.max_rows', 5000)
pd.set_option('display.max_columns', 5000)
pd.set_option('display.max_colwidth', 5000)
pd.set_option('display.width', 5000)

def display_df(df):
    display(df.head(4))
    print(df.shape)

In [5]:
import fastf1 as f1


# schedule = f1.get_event_schedule(2023)
# # print(session.event)
# display_df(schedule)
# schedule.dtypes


In [6]:

def get_rounds(year):
    schedule = f1.get_event_schedule(year)
    rounds = []
    round_number = 1
    event_name = 6
    session_5 = 20
    
    # display(schedule.head(10))
    # display(schedule.tail())
    
    for row in schedule.itertuples():
        if row[session_5] == 'Race':
            rounds.append(row[event_name])
            
    # print(rounds)
    return rounds


In [None]:
RACES = []

for year in range(2018, 2024):
    rounds = get_rounds(year)
    # print(year)
    # print(rounds)
    for round in rounds:
        session = f1.get_session(year, round, 'R')
        print(session)
        RACES.append(session)

# for race in RACES:
#     print(race.session_info)
    

In [None]:
cache_dir = f"/Users/hannahwang/Projects/fastf1_cache"
f1.Cache.enable_cache(cache_dir=cache_dir)

race_0 = RACES[0]
race_0.load()

drivers = race_0.results.Abbreviation.tolist()

print("start:", drivers)

for race in RACES[1:]:
    race.load()
    race_drivers = race.results.Abbreviation.tolist()
    
    valid_drivers = []
    for driver in drivers:
        if driver in race_drivers:
            valid_drivers.append(driver)

    drivers = valid_drivers
    print(race, "remaining:", drivers)



In [11]:
drivers


['VER', 'BOT', 'SAI', 'LEC', 'GAS']

In [28]:
from pathlib import Path
from datetime import datetime
import os
import json

my_cache = Path('/Users/hannahwang/Projects/formula_1_stats/f1_cache')


def add_file(session, filename, data, is_df=True):
    
    year = str(session.event.EventDate.year)
    date = dt.datetime.strptime(str(session.event.EventDate), "%Y-%m-%d %H:%M:%S").strftime("%Y_%m_%d")
    eventname = str(session.event.EventName).replace(" ", "_")
    session_name = f"{date}_{eventname}_Race"

    print("year:", year)
    print("date:", date)
    print("eventname:", eventname)
    print("session_name:", session_name)
    
    parent = Path(my_cache, year, session_name)
    parent.mkdir(parents=True, exist_ok=True)
    
    filepath = Path(parent, filename)
    
    print("parent:", parent)
    print("filepath:", filepath)

    if is_df:
        data.to_csv(filepath, index=False)
    else:
        with open(filepath, 'w') as convert_file: 
            convert_file.write(data)

In [None]:
file_name_types = ['lap_count', 'event_data', 'session_info', 'session_status_data', 'timing_app_data',
                   'position_data', 'car_data', 'driver_info', '_extended_timing_data']

# race.event -> event_data.csv
# for race in RACES:
#     print(race)
#     # print(race_50.event)
#     df = pd.DataFrame.from_dict([race.event], orient='columns')
#     # display(df)
    
#     add_file(race, 'event_data.csv', df)

In [None]:
# race.session_info -> session_info.txt
# for race in RACES:
#     print(race)
#     add_file(race, 'session_info.txt', str(race.session_info), is_df=False)

In [None]:
# race.results -> result_data.txt
# for race in RACES:
#     print(race)
#     add_file(race, 'result_data.csv', race.results)

In [None]:
# race.laps -> lap_data_general.csv
# for race in RACES:
#     print(race)
#     add_file(race, 'lap_data_general.csv', race.laps)

In [None]:
# race.results -> result_data.txt
for race in RACES:
    print(race)
    add_file(race, 'lap_data_general.csv', race.laps)

In [246]:
laps = RACES[50].laps
laps.head()
# laps.Driver.unique()
driver = 'GAS'
driver_laps = laps[laps.Driver==driver]
    
lap_list = list(driver_laps.LapNumber)
min_lap = int(min(lap_list))
max_lap = int(max(lap_list))
# print(min_lap, max_lap)

all_lap_tel = []
for lap in range(min_lap, max_lap+1):
    tel = driver_laps.pick_laps([lap]).get_telemetry()
    tel['Lap'] = lap
    
    # reorder cols
    cols = list(tel.columns)
    cols.insert(3, cols.pop())
    tel = tel[cols]

    print(lap, len(tel.columns))
    
    all_lap_tel.append(tel)
    
all_lap_tel = pd.concat(all_lap_tel)
all_lap_tel['Driver'] = driver

display(all_lap_tel.head())

cols = list(all_lap_tel.columns)
print(cols)
cols.insert(0, cols.pop())

all_lap_tel = all_lap_tel[cols]
display(all_lap_tel.head())


1 19


Unnamed: 0,Date,SessionTime,DriverAhead,Lap,DistanceToDriverAhead,Time,RPM,Speed,nGear,Throttle,Brake,DRS,Source,Distance,RelativeDistance,Status,X,Y,Z,Driver
2,2020-09-13 13:13:25.607,0 days 00:33:25.335000,,1,0.218889,0 days 00:00:00,10917,1,1,25,False,1,interpolation,0.001304,1e-06,OnTrack,-1626,-1969,3009,GAS
3,2020-09-13 13:13:25.800,0 days 00:33:25.528000,,1,0.218889,0 days 00:00:00.193000,10917,2,1,25,False,1,pos,0.204105,0.000177,OnTrack,-1626,-1972,3009,GAS
4,2020-09-13 13:13:25.830,0 days 00:33:25.558000,,1,0.218889,0 days 00:00:00.223000,10917,4,1,25,False,1,car,0.266667,0.000231,OnTrack,-1625,-1972,3008,GAS
5,2020-09-13 13:13:26.019,0 days 00:33:25.747000,,1,0.218889,0 days 00:00:00.412000,10147,7,1,26,False,1,pos,0.830416,0.000719,OnTrack,-1624,-1971,3009,GAS
6,2020-09-13 13:13:26.070,0 days 00:33:25.798000,,1,0.218889,0 days 00:00:00.463000,9377,11,1,28,False,1,car,1.0,0.000866,OnTrack,-1623,-1969,3009,GAS


['Date', 'SessionTime', 'DriverAhead', 'Lap', 'DistanceToDriverAhead', 'Time', 'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'Source', 'Distance', 'RelativeDistance', 'Status', 'X', 'Y', 'Z', 'Driver']


Unnamed: 0,Driver,Date,SessionTime,DriverAhead,Lap,DistanceToDriverAhead,Time,RPM,Speed,nGear,Throttle,Brake,DRS,Source,Distance,RelativeDistance,Status,X,Y,Z
2,GAS,2020-09-13 13:13:25.607,0 days 00:33:25.335000,,1,0.218889,0 days 00:00:00,10917,1,1,25,False,1,interpolation,0.001304,1e-06,OnTrack,-1626,-1969,3009
3,GAS,2020-09-13 13:13:25.800,0 days 00:33:25.528000,,1,0.218889,0 days 00:00:00.193000,10917,2,1,25,False,1,pos,0.204105,0.000177,OnTrack,-1626,-1972,3009
4,GAS,2020-09-13 13:13:25.830,0 days 00:33:25.558000,,1,0.218889,0 days 00:00:00.223000,10917,4,1,25,False,1,car,0.266667,0.000231,OnTrack,-1625,-1972,3008
5,GAS,2020-09-13 13:13:26.019,0 days 00:33:25.747000,,1,0.218889,0 days 00:00:00.412000,10147,7,1,26,False,1,pos,0.830416,0.000719,OnTrack,-1624,-1971,3009
6,GAS,2020-09-13 13:13:26.070,0 days 00:33:25.798000,,1,0.218889,0 days 00:00:00.463000,9377,11,1,28,False,1,car,1.0,0.000866,OnTrack,-1623,-1969,3009


In [298]:
# race = RACES[1]
# print(race)
# laps = race.laps
# # display(laps.head())
# driver = 'VER'
# driver_laps = laps[laps.Driver==driver]
# display(driver_laps.head())
# lap = 3
# tel = laps.pick_laps([5]).pick_driver('HAM')
# display(tel.head())
# tel.get_pos_data()

2018 Season Round 2: Bahrain Grand Prix - Race


Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,Sector2SessionTime,Sector3SessionTime,SpeedI1,SpeedI2,SpeedFL,SpeedST,IsPersonalBest,Compound,TyreLife,FreshTyre,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate
564,0 days 00:09:20.801000,VER,33,0 days 00:01:43.654000,1.0,,NaT,NaT,NaT,0 days 00:00:43.420000,0 days 00:00:23.877000,NaT,0 days 00:08:56.924000,0 days 00:09:21.236000,225.0,248.0,291.0,248.0,False,,,True,Red Bull Racing,0 days 00:07:36.924000,NaT,2.0,11.0,False,,False,False
565,0 days 00:12:17.903000,VER,33,NaT,2.0,1.0,NaT,0 days 00:12:15.165000,0 days 00:00:36.350000,0 days 00:01:19.426000,0 days 00:01:01.330000,0 days 00:09:57.147000,0 days 00:11:16.632000,0 days 00:12:17.999000,151.0,104.0,,311.0,False,SOFT,1.0,True,Red Bull Racing,0 days 00:09:20.801000,NaT,26.0,19.0,False,,False,False
566,0 days 00:14:26.015000,VER,33,0 days 00:02:08.112000,3.0,2.0,0 days 00:12:46.029000,NaT,0 days 00:01:00.233000,0 days 00:00:43.101000,0 days 00:00:24.778000,0 days 00:13:18.136000,0 days 00:14:01.237000,0 days 00:14:26.015000,220.0,240.0,273.0,190.0,False,SUPERSOFT,1.0,True,Red Bull Racing,0 days 00:12:17.903000,NaT,67.0,19.0,False,,False,False
567,0 days 00:16:56.015000,VER,33,NaT,4.0,2.0,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,SUPERSOFT,2.0,True,Red Bull Racing,0 days 00:14:26.015000,NaT,,,False,,True,False


Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,Sector2SessionTime,Sector3SessionTime,SpeedI1,SpeedI2,SpeedFL,SpeedST,IsPersonalBest,Compound,TyreLife,FreshTyre,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate
628,0 days 00:16:48.880000,HAM,44,0 days 00:01:36.932000,5.0,1.0,NaT,NaT,0 days 00:00:30.206000,0 days 00:00:42.346000,0 days 00:00:24.380000,0 days 00:15:42.154000,0 days 00:16:24.500000,0 days 00:16:48.880000,240.0,253.0,291.0,333.0,True,SOFT,7.0,False,Mercedes,0 days 00:15:11.948000,NaT,2,6.0,False,,False,True


KeyError: '44'

In [None]:
# for race in RACES[31:]:
#     laps = race.laps
    
#     all_driver_lap_tel = []
#     all_empty = True
    
#     drivers = ['VER', 'BOT', 'SAI', 'LEC', 'GAS', 'VET', 'HAM']
#     for driver in drivers:
#         if driver not in list(laps.Driver.unique()):
#             print(f"WARNING: no data for {driver}")
#             continue
            
#         driver_laps = laps.pick_driver(driver)
        
#         lap_list = list(driver_laps.LapNumber)
#         min_lap = int(min(lap_list))
#         max_lap = int(max(lap_list))
#         print(min_lap, max_lap)
    
#         all_lap_tel = []
#         empty = True
#         for lap in range(min_lap, max_lap+1):
#             # print(driver, lap)
#             try:
#                 tel = laps.pick_driver(driver).pick_laps(lap).get_telemetry()
#                 empty = False
#             except KeyError:
#                 print(f"{race}: no car_data for {driver} for {lap}")
#                 continue
#             except ValueError:
#                 print(f"{race}: no car_data for {driver} for {lap}")
#                 print("attempt to get argmin of an empty sequence")
#                 continue

#             tel['Lap'] = lap
            
#             # reorder cols
#             cols = list(tel.columns)
#             cols.insert(0, cols.pop())
#             tel = tel[cols]
        
#             # print(lap, len(tel.columns))
            
#             all_lap_tel.append(tel)
            
#         if not empty:
#             all_lap_tel = pd.concat(all_lap_tel)
#             all_lap_tel['Driver'] = driver
    
#             # reorder cols
#             cols = list(all_lap_tel.columns)
#             cols.insert(0, cols.pop())
#             all_lap_tel = all_lap_tel[cols]
            
#             # display(all_lap_tel.head())
#             all_driver_lap_tel.append(all_lap_tel)
#             all_empty = False

#     if not all_empty:
#         all_driver_lap_tel = pd.concat(all_driver_lap_tel)
#         display(all_driver_lap_tel.head(1))
#         # display(all_driver_lap_tel.tail(1))

#         filepath = 'tel_data.csv'
        add_file(race, filepath, all_driver_lap_tel)

In [64]:
# filepath = '/Users/hannahwang/Projects/formula_1_stats/f1_cache/2018/2018_05_13_Spanish_Grand_Prix_Race/track_status.csv'

# df = pd.read_csv(
#     filepath,
#     # parse_dates=[24],
#     # date_format='%Y-%m-%d %H:%M:%S.%f'
    
# )

# # for i in range(1,6):
# #     df[f'Session{i}DateUtc'] = pd.to_datetime(df[f'Session{i}DateUtc'])

# # to_timedelta = ['Time', 'LapTime', 'PitOutTime', 'PitInTime', 'Sector1Time', 'Sector2Time', 'Sector3Time',
# #  'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime', 'LapStartTime']

# # for col in to_timedelta:
# #     df[col] = pd.to_timedelta(df[col])    

# # df['IsPersonalBest'] = df['IsPersonalBest'].astype('bool')
# # df.Time = pd.to_timedelta(df.Time)
# # df.SessionTime = pd.to_timedelta(df.SessionTime)

# # df['Time'] = pd.to_timedelta(df['Time'])
# # print(df.dtypes)
# # display(df)

Time       timedelta64[ns]
Status               int64
Message             object
dtype: object


Unnamed: 0,Time,Status,Message
0,0 days 00:07:38.778000,2,Yellow
1,0 days 00:07:45.735000,4,SCDeployed
2,0 days 00:20:01.446000,1,AllClear
3,0 days 01:06:31.369000,2,Yellow
4,0 days 01:07:06.365000,6,VSCDeployed
5,0 days 01:10:26.389000,7,VSCEnding
6,0 days 01:10:40.273000,1,AllClear
7,0 days 01:10:45.365000,2,Yellow
8,0 days 01:10:53.057000,1,AllClear


In [None]:
# track_status

In [42]:
# RACES[31]

2019 Season Round 11: German Grand Prix - Race

In [None]:
# race = RACES[0]
# for race in RACES:
#     add_file(race, 'session_status.csv', race.session_status)
#     add_file(race, 'track_status.csv', race.track_status)

In [310]:
filepath = '/Users/hannahwang/Projects/formula_1/f1_cache/2023/2023_11_26_Abu_Dhabi_Grand_Prix_Race/track_status.csv'
df = pd.read_csv(filepath)
df

Unnamed: 0,Time,Status,Message
0,0 days 00:13:17.860000,1,AllClear
1,0 days 00:22:34.031000,2,Yellow
2,0 days 00:22:36.594000,1,AllClear


In [311]:
RACES[0].session_start_time

datetime.timedelta(seconds=427, microseconds=988000)

In [None]:
# race = RACES[0]

# for race in RACES:
#     driver_data = []
#     drivers = ['VER', 'BOT', 'SAI', 'LEC', 'GAS', 'VET', 'HAM']
#     for driver in drivers:
#         try:
#             df = pd.DataFrame.from_dict(race.get_driver(driver)).T
#         except ValueError:
#             print(f"no driver data for {driver}")
#             continue
#         # display(df)
#         driver_data.append(df)
        
#     df = pd.concat(driver_data)
#     df.Time = pd.to_timedelta(df.Time)
#     # display(df)
#     add_file(race, "driver_data.csv", df)

In [None]:
for race in RACES[50:]:
    try:
        corners = race.get_circuit_info().corners
        corners['Type'] = 'corners'
        lights = race.get_circuit_info().marshal_lights
        lights['Type'] = 'lights'
        sectors = race.get_circuit_info().marshal_sectors
        sectors['Type'] = 'sectors'
    except KeyError:
        print(f"keyerror, no circuit info for {race}")
        continue
    except :
        print(f"TypeError: 'NoneType' object is not iterable: {race}")
        continue
        
    df = pd.concat([corners, lights, sectors])
    df['Rotation'] = race.get_circuit_info().rotation

    add_file(race, 'circuit_info.csv', df)

In [None]:
filepath = '/Users/hannahwang/Projects/formula_1_stats/f1_cache/2023/2023_11_26_Abu_Dhabi_Grand_Prix_Race/'
file = 'lap_data_general.csv'

laps = pd.read_csv(f"{filepath}{file}")

laps.head()
driver_id = "VER"

driver_laps = laps[laps.Driver==driver_id]

laps = list(driver_laps.LapNumber.astype('int64'))
laps

In [89]:
pos = pd.read_csv(f"{filepath}pos_data.csv")
pos.head()

Unnamed: 0,Driver,Lap,Date,Status,X,Y,Z,Source,Time,SessionTime
0,VER,1,2023-11-26 13:03:25.868,OnTrack,1511,2188,-240,pos,0 days 00:00:00.224000,0 days 01:02:24.743000
1,VER,1,2023-11-26 13:03:26.289,OnTrack,1512,2188,-240,pos,0 days 00:00:00.645000,0 days 01:02:25.164000
2,VER,1,2023-11-26 13:03:26.529,OnTrack,1527,2190,-240,pos,0 days 00:00:00.885000,0 days 01:02:25.404000
3,VER,1,2023-11-26 13:03:26.949,OnTrack,1545,2192,-240,pos,0 days 00:00:01.305000,0 days 01:02:25.824000
4,VER,1,2023-11-26 13:03:27.109,OnTrack,1559,2194,-240,pos,0 days 00:00:01.465000,0 days 01:02:25.984000


In [108]:
pos = pd.read_csv(f"{filepath}circuit_info.csv")
pos.head()

Unnamed: 0,X,Y,Number,Letter,Angle,Distance,Type,Rotation
0,4294.859375,2690.717773,1,,-35.3406,369.922891,corners,280.0
1,3848.351318,4987.570801,2,,42.198164,622.414126,corners,280.0
2,2206.551025,6155.120117,3,,-163.169332,829.423488,corners,280.0
3,2378.65625,8052.805176,4,,-8.298533,1033.333773,corners,280.0
4,1839.358276,11771.707031,5,,84.326847,1409.545099,corners,280.0


In [109]:
pos = pd.read_csv(f"{filepath}lap_data_general.csv")
pos.head()

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,Sector2SessionTime,Sector3SessionTime,SpeedI1,SpeedI2,SpeedFL,SpeedST,IsPersonalBest,Compound,TyreLife,FreshTyre,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate
0,0 days 01:03:57.047000,VER,1,0 days 00:01:32.190000,1.0,1.0,,,,0 days 00:00:38.769000,0 days 00:00:33.004000,,0 days 01:03:24.128000,0 days 01:03:57.112000,284.0,293.0,213.0,304.0,False,MEDIUM,1.0,True,Red Bull Racing,0 days 01:02:24.519000,2023-11-26 13:03:25.644,1,1.0,False,,False,False
1,0 days 01:05:27.757000,VER,1,0 days 00:01:30.710000,2.0,1.0,,,0 days 00:00:18.377000,0 days 00:00:38.691000,0 days 00:00:33.642000,0 days 01:04:15.424000,0 days 01:04:54.115000,0 days 01:05:27.757000,281.0,285.0,212.0,294.0,True,MEDIUM,2.0,True,Red Bull Racing,0 days 01:03:57.047000,2023-11-26 13:04:58.172,1,1.0,False,,False,True
2,0 days 01:06:58.165000,VER,1,0 days 00:01:30.408000,3.0,1.0,,,0 days 00:00:18.549000,0 days 00:00:38.725000,0 days 00:00:33.134000,0 days 01:05:46.306000,0 days 01:06:25.031000,0 days 01:06:58.165000,280.0,293.0,211.0,299.0,True,MEDIUM,3.0,True,Red Bull Racing,0 days 01:05:27.757000,2023-11-26 13:06:28.882,1,1.0,False,,False,True
3,0 days 01:08:28.881000,VER,1,0 days 00:01:30.716000,4.0,1.0,,,0 days 00:00:18.562000,0 days 00:00:38.810000,0 days 00:00:33.344000,0 days 01:07:16.727000,0 days 01:07:55.537000,0 days 01:08:28.881000,,291.0,212.0,293.0,False,MEDIUM,4.0,True,Red Bull Racing,0 days 01:06:58.165000,2023-11-26 13:07:59.290,1,1.0,False,,False,True
4,0 days 01:09:59.433000,VER,1,0 days 00:01:30.552000,5.0,1.0,,,0 days 00:00:18.599000,0 days 00:00:38.714000,0 days 00:00:33.239000,0 days 01:08:47.480000,0 days 01:09:26.194000,0 days 01:09:59.433000,,293.0,209.0,291.0,False,MEDIUM,5.0,True,Red Bull Racing,0 days 01:08:28.881000,2023-11-26 13:09:30.006,1,1.0,False,,False,True


In [110]:
pos = pd.read_csv(f"{filepath}tel_data.csv")
pos.head()

Unnamed: 0,Driver,Lap,Date,SessionTime,DriverAhead,DistanceToDriverAhead,Time,RPM,Speed,nGear,Throttle,Brake,DRS,Source,Distance,RelativeDistance,Status,X,Y,Z
0,VER,1,2023-11-26 13:03:25.644,0 days 01:02:24.519000,,0.0,0 days 00:00:00,10082,0,1,15,True,1,interpolation,-0.033613,-7e-06,OnTrack,1510,2188,-240
1,VER,1,2023-11-26 13:03:25.847,0 days 01:02:24.722000,,0.0,0 days 00:00:00.203000,10048,0,1,15,True,1,car,0.0,0.0,OnTrack,1511,2188,-240
2,VER,1,2023-11-26 13:03:25.868,0 days 01:02:24.743000,,0.0,0 days 00:00:00.224000,9301,3,1,15,True,1,pos,0.024529,5e-06,OnTrack,1511,2188,-240
3,VER,1,2023-11-26 13:03:26.289,0 days 01:02:25.164000,,0.0,0 days 00:00:00.645000,8554,7,1,15,True,1,pos,1.292379,0.00025,OnTrack,1512,2188,-240
4,VER,1,2023-11-26 13:03:26.327,0 days 01:02:25.202000,16.0,0.0,0 days 00:00:00.683000,7808,11,1,15,False,1,car,1.466667,0.000284,OnTrack,1513,2188,-240


In [111]:
pos = pd.read_csv(f"{filepath}event_data.csv")
pos.head()

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,Session2,Session2Date,Session2DateUtc,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport
0,22,Abu Dhabi,Yas Island,FORMULA 1 ETIHAD AIRWAYS ABU DHABI GRAND PRIX 2023,2023-11-26,Abu Dhabi Grand Prix,conventional,Practice 1,2023-11-24 13:30:00+04:00,2023-11-24 09:30:00,Practice 2,2023-11-24 17:00:00+04:00,2023-11-24 13:00:00,Practice 3,2023-11-25 14:30:00+04:00,2023-11-25 10:30:00,Qualifying,2023-11-25 18:00:00+04:00,2023-11-25 14:00:00,Race,2023-11-26 17:00:00+04:00,2023-11-26 13:00:00,True


In [112]:
pos = pd.read_csv(f"{filepath}driver_data.csv")
pos.head()

Unnamed: 0,DriverNumber,BroadcastName,Abbreviation,DriverId,TeamName,TeamColor,TeamId,FirstName,LastName,FullName,HeadshotUrl,CountryCode,Position,ClassifiedPosition,GridPosition,Q1,Q2,Q3,Time,Status,Points
0,1,M VERSTAPPEN,VER,max_verstappen,Red Bull Racing,3671C6,red_bull,Max,Verstappen,Max Verstappen,https://www.formula1.com/content/dam/fom-website/drivers/M/MAXVER01_Max_Verstappen/maxver01.png.transform/1col/image.png,,1.0,1,1.0,,,,0 days 01:27:02.624000,Finished,26.0
1,77,V BOTTAS,BOT,bottas,Alfa Romeo,C92D4B,alfa,Valtteri,Bottas,Valtteri Bottas,https://www.formula1.com/content/dam/fom-website/drivers/V/VALBOT01_Valtteri_Bottas/valbot01.png.transform/1col/image.png,,19.0,19,18.0,,,,,+1 Lap,0.0
2,55,C SAINZ,SAI,sainz,Ferrari,F91536,ferrari,Carlos,Sainz,Carlos Sainz,https://www.formula1.com/content/dam/fom-website/drivers/C/CARSAI01_Carlos_Sainz/carsai01.png.transform/1col/image.png,,18.0,18,16.0,,,,,Retired,0.0
3,16,C LECLERC,LEC,leclerc,Ferrari,F91536,ferrari,Charles,Leclerc,Charles Leclerc,https://www.formula1.com/content/dam/fom-website/drivers/C/CHALEC01_Charles_Leclerc/chalec01.png.transform/1col/image.png,,2.0,2,2.0,,,,0 days 00:00:17.993000,Finished,18.0
4,10,P GASLY,GAS,gasly,Alpine,2293D1,alpine,Pierre,Gasly,Pierre Gasly,https://www.formula1.com/content/dam/fom-website/drivers/P/PIEGAS01_Pierre_Gasly/piegas01.png.transform/1col/image.png,,13.0,13,10.0,,,,0 days 00:01:10.360000,Finished,0.0


In [118]:
event_data = pd.read_csv(f"{filepath}event_data.csv",
                         parse_dates=[8, 11, 14, 17, 20],
                         date_format='%Y-%m-%d %H:%M:%S%z'
                        )

for i in range(1, 6):
    event_data[f'Session{i}DateUtc'] = pd.to_datetime(event_data[f'Session{i}DateUtc'])

event_data['EventDate'] = pd.to_datetime(event_data['EventDate'])

display(event_data.head())
event_data.dtypes

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,Session2,Session2Date,Session2DateUtc,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport
0,22,Abu Dhabi,Yas Island,FORMULA 1 ETIHAD AIRWAYS ABU DHABI GRAND PRIX 2023,2023-11-26,Abu Dhabi Grand Prix,conventional,Practice 1,2023-11-24 13:30:00+04:00,2023-11-24 09:30:00,Practice 2,2023-11-24 17:00:00+04:00,2023-11-24 13:00:00,Practice 3,2023-11-25 14:30:00+04:00,2023-11-25 10:30:00,Qualifying,2023-11-25 18:00:00+04:00,2023-11-25 14:00:00,Race,2023-11-26 17:00:00+04:00,2023-11-26 13:00:00,True


RoundNumber                              int64
Country                                 object
Location                                object
OfficialEventName                       object
EventDate                       datetime64[ns]
EventName                               object
EventFormat                             object
Session1                                object
Session1Date         datetime64[ns, UTC+04:00]
Session1DateUtc                 datetime64[ns]
Session2                                object
Session2Date         datetime64[ns, UTC+04:00]
Session2DateUtc                 datetime64[ns]
Session3                                object
Session3Date         datetime64[ns, UTC+04:00]
Session3DateUtc                 datetime64[ns]
Session4                                object
Session4Date         datetime64[ns, UTC+04:00]
Session4DateUtc                 datetime64[ns]
Session5                                object
Session5Date         datetime64[ns, UTC+04:00]
Session5DateU

In [121]:
laps = pd.read_csv(f"{filepath}tel_data.csv")


laps = laps[(laps.Driver==driver_id) && (laps.Lap.isin(laps))]


SyntaxError: invalid syntax (1307583744.py, line 4)

In [122]:
laps.dtypes

Driver                    object
Lap                        int64
Date                      object
SessionTime               object
DriverAhead              float64
DistanceToDriverAhead    float64
Time                      object
RPM                        int64
Speed                      int64
nGear                      int64
Throttle                   int64
Brake                       bool
DRS                        int64
Source                    object
Distance                 float64
RelativeDistance         float64
Status                    object
X                          int64
Y                          int64
Z                          int64
dtype: object