In [6]:
import fastf1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings

warnings.filterwarnings("ignore")

# Enable FastF1 API cache for faster data retrieval
fastf1.Cache.enable_cache("../data/raw/fastf1_cache")

#Setting plotting style
plt.style.use("seaborn-v0_8")

# FastF1 Data Exploration

In [7]:
# Get 2024 seasion schedule
schedule_2024 = fastf1.get_event_schedule(2024)

print(f"Number of races in 2024: {len(schedule_2024)}")
print(f"Columns available: {schedule_2024.columns.tolist()}")
print("\nFirst 5 races:")
print(schedule_2024[['RoundNumber', 'EventName', 'Location', 'EventDate']].head())

Number of races in 2024: 25
Columns available: ['RoundNumber', 'Country', 'Location', 'OfficialEventName', 'EventDate', 'EventName', 'EventFormat', 'Session1', 'Session1Date', 'Session1DateUtc', 'Session2', 'Session2Date', 'Session2DateUtc', 'Session3', 'Session3Date', 'Session3DateUtc', 'Session4', 'Session4Date', 'Session4DateUtc', 'Session5', 'Session5Date', 'Session5DateUtc', 'F1ApiSupport']

First 5 races:
   RoundNumber                 EventName   Location  EventDate
0            0        Pre-Season Testing     Sakhir 2024-02-23
1            1        Bahrain Grand Prix     Sakhir 2024-03-02
2            2  Saudi Arabian Grand Prix     Jeddah 2024-03-09
3            3     Australian Grand Prix  Melbourne 2024-03-24
4            4       Japanese Grand Prix     Suzuka 2024-04-07


In [8]:
schedule_2024.head()

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,...,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport
0,0,Bahrain,Sakhir,FORMULA 1 ARAMCO PRE-SEASON TESTING 2024,2024-02-23,Pre-Season Testing,testing,Practice 1,2024-02-21 10:00:00+03:00,2024-02-21 07:00:00,...,Practice 3,2024-02-23 10:00:00+03:00,2024-02-23 07:00:00,,NaT,NaT,,NaT,NaT,True
1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,...,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True
2,2,Saudi Arabia,Jeddah,FORMULA 1 STC SAUDI ARABIAN GRAND PRIX 2024,2024-03-09,Saudi Arabian Grand Prix,conventional,Practice 1,2024-03-07 16:30:00+03:00,2024-03-07 13:30:00,...,Practice 3,2024-03-08 16:30:00+03:00,2024-03-08 13:30:00,Qualifying,2024-03-08 20:00:00+03:00,2024-03-08 17:00:00,Race,2024-03-09 20:00:00+03:00,2024-03-09 17:00:00,True
3,3,Australia,Melbourne,FORMULA 1 ROLEX AUSTRALIAN GRAND PRIX 2024,2024-03-24,Australian Grand Prix,conventional,Practice 1,2024-03-22 12:30:00+11:00,2024-03-22 01:30:00,...,Practice 3,2024-03-23 12:30:00+11:00,2024-03-23 01:30:00,Qualifying,2024-03-23 16:00:00+11:00,2024-03-23 05:00:00,Race,2024-03-24 15:00:00+11:00,2024-03-24 04:00:00,True
4,4,Japan,Suzuka,FORMULA 1 MSC CRUISES JAPANESE GRAND PRIX 2024,2024-04-07,Japanese Grand Prix,conventional,Practice 1,2024-04-05 11:30:00+09:00,2024-04-05 02:30:00,...,Practice 3,2024-04-06 11:30:00+09:00,2024-04-06 02:30:00,Qualifying,2024-04-06 15:00:00+09:00,2024-04-06 06:00:00,Race,2024-04-07 14:00:00+09:00,2024-04-07 05:00:00,True


In [10]:
# Analyzing session data
# Load specific race to understand the data structure

bahrain_2024 = fastf1.get_session(2024, 'Bahrain', 'R')
bahrain_2024.load()

print("Session Information:")
print(f"Date: {bahrain_2024.date}")
print(f"Track: {bahrain_2024.event['EventName']}")
print(f"Weather: {bahrain_2024.weather_data}")

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.0]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']


Session Information:
Date: 2024-03-02 15:00:00
Track: Bahrain Grand Prix
Weather:                       Time  AirTemp  Humidity  Pressure  Rainfall  TrackTemp  \
0   0 days 00:00:14.093000     18.9      46.0    1017.1     False       26.5   
1   0 days 00:01:14.084000     18.9      46.0    1017.0     False       26.5   
2   0 days 00:02:14.093000     18.9      46.0    1017.0     False       26.5   
3   0 days 00:03:14.090000     18.9      45.0    1017.0     False       26.2   
4   0 days 00:04:14.091000     18.9      46.0    1017.0     False       26.2   
..                     ...      ...       ...       ...       ...        ...   
152 0 days 02:32:14.300000     17.6      51.0    1017.5     False       21.9   
153 0 days 02:33:14.300000     17.6      51.0    1017.5     False       21.9   
154 0 days 02:34:14.313000     17.7      51.0    1017.5     False       21.9   
155 0 days 02:35:14.305000     17.6      51.0    1017.5     False       21.9   
156 0 days 02:36:14.319000     17.7   

### Dataframe containing weather data for this session:
- Time (datetime.timedelta): session timestamp (time only)
- AirTemp (float): Air temperature [°C]
- Humidity (float): Relative humidity [%]
- Pressure (float): Air pressure [mbar]
- Rainfall (bool): Shows if there is rainfall
- TrackTemp (float): Track temperature [°C]
- WindDirection (int): Wind direction [°] (0°-359°)
- WindSpeed (float): Wind speed [m/s]

In [11]:
bahrain_2024.weather_data.head()

Unnamed: 0,Time,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed
0,0 days 00:00:14.093000,18.9,46.0,1017.1,False,26.5,162,0.9
1,0 days 00:01:14.084000,18.9,46.0,1017.0,False,26.5,55,1.0
2,0 days 00:02:14.093000,18.9,46.0,1017.0,False,26.5,55,1.0
3,0 days 00:03:14.090000,18.9,45.0,1017.0,False,26.2,85,1.1
4,0 days 00:04:14.091000,18.9,46.0,1017.0,False,26.2,178,1.0


In [17]:
# Exploring race results

results = bahrain_2024.results
print(f"\nRace Results Shape: {results.shape}")
print(f"Columns: {results.columns.tolist()}")
print(f"\nTop 5 finishers:")
results[['DriverNumber', 'Abbreviation', 'TeamName', 'Position', 'Points']].head()


Race Results Shape: (20, 22)
Columns: ['DriverNumber', 'BroadcastName', 'Abbreviation', 'DriverId', 'TeamName', 'TeamColor', 'TeamId', 'FirstName', 'LastName', 'FullName', 'HeadshotUrl', 'CountryCode', 'Position', 'ClassifiedPosition', 'GridPosition', 'Q1', 'Q2', 'Q3', 'Time', 'Status', 'Points', 'Laps']

Top 5 finishers:


Unnamed: 0,DriverNumber,Abbreviation,TeamName,Position,Points
1,1,VER,Red Bull Racing,1.0,26.0
11,11,PER,Red Bull Racing,2.0,18.0
55,55,SAI,Ferrari,3.0,15.0
16,16,LEC,Ferrari,4.0,12.0
63,63,RUS,Mercedes,5.0,10.0


In [14]:
# Lap-by-lap data
laps = bahrain_2024.laps
print(f"\nLap Data Shape: {laps.shape}")
print(f"Lap Columns: {laps.columns.tolist()}")


Lap Data Shape: (1129, 31)
Lap Columns: ['Time', 'Driver', 'DriverNumber', 'LapTime', 'LapNumber', 'Stint', 'PitOutTime', 'PitInTime', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime', 'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST', 'IsPersonalBest', 'Compound', 'TyreLife', 'FreshTyre', 'Team', 'LapStartTime', 'LapStartDate', 'TrackStatus', 'Position', 'Deleted', 'DeletedReason', 'FastF1Generated', 'IsAccurate']


In [16]:
# Using a driver to understand lap structure
russell_laps = laps.pick_driver('RUS')
print(f"\nRussell's laps: {len(russell_laps)}")
russell_laps[['LapNumber', 'LapTime', 'Sector1Time', 'Compound', 'TyreLife']].head()


Russell's laps: 57


Unnamed: 0,LapNumber,LapTime,Sector1Time,Compound,TyreLife
228,1.0,0 days 00:01:39.228000,NaT,SOFT,4.0
229,2.0,0 days 00:01:36.635000,0 days 00:00:30.829000,SOFT,5.0
230,3.0,0 days 00:01:36.406000,0 days 00:00:30.367000,SOFT,6.0
231,4.0,0 days 00:01:37.738000,0 days 00:00:31.135000,SOFT,7.0
232,5.0,0 days 00:01:38.116000,0 days 00:00:31.636000,SOFT,8.0


In [18]:
# Qualifying data
quali = fastf1.get_session(2024, 'Bahrain', 'Q')
quali.load()
quali_results = quali.results
print(f"\nQualifying results shape: {quali_results.shape}")
print("Pole position:")
quali_results[['Abbreviation', 'Q3']].head(1)

core           INFO 	Loading data for Bahrain Grand Prix - Qualifying [v3.6.0]
req            INFO 	No cached data found for session_info. Loading data...
_api           INFO 	Fetching session info data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for driver_info. Loading data...
_api           INFO 	Fetching driver list...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for session_status_data. Loading data...
_api           INFO 	Fetching session status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for track_status_data. Loading data...
_api           INFO 	Fetching track status data...
req            INFO 	Data has been written to cache!
req            INFO 	No cached data found for _extended_timing_data. Loading data...
_api           INFO 	Fetching timing data...
_api           INFO 	Parsing timing data...
req            INFO 


Qualifying results shape: (20, 22)
Pole position:


Unnamed: 0,Abbreviation,Q3
1,VER,0 days 00:01:29.179000
