In [1]:
import fastf1
import pandas as pd
import datetime
import os

from fastf1.ergast import Ergast



In [2]:
ergast = Ergast()

In [3]:
schedule = ergast.get_race_schedule(season=2021)
num_races = len(schedule)

race_names = schedule['raceName'].to_list()
print(race_names)

# note: Emilia Romangna Grand Prix is actually 'Autodromo Enzo e Dino Ferrari'
# this is because we're using the track name, not the event name
# it's all raceID anyways



['Bahrain Grand Prix', 'Emilia Romagna Grand Prix', 'Portuguese Grand Prix', 'Spanish Grand Prix', 'Monaco Grand Prix', 'Azerbaijan Grand Prix', 'French Grand Prix', 'Styrian Grand Prix', 'Austrian Grand Prix', 'British Grand Prix', 'Hungarian Grand Prix', 'Belgian Grand Prix', 'Dutch Grand Prix', 'Italian Grand Prix', 'Russian Grand Prix', 'Turkish Grand Prix', 'United States Grand Prix', 'Mexico City Grand Prix', 'São Paulo Grand Prix', 'Qatar Grand Prix', 'Saudi Arabian Grand Prix', 'Abu Dhabi Grand Prix']


In [4]:
def get_dID(code):
    mapping = {
        'ALO': 14,
        'BOT': 77,
        'GAS': 10,
        'GIO': 99,
        'HAM': 44,
        'KUB': 88,
        'LAT': 6,
        'LEC': 16,
        'MAZ': 9,
        'NOR': 4,
        'OCO': 31,
        'PER': 11,
        'RAI': 7,
        'RIC': 3,
        'RUS': 63,
        'SAI': 55,
        'MSC': 47,
        'STR': 18,
        'TSU': 22,
        'VER': 33,
        'VET': 5
    }
    return mapping.get(code, None)  # Returns None if the code is not in the mapping

# Example
piss = get_dID('VER')
print(piss) # 33

33


In [6]:
# david note: this block took ~6min to run the first time
#             2nd run is 53s due to caching
# change ER diagram for LapTelemetry: temp -> AirTemp, lapNumber -> rID

master_weather_df = pd.DataFrame()
master_driver_df = pd.DataFrame()

for i in range(0, num_races):
    session = fastf1.get_session(2021, race_names[i], 'R')
    session.load(weather=True)
    weather_df = session.weather_data
    weather_df['rID'] = i+1
    weather_df = weather_df.drop(columns=['Pressure', 'Humidity', 'WindDirection'])

    weather_df['Time'] = weather_df['Time'].dt.total_seconds()

    weather_df = weather_df.iloc[::len(weather_df)//4]
    
    master_weather_df = pd.concat([master_weather_df, weather_df], ignore_index=True)

    driver_df = session.laps[['Driver', 'Time', 'LapNumber', 'LapTime', 'Position', 'PitInTime', 'PitOutTime']]
    max_laps = driver_df['LapNumber'].max()

    driver_df['dID'] = driver_df['Driver'].apply(get_dID)
    driver_df['rID'] = i+1
    driver_df['Speed'] = driver_df['LapTime'].dt.total_seconds()
    driver_df['Time'] = driver_df['Time'].dt.total_seconds()
    driver_df['PitInTime'] = driver_df['PitInTime'].dt.total_seconds()
    driver_df['PitOutTime'] = driver_df['PitOutTime'].dt.total_seconds()

    driver_df = driver_df[
        (driver_df['LapNumber'] == 1) |                    # First lap
        (driver_df['LapNumber'] == max_laps) |              # Last lap
        (driver_df['PitInTime'].notna()) |                 # Has pit in time
        (driver_df['PitOutTime'].notna())
    ]

    driver_df = driver_df[
        driver_df['LapTime'].notna() &                      # Speed not null
        driver_df['Position'].notna()                     # Position not null
    ]

    master_driver_df = pd.concat([master_driver_df, driver_df], ignore_index=True)


# os.makedirs('../../sample_data/driver_telemetry', exist_ok=True)
# driver_df.to_csv('../../milestone-1/sample_data/driver_telemetry/driver_data_race_{}.csv'.format(i+1), index=False)

master_weather_df.to_csv('/Users/davidhe/Documents/GitHub/group-11-348/milestone-1/sample_data/lap_telemetry/master_weather_2021.csv', index=False)
master_driver_df.to_csv('/Users/davidhe/Documents/GitHub/group-11-348/milestone-1/sample_data/driver_telemetry/master_driver_2021.csv', index=False)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.6.1]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '33', '77', '4', '11', '16', '3', '55', '22', '18', '7', '99', '31', '63', '5', '47', '10', '6', '14', '9']
A value is trying to be set on a copy of a slice from a DataFram

In [7]:
print(session.laps.columns)
print(session.laps.head())

Index(['Time', 'Driver', 'DriverNumber', 'LapTime', 'LapNumber', 'Stint',
       'PitOutTime', 'PitInTime', 'Sector1Time', 'Sector2Time', 'Sector3Time',
       'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime',
       'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST', 'IsPersonalBest',
       'Compound', 'TyreLife', 'FreshTyre', 'Team', 'LapStartTime',
       'LapStartDate', 'TrackStatus', 'Position', 'Deleted', 'DeletedReason',
       'FastF1Generated', 'IsAccurate'],
      dtype='object')
                    Time Driver DriverNumber                LapTime  \
0 0 days 01:04:01.390000    GAS           10 0 days 00:01:39.330000   
1 0 days 01:05:34.204000    GAS           10 0 days 00:01:32.814000   
2 0 days 01:07:05.737000    GAS           10 0 days 00:01:31.533000   
3 0 days 01:08:37.176000    GAS           10 0 days 00:01:31.439000   
4 0 days 01:10:08.543000    GAS           10 0 days 00:01:31.367000   

   LapNumber  Stint PitOutTime PitInTime            Sector1Time  \
0 

In [8]:
#
formatted_weather = ",\n".join(
    f"({row['rID']}, {row['Time']}, {row['Rainfall']}, {row['WindSpeed']}, {row['TrackTemp']}, {row['AirTemp']})"
    for _, row in master_weather_df.iterrows()
)
print(formatted_weather)

# format is exactly (rID, Time, rainFall, windSpeed, trackTemp, airTemp)

(1, 43.04, False, 1.0, 29.9, 20.9),
(1, 2023.04, False, 1.0, 28.5, 20.4),
(1, 4003.036, False, 0.6, 27.7, 20.5),
(1, 5983.051, False, 0.9, 26.5, 20.5),
(1, 7963.164, False, 0.9, 26.1, 20.6),
(2, 41.291, True, 0.2, 18.1, 9.8),
(2, 2441.413, True, 0.2, 16.5, 9.8),
(2, 4841.419, False, 1.1, 15.4, 11.1),
(2, 7241.413, False, 0.3, 18.1, 11.3),
(2, 9641.411, False, 0.3, 18.6, 12.3),
(3, 4.02, False, 0.8, 38.5, 19.5),
(3, 1984.144, False, 0.9, 40.3, 19.7),
(3, 3964.17, False, 0.6, 39.6, 19.6),
(3, 5944.143, False, 0.9, 39.3, 19.3),
(3, 7924.27, False, 0.8, 34.1, 18.1),
(4, 31.507, False, 0.2, 34.1, 22.3),
(4, 1951.679, False, 0.0, 31.5, 21.4),
(4, 3871.804, False, 0.7, 32.1, 22.4),
(4, 5791.974, False, 0.7, 32.8, 22.4),
(4, 7712.129, False, 0.4, 35.5, 23.5),
(5, 25.218, False, 0.1, 36.9, 20.7),
(5, 2005.36, False, 0.3, 43.4, 20.8),
(5, 3985.543, False, 0.4, 39.4, 21.0),
(5, 5965.607, False, 0.3, 33.9, 20.4),
(5, 7945.67, False, 0.6, 34.4, 20.9),
(6, 4.825, False, 0.3, 43.9, 25.4),
(6, 2584.94

In [21]:
master_driver_df = master_driver_df.fillna('NULL')

formatted_drivers = ",\n".join(
    f"({row['dID']},{row['rID']}, {row['LapNumber']}, {row['Speed']}, {row['Time']}, {row['Position']}, {row['PitInTime']}, {row['PitOutTime']})"
    for _, row in master_driver_df.iterrows()
)
print(formatted_drivers)



(44,1, 1.0, 119.538, 2349.686, 2.0, NULL, NULL),
(44,1, 13.0, 98.924, 3631.298, 2.0, 3629.611, NULL),
(44,1, 14.0, 115.006, 3746.304, 4.0, NULL, 3654.547),
(44,1, 28.0, 97.835, 5082.938, 2.0, 5081.24, NULL),
(44,1, 29.0, 115.194, 5198.132, 3.0, NULL, 5105.375),
(44,1, 56.0, 94.918, 7754.047, 1.0, NULL, NULL),
(33,1, 1.0, 118.245, 2348.393, 1.0, NULL, NULL),
(33,1, 17.0, 99.153, 4016.905, 1.0, 4015.22, NULL),
(33,1, 18.0, 115.532, 4132.437, 2.0, NULL, 4040.122),
(33,1, 39.0, 97.828, 6135.088, 1.0, 6133.427, NULL),
(33,1, 40.0, 113.437, 6248.525, 2.0, NULL, 6157.245),
(33,1, 56.0, 94.727, 7754.768, 2.0, NULL, NULL),
(77,1, 1.0, 123.772, 2353.92, 4.0, NULL, NULL),
(77,1, 16.0, 99.534, 3926.391, 2.0, 3924.696, NULL),
(77,1, 17.0, 114.719, 4041.11, 4.0, NULL, 3948.94),
(77,1, 30.0, 98.028, 5281.018, 2.0, 5279.311, NULL),
(77,1, 31.0, 123.818, 5404.836, 5.0, NULL, 5312.248),
(77,1, 54.0, 97.802, 7583.439, 3.0, 7581.738, NULL),
(77,1, 55.0, 115.899, 7699.338, 3.0, NULL, 7606.324),
(77,1, 56.0