In [1]:
import pandas as pd
import numpy as np
import fastf1
import datetime
# Point fastf1's on-disk cache at the relative 'cache' directory so repeated
# runs can reuse previously fetched session data.
# NOTE(review): presumably the directory must already exist — confirm for the
# installed fastf1 version before running on a fresh checkout.
fastf1.Cache.enable_cache('cache') 

In [2]:
def find_fastest_laps(laps, cols, index_cols, lap_num):
    """Collect each driver's ``lap_num`` quickest laps into one wide row per driver.

    Laps are ranked per ``DriverNumber`` by ascending ``LapTime``. Every
    selected lap gets a ``LapPercent`` value: its gap to the quickest lap of
    the whole frame, expressed as a fraction of that quickest lap.

    Parameters
    ----------
    laps : pandas.DataFrame
        One row per lap. Must contain ``DriverNumber``, ``LapTime`` and every
        name in ``cols`` / ``index_cols`` (``LapPercent`` is computed here).
    cols : list of str
        Per-lap columns to keep for every selected lap. Must include
        ``'DriverNumber'``, which is the merge key.
    index_cols : list of str
        Per-driver columns, taken from the driver's quickest lap only.
    lap_num : int
        How many laps to keep per driver (1 = quickest lap only).

    Returns
    -------
    pandas.DataFrame
        ``cols + index_cols`` for the quickest lap, followed by the non-key
        ``cols`` of the i-th quickest lap suffixed ``_i`` for ``i`` in
        ``2..lap_num``. Drivers with fewer than ``i`` laps get NaN there.
    """
    laps = laps.sort_values('LapTime')
    # 0-based position of each lap among its driver's laps, quickest first.
    # Selecting by this rank avoids GroupBy.nth, which is 0-based (so nth(1)
    # is the *second* lap) and whose index handling changed in pandas 2.0.
    rank = laps.groupby('DriverNumber').cumcount().to_numpy()

    def _nth_fastest(n):
        # n is 1-based: n == 1 is each driver's quickest lap.
        return laps[rank == n - 1].reset_index(drop=True)

    fastest = _nth_fastest(1)
    fastest_lap = fastest['LapTime'].min()
    fastest = fastest.assign(
        LapPercent=(fastest['LapTime'] - fastest_lap) / fastest_lap
    )
    fastest = fastest[cols + index_cols]

    for i in range(2, lap_num + 1):
        laps_i = _nth_fastest(i)
        laps_i = laps_i.assign(
            LapPercent=(laps_i['LapTime'] - fastest_lap) / fastest_lap
        )
        # Outer merge keeps drivers that did not complete i laps.
        fastest = fastest.merge(
            laps_i[cols],
            on=['DriverNumber'],
            how='outer',
            suffixes=('', '_' + str(i)),
        )
    return fastest

In [3]:
def load_practice(event, type, year):
    """Build per-driver practice features for one event.

    Loads 'Practice 1' and, for ``type == 'conventional'`` events, also
    'Practice 2'. Each session is reduced with ``find_fastest_laps`` (two laps
    per driver) and the two frames are outer-merged on ``DriverNumber``,
    ``Team`` and ``Driver``; overlapping columns get the suffixes ``_1``
    (first frame) and ``_2`` (second frame). For any other ``type`` the
    Practice 1 frame is merged with itself, so both suffix groups hold the
    same values.

    Parameters
    ----------
    event : passed to ``fastf1.get_session`` as the event identifier.
    type : str
        Event format; only ``'conventional'`` triggers the Practice 2 load.
        (The name shadows the builtin but is kept for caller compatibility.)
    year : passed to ``fastf1.get_session`` as the season.

    Returns
    -------
    pandas.DataFrame
        The merged feature frame, or an empty DataFrame if processing the lap
        data fails.
    """
    fp1_session = fastf1.get_session(year, event, 'Practice 1')
    fp1_session.load()
    try:
        cols = ['DriverNumber', 'TyreLife', 'LapPercent']
        index_cols = ['Team', 'Driver']
        practice1 = find_fastest_laps(fp1_session.laps, cols, index_cols, 2)

        practice2 = practice1
        if type == 'conventional':
            fp2_session = fastf1.get_session(year, event, 'Practice 2')
            fp2_session.load()
            practice2 = find_fastest_laps(fp2_session.laps, cols, index_cols, 2)

        return practice1.merge(
            practice2,
            on=['DriverNumber'] + index_cols,
            how='outer',
            suffixes=('_1', '_2'),
        )
    except Exception:
        # Best effort: an event with unusable lap data yields an empty frame.
        # Only Exception is caught, so Ctrl-C (KeyboardInterrupt) and
        # SystemExit still stop a long multi-season download loop.
        return pd.DataFrame()
   
def load_qualifying(event, year):
    """Build per-driver qualifying targets for one event.

    Loads the 'Q' session, takes each driver's best time across the ``Q1``,
    ``Q2`` and ``Q3`` result columns, and expresses it as ``LapPercent``: the
    gap to the best time of any driver, as a fraction of that best time.

    Parameters
    ----------
    event : passed to ``fastf1.get_session`` as the event identifier.
    year : passed to ``fastf1.get_session`` as the season.

    Returns
    -------
    pandas.DataFrame
        Columns ``DriverNumber``, ``Position`` and ``LapPercent``, or an empty
        DataFrame if processing the results fails.
    """
    qualifying = fastf1.get_session(year, event, 'Q')
    qualifying.load()
    try:
        # Work on a copy so the session's own results frame is not modified.
        results = qualifying.results.copy()

        # Row-wise minimum that skips missing segment times. Python's min()
        # over a row is order-dependent when a missing value comes first.
        results['FastestLap'] = results[['Q1', 'Q2', 'Q3']].min(axis=1)

        fastest_lap_q = results['FastestLap'].min()
        results['LapPercent'] = (results['FastestLap'] - fastest_lap_q) / fastest_lap_q
        return results[['DriverNumber', 'Position', 'LapPercent']]
    except Exception:
        # Best effort: unusable results yield an empty frame. Only Exception
        # is caught, so KeyboardInterrupt / SystemExit still propagate.
        return pd.DataFrame()


In [4]:
years = [2021, 2022, 2023]

# Schedule fields copied onto every driver row of an event.
event_cols = ['RoundNumber', 'Country', 'Location', 'OfficialEventName',
              'EventDate', 'EventName', 'EventFormat']

# Collect one frame per event and concatenate once at the end; concatenating
# inside the loop re-copies all accumulated rows on every iteration.
event_frames = []
for year in years:
    schedule = fastf1.get_event_schedule(year)
    for _index, row in schedule.iterrows():
        if row['EventFormat'] in ['conventional', 'sprint_shootout']:
            practice_data = load_practice(row['EventName'], row['EventFormat'], year)
            qualifying_data = load_qualifying(row['EventName'], year)
            # Either loader returns an empty frame on failure; skip the event
            # unless both sides carry the merge key.
            if 'DriverNumber' in practice_data.columns and 'DriverNumber' in qualifying_data.columns:
                full_data = practice_data.merge(qualifying_data, on='DriverNumber')
                for col in event_cols:
                    full_data[col] = row[col]
                event_frames.append(full_data)

# pd.concat raises on an empty list, so fall back to an empty frame.
data = pd.concat(event_frames, axis=0, ignore_index=True) if event_frames else pd.DataFrame()

core           INFO 	Loading data for Bahrain Grand Prix - Practice 1 [v2.3.1]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
  for key, value in row.iteritems():
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems():
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems():
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=Fals

In [5]:
# Non-null value count of every column, per event, as a completeness check.
data.groupby('OfficialEventName').count()

Unnamed: 0_level_0,DriverNumber,TyreLife_1,LapPercent_1,Team,Driver,TyreLife_2_1,LapPercent_2_1,TyreLife_2,LapPercent_2,TyreLife_2_2,LapPercent_2_2,Position,LapPercent,RoundNumber,Country,Location,EventDate,EventName,EventFormat
OfficialEventName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
FORMULA 1 ARAMCO GRAN PREMIO DE ESPAÑA 2021,20,18,18,20,20,18,18,20,20,20,20,20,20,20,20,20,20,20,20
FORMULA 1 ARAMCO MAGYAR NAGYDÍJ 2022,20,19,19,20,20,19,19,20,20,20,20,20,20,20,20,20,20,20,20
FORMULA 1 ARAMCO UNITED STATES GRAND PRIX 2021,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20
FORMULA 1 ARAMCO UNITED STATES GRAND PRIX 2022,20,15,15,20,20,15,15,20,20,20,20,20,20,20,20,20,20,20,20
FORMULA 1 AWS GRAN PREMIO DE ESPAÑA 2023,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20,20
FORMULA 1 AWS GRAND PRIX DU CANADA 2022,20,20,20,20,20,20,20,20,19,20,19,20,20,20,20,20,20,20,20
FORMULA 1 AZERBAIJAN GRAND PRIX 2021,20,20,20,20,20,20,20,20,20,20,20,20,18,20,20,20,20,20,20
FORMULA 1 AZERBAIJAN GRAND PRIX 2022,20,20,19,20,20,20,19,20,20,20,20,20,20,20,20,20,20,20,20
FORMULA 1 AZERBAIJAN GRAND PRIX 2023,20,20,20,20,20,20,20,20,20,20,20,20,19,20,20,20,20,20,20
FORMULA 1 BWT GROSSER PREIS DER STEIERMARK 2021,20,19,19,20,20,19,19,19,19,19,19,20,20,20,20,20,20,20,20


In [6]:
# Persist the assembled frame to 'train_data.pkl' in the working directory.
data.to_pickle('train_data.pkl')

In [35]:
# Fetch the 2023 event schedule; later cells filter it by round number.
sched_2023 = fastf1.get_event_schedule(2023)

In [36]:
# Keep only the schedule row(s) whose RoundNumber is 1.
pred_event = sched_2023.loc[sched_2023['RoundNumber'] == 1]

In [37]:
# Practice features for the selected event, taken from its first schedule row.
pred_data = load_practice(
    event=pred_event['EventName'].iloc[0],
    type=pred_event['EventFormat'].iloc[0],
    year=2023,
)

core           INFO 	Loading data for Bahrain Grand Prix - Practice 1 [v2.3.1]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems():
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems():
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems(

In [38]:
# Persist the prediction-input frame to 'pred_data.pkl' in the working directory.
pred_data.to_pickle('pred_data.pkl')