In [29]:
import datetime
import os

import fastf1
import numpy as np
import pandas as pd
# FastF1 requires the cache directory to exist before it is enabled, so create
# it up front; this keeps the notebook runnable from a fresh checkout/kernel.
os.makedirs('cache', exist_ok=True)
fastf1.Cache.enable_cache('cache')

In [30]:
def find_fastest_laps(laps, cols, lap_num):
    """Collect each driver's ``lap_num`` fastest laps, side by side in one row.

    Laps are ordered by ascending ``LapTime`` and ranked per ``DriverNumber``.
    Every selected lap gets a ``LapPercent`` column: its gap to the reference
    time (the quickest of all drivers' best laps) as a fraction of that
    reference time.

    Parameters
    ----------
    laps : pandas.DataFrame
        Lap table with at least ``DriverNumber`` and ``LapTime`` columns, plus
        every column named in ``cols`` other than ``LapPercent``.
    cols : list of str
        Columns to keep for each selected lap. Must contain ``'DriverNumber'``
        (the merge key); may contain ``'LapPercent'``.
    lap_num : int
        Number of laps to keep per driver.

    Returns
    -------
    pandas.DataFrame
        One row per driver. The fastest lap's columns are unsuffixed; the
        k-th fastest lap's columns carry the suffix ``_k`` (``_2``, ``_3``...).
        Drivers with fewer than ``lap_num`` laps are dropped by the inner merge.

    Notes
    -----
    Rows with a missing ``LapTime`` sort last but still count as laps, so a
    driver's k-th lap can have a missing ``LapPercent``.
    """
    laps = laps.sort_values('LapTime')

    # GroupBy.nth is zero-based: position 0 is each driver's fastest lap.
    fastest = laps.groupby('DriverNumber').nth(0).reset_index()

    fastest_lap = fastest['LapTime'].min()
    fastest['LapPercent'] = (fastest['LapTime'] - fastest_lap) / fastest_lap
    fastest = fastest[cols]

    for i in range(2, lap_num + 1):
        # The i-th fastest lap (1-based, used for the suffix) is position i - 1.
        laps_i = laps.groupby('DriverNumber').nth(i - 1).reset_index()
        laps_i['LapPercent'] = (laps_i['LapTime'] - fastest_lap) / fastest_lap
        fastest = fastest.merge(laps_i[cols], on=['DriverNumber'], suffixes=('', '_' + str(i)))
    return fastest

In [31]:
def load_practice(event, type, year):
    """Build one row of practice features per driver for a race weekend.

    Parameters
    ----------
    event : event identifier passed straight to ``fastf1.get_session``.
    type : event format string; only ``'conventional'`` triggers loading
        'Practice 2'.
    year : season passed straight to ``fastf1.get_session``.

    Returns
    -------
    DataFrame merged on ``DriverNumber`` in which the 'Practice 1' summary
    columns end in ``_1`` and the second summary's columns end in ``_2``.
    For any format other than ``'conventional'`` the 'Practice 1' summary is
    used on both sides of the merge.
    """
    summary_cols = ['DriverNumber', 'TyreLife', 'LapPercent']

    def _summarise(session_name):
        # Fetch and load one session, then reduce its laps to the per-driver summary.
        session = fastf1.get_session(year, event, session_name)
        session.load()
        return find_fastest_laps(session.laps, summary_cols, 2)

    first_summary = _summarise('Practice 1')

    if type == 'conventional':
        second_summary = _summarise('Practice 2')
    else:
        # No second session is loaded; reuse the first summary for the merge.
        second_summary = first_summary

    return first_summary.merge(second_summary, on=['DriverNumber'], suffixes=('_1', '_2'))
   
def load_qualifying(event, year):
    """Return each driver's qualifying position and gap to the best qualifying lap.

    Parameters
    ----------
    event : event identifier passed straight to ``fastf1.get_session``.
    year : season passed straight to ``fastf1.get_session``.

    Returns
    -------
    DataFrame with columns ``DriverNumber``, ``Position`` and ``LapPercent``.
    ``LapPercent`` is the driver's best time across ``Q1``/``Q2``/``Q3`` minus
    the best time of any driver, divided by that best time. It is missing for
    a driver with no time in any of the three columns.
    """
    qualifying = fastf1.get_session(year, event, 'Q')
    qualifying.load()
    results = qualifying.results

    # Row-wise minimum that skips missing times. The builtin min() compares
    # element by element and every comparison against NaT is False, so a
    # missing first entry would win over valid later times.
    fastest_lap = results[['Q1', 'Q2', 'Q3']].min(axis=1)
    fastest_lap_q = fastest_lap.min()

    # Build the output on a copy so the session's own results frame is not
    # modified with helper columns.
    summary = results[['DriverNumber', 'Position']].copy()
    summary['LapPercent'] = (fastest_lap - fastest_lap_q) / fastest_lap_q
    return summary


In [32]:
# Event schedule for the 2022 season; iterated below to build the training set.
sched_2022 = fastf1.get_event_schedule(2022)

In [33]:
# Build the training table: for every 2022 event with a supported format, join
# the per-driver practice features with the qualifying outcome.
# NOTE(review): events whose EventFormat is anything other than
# 'conventional' or 'sprint_shootout' are skipped — confirm that is intended.
event_frames = []
for index, row in sched_2022.iterrows():
    if row['EventFormat'] in ['conventional', 'sprint_shootout']:
        practice_data = load_practice(row['EventName'], row['EventFormat'], 2022)
        qualifying_data = load_qualifying(row['EventName'], 2022)
        full_data = practice_data.merge(qualifying_data, on='DriverNumber')
        event_frames.append(full_data)

# Concatenate once instead of growing a frame inside the loop; pd.concat raises
# on an empty list, so fall back to an empty frame when no event matched.
data = pd.concat(event_frames, axis=0, ignore_index=True) if event_frames else pd.DataFrame()

core           INFO 	Loading data for Bahrain Grand Prix - Practice 1 [v2.3.1]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems():
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems():
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems():
  df = pd.concat([df, result], sort=Fals

In [34]:
# Persist the combined 2022 practice/qualifying table to disk.
data.to_pickle('train_data.pkl')

In [35]:
# Event schedule for the 2023 season; used below to pick the event to predict.
sched_2023 = fastf1.get_event_schedule(2023)

In [36]:
# Keep only the schedule row(s) whose RoundNumber is 1.
pred_event = sched_2023[sched_2023['RoundNumber'] == 1]

In [37]:
# Practice features for the first selected event; unlike the training loop,
# no qualifying results are merged in here.
pred_data = load_practice(pred_event['EventName'].iloc[0], pred_event['EventFormat'].iloc[0], 2023)

core           INFO 	Loading data for Bahrain Grand Prix - Practice 1 [v2.3.1]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems():
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems():
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  df = pd.concat([df, result], sort=False)
  for key, value in row.iteritems(

In [38]:
# Persist the 2023 practice features to disk.
pred_data.to_pickle('pred_data.pkl')