In [1]:
import pandas as pd
import numpy as np
import fastf1
import datetime
from sklearn.linear_model import LinearRegression
fastf1.Cache.enable_cache('cache')  

In [2]:
def find_fastest_laps(laps, cols, lap_num):
    laps = laps.sort_values('LapTime')
    fastest = laps.groupby('DriverNumber').nth(1).reset_index()

    fastest_lap = fastest['LapTime'].min()
    fastest['LapPercent'] = (fastest['LapTime'] - fastest_lap) / fastest_lap
    fastest = fastest[cols]

    for i in range(2,lap_num + 1):
        laps_i = laps.groupby('DriverNumber').nth(i).reset_index()
        laps_i['LapPercent'] = (laps_i['LapTime'] - fastest_lap) / fastest_lap
        fastest = fastest.merge(laps_i[cols], on=['DriverNumber'], suffixes=('', '_' + str(i)))
    return fastest

In [3]:
def load_race(event_row, year):
    if event_row['EventFormat'] in ['conventional', 'sprint']:
        practice1 = fastf1.get_session(year, event_row['EventName'], 'Practice 1')
        practice1.load()
        laps1 = practice1.laps
        cols = ['DriverNumber', 'TyreLife', 'LapPercent']
        practice1 = find_fastest_laps(laps1, cols, 2)

        practice2 = practice1
        if event_row['EventFormat'] == 'conventional':
            practice2 = fastf1.get_session(year, event_row['EventName'], 'Practice 2')
            practice2.load()
            laps2 = practice2.laps
            practice2 = find_fastest_laps(laps2, cols, 2)

        practice_data = practice1.merge(practice2, on=['DriverNumber'], suffixes=('_1', '_2'))

        qualifying = fastf1.get_session(year, event_row['EventName'], 'Q')
        qualifying.load()
        results = qualifying.results

        results['FastestLap'] = results[['Q1', 'Q2', 'Q3']].apply(lambda x: min(x), axis = 1)

        fastest_lap_q = results['FastestLap'].min()
        results['LapPercent'] = (results['FastestLap'] - fastest_lap_q)/ fastest_lap_q
        full_data = practice_data.merge(results[['DriverNumber', 'Position', 'LapPercent']], on='DriverNumber')
        return full_data
    
    return pd.DataFrame()
        


In [4]:
sched_2022 = fastf1.get_event_schedule(2022)

In [8]:
data = pd.DataFrame()
for index, row in sched_2022.iloc[:3].iterrows():
    data = pd.concat([data, load_race(row, 2022)], axis = 0)

core           INFO 	Loading data for Bahrain Grand Prix - Practice 1 [v2.3.1]
api            INFO 	No cached data found for driver_info. Loading data...
api            INFO 	Fetching driver list...
api            INFO 	Data has been written to cache!
api            INFO 	No cached data found for timing_data. Loading data...
api            INFO 	Fetching timing data...
api            INFO 	Parsing timing data...
api            INFO 	Data has been written to cache!
api            INFO 	No cached data found for timing_app_data. Loading data...
api            INFO 	Fetching timing app data...
api            INFO 	Data has been written to cache!
core           INFO 	Processing timing data...
api            INFO 	No cached data found for session_status_data. Loading data...
api            INFO 	Fetching session status data...
api            INFO 	Data has been written to cache!
api            INFO 	No cached data found for track_status_data. Loading data...
api            INFO 	Fetching tra

In [9]:
data = data.dropna()

In [10]:
data

Unnamed: 0,DriverNumber,TyreLife_1,LapPercent_1,TyreLife_2_1,LapPercent_2_1,TyreLife_2,LapPercent_2,TyreLife_2_2,LapPercent_2_2,Position,LapPercent
0,1,2.0,0.0,2.0,0.031472,2.0,0.014719,4.0,0.015477,2.0,0.001358
1,10,4.0,0.007238,10.0,0.019392,5.0,0.016756,6.0,0.028636,10.0,0.011893
2,11,4.0,0.004832,2.0,0.012903,3.0,0.022078,8.0,0.053705,4.0,0.004008
3,14,4.0,0.004178,2.0,0.004895,4.0,0.01228,2.0,0.022859,8.0,0.011738
4,16,9.0,0.014496,6.0,0.016416,2.0,0.0,2.0,0.0093,1.0,0.0
5,18,5.0,0.006183,6.0,0.027632,5.0,0.024127,2.0,0.03542,19.0,0.02732
6,20,3.0,0.022715,8.0,0.027062,2.0,0.025026,4.0,0.027302,7.0,0.009972
7,22,4.0,0.002606,2.0,0.002933,8.0,0.01913,2.0,0.023195,16.0,0.024205
8,23,5.0,0.013367,5.0,0.04006,4.0,0.033134,5.0,0.04227,14.0,0.023256
9,24,2.0,0.00997,8.0,0.030311,4.0,0.019022,4.0,0.024875,15.0,0.021368


In [11]:
X = data.drop(['DriverNumber', 'Position', 'LapPercent'], axis = 1)
y = data['LapPercent']

In [12]:
model = LinearRegression().fit(X, y)

In [13]:
pd.concat([pd.DataFrame(X.columns),pd.DataFrame(np.transpose(model.coef_))], axis = 1)

Unnamed: 0,0,0.1
0,TyreLife_1,-5.7e-05
1,LapPercent_1,0.047957
2,TyreLife_2_1,0.000334
3,LapPercent_2_1,0.243729
4,TyreLife_2,0.003173
5,LapPercent_2,0.398266
6,TyreLife_2_2,-0.001936
7,LapPercent_2_2,0.192507
