In [202]:
# Analysis target: three-letter driver code, season and event name.
DRIVER = 'ALO'
YEAR = 2022
RACE = 'Austria'

# Three-letter driver code -> car number as it appears in the session data.
DRIVERS = {
    'ALB': 23,
    'ALO': 14,
    'BOT': 77,
    'GAS': 10,
    'HAM': 44,
    'HUL': 27,
    'LAT': 6,
    'LEC': 16,
    'MAG': 20,
    'NOR': 4,
    'OCO': 31,
    'PER': 11,
    'RIC': 3,
    'RUS': 63,
    'SAI': 55,
    'MSC': 47,
    'STR': 18,
    'TSU': 22,
    # The loaded session reports car '1' (and no '33') in its driver list,
    # so the number used for lookups in this session has to be 1.
    'VER': 1,
    'VET': 5,
    'ZHO': 24,
}

# Reverse lookup: car number (as a string, the way the timing data carries it)
# -> three-letter driver code.
DRIVER_NUMBERS = {str(number): code for code, number in DRIVERS.items()}
# Keep the previously mapped '33' key resolvable for existing lookups.
DRIVER_NUMBERS.setdefault('33', 'VER')

In [9]:
import fastf1 as ff1
# Point fastf1's cache at a directory relative to the notebook; the log output
# of this cell shows subsequent loads being served from cached data.
ff1.Cache.enable_cache('../data/cache') 

# Select the race session of the configured YEAR / RACE and load telemetry,
# laps and race control messages (weather data is explicitly not requested).
session = ff1.get_session(YEAR, RACE, 'Race')
session.load(telemetry=True, laps=True, messages=True, weather=False)

core           INFO 	Loading data for Austrian Grand Prix - Race [v2.2.9]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '1', '44', '63', '31', '47', '4', '20', '3', '14', '77', '23', '18', '24', '10', '22', '5', '55', '6', '11']


In [416]:
from IPython.display import display, Markdown, HTML
import pandas as pd
import numpy as np

In [8]:
# Race control messages
# Allow up to 200 rows before pandas truncates the rendered table.
pd.set_option('display.max_rows', 200)
display(session.race_control_messages)

Unnamed: 0,Time,Category,Message,Status,Flag,Scope,Sector,RacingNumber
0,2022-07-10 12:20:00,Flag,GREEN LIGHT - PIT EXIT OPEN,,GREEN,Track,,
1,2022-07-10 12:30:00,Other,PIT EXIT CLOSED,,,,,
2,2022-07-10 12:45:19,Other,RISK OF RAIN FOR F1 RACE IS 10%,,,,,
3,2022-07-10 12:57:06,Drs,DRS DISABLED,DISABLED,,,,
4,2022-07-10 13:03:14,Flag,GREEN LIGHT - PIT EXIT OPEN,,GREEN,Track,,
5,2022-07-10 13:03:47,Other,INCIDENT INVOLVING CARS 22 (TSU) AND 23 (ALB) ...,,,,,
6,2022-07-10 13:05:30,Drs,DRS ENABLED,ENABLED,,,,
7,2022-07-10 13:05:41,Other,TURN 4 INCIDENT INVOLVING CARS 63 (RUS) AND 11...,,,,,
8,2022-07-10 13:07:55,Other,FIA STEWARDS: INCIDENT INVOLVING CARS 22 (TSU)...,,,,,
9,2022-07-10 13:09:34,Other,CAR 44 (HAM) TIME 1:11.802 DELETED - TRACK LIM...,,,,,


In [624]:
# Raw timing stream of the session; only the second element of the returned pair is used here.
_, timing_data = ff1.api.timing_data(session.api_path)

driver_number = DRIVERS[DRIVER]
driver_timing = timing_data.query(f'Driver == "{driver_number}"')

# Walk the driver's samples in order and keep every sample whose position
# differs from the most recently seen position.
position_changes = []
current_position = driver_timing.iloc[0]["Position"]
for _, t in driver_timing.iterrows():
    if t["Position"] == current_position:
        continue
    position_changes.append(t)
    current_position = t["Position"]

# Show the change points with the time column labelled as session time.
df = pd.DataFrame(data=position_changes).rename(columns={'Time': 'SessionTime'})
display(df)

api            INFO 	Using cached data for timing_data


Unnamed: 0,SessionTime,Driver,Position,GapToLeader,IntervalToPositionAhead
22672,0 days 01:02:53.570000,14,18,+4.928,0.028
22674,0 days 01:03:01.819000,14,17,+4.863,0.2
22677,0 days 01:03:12.016000,14,18,+6.001,0.166
22691,0 days 01:03:53.573000,14,17,+9.450,0.022
22694,0 days 01:04:01.850000,14,18,+9.521,0.447
22697,0 days 01:04:06.698000,14,17,+9.933,0.028
22699,0 days 01:04:16.591000,14,18,+9.920,0.58
22704,0 days 01:04:30.901000,14,17,+11.370,0.05
22892,0 days 01:15:42.434000,14,15,+26.337,0.41
22909,0 days 01:16:44.358000,14,14,+27.844,0.354


In [371]:
# convenience funcs

def get_position_at_lap(lap: "ff1.core.Lap", drv_timing):
    """Return the driver's position at the start and at the end of a lap.

    Args:
        lap: Lap record providing ``"Time"`` (session time at the end of the
            lap) and ``"LapTime"``.
        drv_timing: Timing samples of one driver with ``"Time"`` and
            ``"Position"`` columns. Index labels are expected to be unique;
            they do not need to be a 0..n-1 range.

    Returns:
        Tuple ``(position_at_start, position_at_end)``. Each value is taken
        from the earliest sample at or after the respective instant, or from
        the latest sample when no later one exists. ``None`` is returned for
        an instant when the table has no usable time at all.
    """
    lap_start, lap_end = get_lap_start_end(lap)
    return (
        _position_at_or_after(drv_timing, lap_start),
        _position_at_or_after(drv_timing, lap_end),
    )


def _position_at_or_after(drv_timing, when):
    """Look up the position of the closest timing sample at or after ``when``."""
    times = drv_timing["Time"]
    candidates = times[times >= when]
    if candidates.empty:
        # Nothing at or after the instant: the closest sample is the latest one.
        known = times.dropna()
        if known.empty:
            return None
        label = known.idxmax()
    else:
        label = candidates.idxmin()
    # idxmin/idxmax return index *labels*, so the row must be fetched with .loc
    # (positional .iloc is only equivalent on a fresh 0..n-1 index).
    return drv_timing.loc[label, "Position"]


def get_lap_start_end(lap: "ff1.core.Lap", keep_null=False):
    """Return ``(lap_start, lap_end)`` session times for a lap.

    The end is the lap's ``"Time"``; the start is that time minus ``"LapTime"``.

    Args:
        lap: Lap record providing ``"Time"`` and ``"LapTime"``.
        keep_null: When False (default) a start that cannot be computed (null
            result of the subtraction) is replaced by a zero timedelta; when
            True the null value is returned unchanged.
    """
    session_time = lap["Time"]
    lap_time = lap["LapTime"]

    lap_start = session_time - lap_time
    if not keep_null and pd.isnull(lap_start):
        lap_start = pd.Timedelta(0, 's')
    return lap_start, session_time

def merge_lap_number_to_timing(laps: "ff1.core.Laps", df: pd.DataFrame):
    """Tag every timing sample in ``df`` with the lap it falls into.

    ``df`` is modified in place: a ``"LapNumber"`` column is created when it
    does not exist yet, and every row whose ``"Time"`` lies within a lap
    (both ends inclusive) receives that lap's number. A sample lying exactly
    on the boundary shared by two laps ends up with the later lap, because
    laps are applied in iteration order. Nothing is returned.

    Args:
        laps: Laps of one driver with ``"Time"`` (session time at lap end) and
            ``"LapTime"`` columns. The lap number is read from a
            ``"LapNumber"`` column when present; otherwise it falls back to
            ``index + 1``, which is only meaningful for a 0-based index.
        df: Timing samples with a ``"Time"`` column.
    """
    if 'LapNumber' not in df:
        df["LapNumber"] = None

    has_lap_number = "LapNumber" in laps.columns
    for idx, lap in laps.iterrows():
        # Prefer the recorded lap number: the row label only matches it when
        # the laps table was re-indexed from 0 and starts at lap 1.
        recorded = lap["LapNumber"] if has_lap_number else None
        lap_number = idx + 1 if pd.isnull(recorded) else int(recorded)

        lap_end = lap["Time"]
        lap_start = lap_end - lap["LapTime"]
        if pd.isnull(lap_start):
            # No usable lap time: treat the lap as starting at session time zero.
            lap_start = pd.Timedelta(0, 's')

        in_lap = (df["Time"] >= lap_start) & (df["Time"] <= lap_end)
        df.loc[in_lap, "LapNumber"] = lap_number
    
    
# Convert interval (formatted as +xx:xx) to timedelta
def to_timedelta(x):
    """Parse an interval string into a timedelta via fastf1's parser.

    A leading ``+`` is stripped before parsing. Anything that is not a string
    yields ``None``.
    """
    if not isinstance(x, str):
        return None
    unsigned = x.lstrip('+')
    return ff1.utils.to_timedelta(unsigned)

In [449]:
# Expected result for Alonso during 2022 Austrian GP
# Used as the reference set that detected overtakes are compared against.

# Each entry is a (lap number, position) pair.
ALONSO_TEST = [
    (2, 17),
    (24, 9),
    (32, 17),
    (33, 16),
    (40, 13),
    (61, 13),
    (62, 12),
    (67, 11),
    (70, 10)
]

def test_alonso_data(df, strict=False):
    missing = []
    for lap, pos in ALONSO_TEST:
        if df.query(f'LapNumber == {lap} and Position == {pos}').empty:
            print(f'Missing - Lap {lap} for P{pos}')
    if strict:
        print()
        check_lap = df.copy()
        for lap, pos in ALONSO_TEST:
            check_lap = check_lap[~check_lap.filter(['LapNumber', 'Position']).isin({'LapNumber': [lap], 'Position': [pos]}).all(1)]
        if not check_lap.empty:
            print('Unexpected results')
            display(check_lap)
     
# Exercise the checker itself: this frame lacks some of the expected pairs,
# so "Missing" lines are reported for them.
test_data_missing = {
    'LapNumber': [2, 24, 32, 33, 40, 61],
    'Position': [17, 9, 17, 16, 13, 13],
}
test_alonso_data(pd.DataFrame(test_data_missing), strict=True)

# This frame holds every expected pair plus one extra row (lap 2, P10) that
# strict mode reports as unexpected; the 'Foo' column is an unrelated extra.
test_data = {
    'LapNumber': [2, 2, 24, 32, 33, 40, 61, 62, 67, 70],
    'Position': [17, 10, 9, 17, 16, 13, 13, 12, 11, 10],
    'Foo': range(10),
}
test_alonso_data(pd.DataFrame(test_data), strict=True)
        

Missing - Lap 62 for P12
Missing - Lap 67 for P11
Missing - Lap 70 for P10


Unexpected results


Unnamed: 0,LapNumber,Position,Foo
1,2,10,1


In [372]:
# Overtake
# Builds `df_overtakes`: the driver's position changes that look like position
# gains, each annotated with the position held immediately before.
debug = False
pd.options.display.max_rows = 60

laps_data, timing_data = ff1.api.timing_data(session.api_path)

driver_number = DRIVERS[DRIVER]
driver_timing = timing_data.query(f'Driver == "{driver_number}"').reset_index(drop=True)

driver_laps = session.laps.pick_driver(driver_number).reset_index(drop=True)

# Keep the first sample plus every sample whose position differs from the
# most recently seen position.
current_position = driver_timing.iloc[0]["Position"]
position_changes = [driver_timing.iloc[0]]
for _, t in driver_timing.iterrows():
    if t["Position"] != current_position:
        position_changes.append(t)
        current_position = t["Position"]

# Re-index from 0 so that `idx - 1` / `idx + 1` below address neighbouring rows.
df = pd.DataFrame(data=position_changes).reset_index(drop=True)

# add lap numbers to df
merge_lap_number_to_timing(driver_laps, df)

# convert interval to timedelta
df['IntervalToPositionAhead'] = df['IntervalToPositionAhead'].apply(to_timedelta)

# calculate possible overtakes
threashold = pd.Timedelta(3, 's') # driver must maintain position improvement for at least this amount of time
possible_overtakes = []
last_idx = len(df) - 1
for idx, t in df.iterrows():
    if idx == 0:
        continue

    this_lap = t["LapNumber"]
    this_position = t["Position"]
    this_time = t["Time"]

    # Every row except the very last one has a successor. (The previous
    # bound of `len(df) - 2` also skipped the look-ahead for the
    # second-to-last row.)
    if idx < last_idx:
        next_t = df.iloc[idx+1]
        next_position = next_t["Position"]
        next_time = next_t["Time"]
    else:
        next_t = None
        next_position = None
        next_time = None

    prev_t = df.iloc[idx-1]
    prev_position = prev_t["Position"]
    prev_time = prev_t["Time"]

    # only care about if this position improved from the previous position
    if prev_position < this_position:
        continue

    # if the next position is the same as the previous position, no overtake may be happening
    # and they are running side by side trading positions.
    # in this case, make sure the current position persisted for at least the threshold amount.
    if next_t is not None and prev_position == next_position:
        # The current position is held from this sample until the next change,
        # so the duration is measured from this sample, not from the previous one.
        if next_time - this_time < threashold:
            continue

    if debug:
        print('----')
        print(f'previous - pos: {prev_position} time: {prev_time}')
        print(f'current  - pos: {this_position} time: {t["Time"]}')
        print(f'next     - pos: {next_position} time: {next_time}')

    t_copy = t.copy()
    t_copy["PositionBefore"] = prev_position
    possible_overtakes.append(t_copy)

df_overtakes = pd.DataFrame(data=possible_overtakes).reset_index(drop=True)

test_alonso_data(df_overtakes)

print('done')

api            INFO 	Using cached data for timing_data


done


In [622]:
# Refine df_overtakes further using data about the driver that was passed.
max_laps = session.laps["LapNumber"].max()

driver_laps = session.laps.pick_driver(driver_number)

_, all_timing = ff1.api.timing_data(session.api_path)
driver_timing = all_timing.query(f'Driver == "{driver_number}"').reset_index(drop=True)

# Work on a copy of the candidates and add the columns that are filled in
# later; all of them start out empty.
df = df_overtakes.copy()
for empty_column in (
    "DriverPassed",
    "DriverPassedStatus",  # Pit, OffTrack, Retired, Lapped
    "DriverPassedPositionAtLapEnd",
    "PositionAtLapStart",
    "PositionAtLapEnd",
):
    df[empty_column] = None

# Column layout used when the result tables are displayed.
column_order = [
    'Time', 'Driver', 'LapNumber',
    'Position', 'PositionBefore',
    'PositionAtLapStart', 'PositionAtLapEnd',
    'DriverPassed', 'DriverPassedStatus', 'DriverPassedPositionAtLapEnd',
    'GapToLeader', 'IntervalToPositionAhead',
]

# returns matching lap
def get_lap_at_time(time, laps: "ff1.core.Laps") -> "ff1.core.Lap":
    """Return the lap during which the session time ``time`` falls.

    A lap matches when ``time`` lies between its start (``"Time"`` minus
    ``"LapTime"``) and its end (``"Time"``), both ends inclusive. Laps whose
    start cannot be computed (null ``"LapTime"``) never match.

    Args:
        time: Session time to look up.
        laps: Laps of one driver with ``"Time"`` and ``"LapTime"`` columns.

    Returns:
        The single matching lap row, or an empty ``ff1.core.Lap`` when no lap
        matches.

    Raises:
        Exception: If more than one lap matches; the matching laps are
            displayed first.
    """
    lap_end = laps["Time"]
    lap_start = lap_end - laps["LapTime"]
    matches = laps[(lap_end >= time) & (lap_start <= time)]

    if len(matches) > 1:
        display(matches)
        raise Exception('Expected at most 1 lap')

    if matches.empty:
        return ff1.core.Lap()

    # Exactly one row is left at this point, so it can be taken directly; the
    # former "several laps with the same time" fallback could never be reached.
    return matches.iloc[0]
    

def is_time_during_pit(time: pd.Timedelta, laps: "ff1.core.Laps", lap_number_hint=0, pit_in_threashold=pd.Timedelta(0, 's'), pit_out_threashold=pd.Timedelta(0, 's')):
    """Tell whether ``time`` falls inside a pit window found in ``laps``.

    Laps are scanned in order. A window opens at a lap's ``"PitInTime"``
    (moved earlier by ``pit_in_threashold``) and closes at the next
    ``"PitOutTime"`` that follows (moved later by ``pit_out_threashold``);
    both ends are inclusive. A time exactly equal to a shifted pit-in or
    pit-out time also counts, even without a matching counterpart.

    Args:
        time: Session time to test.
        laps: Laps with ``"LapNumber"``, ``"PitInTime"`` and ``"PitOutTime"``.
        lap_number_hint: When truthy, only laps numbered within one lap of the
            hint are inspected.
        pit_in_threashold: Amount by which each pit-in time is moved earlier.
        pit_out_threashold: Amount by which each pit-out time is moved later.

    Returns:
        ``True`` when ``time`` is inside a pit window, otherwise ``False``.
    """
    if lap_number_hint:
        laps = laps[(laps["LapNumber"] >= lap_number_hint - 1) & (laps["LapNumber"] <= lap_number_hint + 1)]

    last_pit_in = None
    for _, lap in laps.iterrows():
        pit_in = lap['PitInTime'] - pit_in_threashold
        pit_out = lap['PitOutTime'] + pit_out_threashold

        if not pd.isnull(pit_in):
            if time == pit_in:
                return True
            # Remember the entry until a pit-out closes the window.
            last_pit_in = pit_in

        if not pd.isnull(pit_out):
            if time == pit_out:
                return True
            # Compare against None explicitly: a zero timedelta is falsy but
            # still a valid pit-in time.
            if last_pit_in is not None:
                if last_pit_in <= time <= pit_out:
                    return True
                last_pit_in = None

    return False

# populate passed driver
# For every lap with candidate overtakes, work out which driver was passed and
# whether that driver was in a state (pit, retired, lapped, off track, ...)
# that makes the position gain something other than an on-track overtake.
for idx, lap in driver_laps.iterlaps():
    lap_number = lap["LapNumber"]
    if lap_number == 1:
        continue

    # filter overtakes by lap number
    overtakes = df_overtakes.query(f'LapNumber == {lap_number}')
    if overtakes.empty:
        continue

    # lap start position and lap end position
    pos_start, pos_end = get_position_at_lap(lap, driver_timing)

    for oidx, overtake in overtakes.iterrows():
        time = overtake['Time']
        position = overtake['Position']
        position_before = overtake['PositionBefore']

        # Populate positions at lap start / end
        df.at[oidx, "PositionAtLapStart"] = pos_start
        df.at[oidx, "PositionAtLapEnd"] = pos_end

        # Find the passed driver timing by closest time: the latest sample, up
        # to the overtake, in which some car holds the position given up.
        cd2 = all_timing[(all_timing['Position'] == position_before) & (all_timing['Time'] <= time)]
        if cd2.empty:
            # No such sample: leave DriverPassed empty; the row is filtered out below.
            continue
        mi2 = cd2["Time"].idxmax()
        # idxmax returns an index label, so the row is fetched by label.
        passed_driver_timing = all_timing.loc[mi2]
        passed_driver_number = passed_driver_timing["Driver"]

        # Populate driver passed
        if passed_driver_number is None:
            continue
        df.at[oidx, "DriverPassed"] = passed_driver_number

        passed_driver_laps = session.laps.pick_driver(passed_driver_number)
        passed_driver_lap = get_lap_at_time(time, passed_driver_laps)

        # This driver is likely retired, updated status accordingly
        if passed_driver_lap.empty:
            pd_max_laps = passed_driver_laps["LapNumber"].max()
            if pd_max_laps < max_laps:
                df.at[oidx, "DriverPassedStatus"] = "Retired"
            else:
                df.at[oidx, "DriverPassedStatus"] = "Unknown"

        # Check if the passed driver is pitted
        elif is_time_during_pit(time, passed_driver_laps, lap_number_hint=passed_driver_lap["LapNumber"]):
            df.at[oidx, "DriverPassedStatus"] = "Pit"

        # The lap is considered not accurate if pit stop occurs (and other criteria)
        # If not accurate, try checking pit window again with a very wide tolerance
        elif not passed_driver_lap["IsAccurate"]:
            tolerance = pd.Timedelta(10, 's')
            if not (pd.isnull(passed_driver_lap['PitInTime']) and pd.isnull(passed_driver_lap['PitOutTime'])):
                if is_time_during_pit(time, passed_driver_laps, passed_driver_lap["LapNumber"], tolerance, tolerance):
                    df.at[oidx, "DriverPassedStatus"] = "MaybePit"
            else:
                df.at[oidx, "DriverPassedStatus"] = "NotAccurate"

        # Ensure car is not lapped
        elif passed_driver_lap["LapNumber"] < lap_number:
            df.at[oidx, "DriverPassedStatus"] = "Lapped"

        # Check if the car is off track
        else:
            tol = pd.Timedelta(5, 's')
            pos_data = passed_driver_lap.get_pos_data().slice_by_time(time-tol, time+tol)
            if not pos_data[pos_data["Status"] == "OffTrack"].empty:
                df.at[oidx, "DriverPassedStatus"] = "OffTrack"
            else:
                # car may have spun out or gone into the gravel - check if the car is going very slow at this time
                tol = pd.Timedelta(1, 's')
                car_data = passed_driver_lap.get_car_data().slice_by_time(time-tol, time+tol)
                if car_data["Speed"].mean() <= 30:
                    df.at[oidx, "DriverPassedStatus"] = "MaybeOffTrack"

df = df[column_order]

# collapse data where multiple overtakes occur in one lap
df = df.drop_duplicates(["LapNumber", "Position", "DriverPassed"], keep='last')

# filter data without driver passed
df = df[~df["DriverPassed"].isnull()]

display(Markdown('## Full overtake data'))
display(df)

# keep only rows for which no status was recorded
df = df[df["DriverPassedStatus"].isnull()]


display(Markdown('## Filtered overtake data'))
display(df)

for _, t in df.iterrows():
    lap = t["LapNumber"]
    driver = t["Driver"]
    driver_passed = t["DriverPassed"]
    position = t["Position"]
    time = t["Time"]

    if driver_passed:
        # Fall back to the raw car number when it is missing from the lookup
        # table instead of failing with a KeyError.
        driver_code = DRIVER_NUMBERS.get(driver, driver)
        passed_code = DRIVER_NUMBERS.get(driver_passed, driver_passed)
        print(f'Lap {lap} {driver_code} on {passed_code} for P{position} at {time}')

test_alonso_data(df, True)
        

api            INFO 	Using cached data for timing_data
  super().__init__(*args, **kwargs)


## Full overtake data

Unnamed: 0,Time,Driver,LapNumber,Position,PositionBefore,PositionAtLapStart,PositionAtLapEnd,DriverPassed,DriverPassedStatus,DriverPassedPositionAtLapEnd,GapToLeader,IntervalToPositionAhead
4,0 days 01:04:30.901000,14,2,17,18,18,17,5,,,+11.370,0 days 00:00:00.050000
5,0 days 01:15:42.434000,14,12,15,17,17,14,23,Pit,,+26.337,0 days 00:00:00.410000
6,0 days 01:16:44.358000,14,12,14,15,17,14,6,MaybePit,,+27.844,0 days 00:00:00.354000
7,0 days 01:16:55.408000,14,13,12,14,14,12,10,Pit,,+28.120,0 days 00:00:01.798000
8,0 days 01:19:19.681000,14,15,11,12,12,11,4,Pit,,+30.972,0 days 00:00:01.015000
9,0 days 01:20:31.277000,14,16,10,11,11,10,20,Pit,,+31.908,0 days 00:00:00.664000
10,0 days 01:21:43.294000,14,17,8,10,10,8,47,Pit,,+33.511,0 days 00:00:00.624000
11,0 days 01:30:24.201000,14,24,9,10,9,10,24,,,+45.908,0 days 00:00:00.008000
12,0 days 01:33:43.994000,14,26,10,11,11,10,22,Pit,,+45.071,0 days 00:00:00.668000
13,0 days 01:40:25.461000,14,32,17,18,18,17,6,,,+58.274,0 days 00:00:00.260000


## Filtered overtake data

Unnamed: 0,Time,Driver,LapNumber,Position,PositionBefore,PositionAtLapStart,PositionAtLapEnd,DriverPassed,DriverPassedStatus,DriverPassedPositionAtLapEnd,GapToLeader,IntervalToPositionAhead
4,0 days 01:04:30.901000,14,2,17,18,18,17,5,,,+11.370,0 days 00:00:00.050000
11,0 days 01:30:24.201000,14,24,9,10,9,10,24,,,+45.908,0 days 00:00:00.008000
13,0 days 01:40:25.461000,14,32,17,18,18,17,6,,,+58.274,0 days 00:00:00.260000
14,0 days 01:41:43.533000,14,33,16,17,17,16,22,,,+58.416,0 days 00:00:00.033000
18,0 days 01:50:01.695000,14,40,13,14,14,12,18,,,+66.421,0 days 00:00:00.141000
26,0 days 02:15:54.252000,14,61,13,14,14,13,18,,,1 L,0 days 00:00:00.109000
27,0 days 02:16:50.720000,14,62,12,13,13,12,10,,,1 L,0 days 00:00:00.102000
28,0 days 02:22:36.883000,14,67,11,12,12,11,23,,,1 L,0 days 00:00:00.011000
29,0 days 02:26:12.466000,14,70,10,11,11,10,77,,,1 L,0 days 00:00:00.099000


Lap 2 ALO on VET for P17 at 0 days 01:04:30.901000
Lap 24 ALO on ZHO for P9 at 0 days 01:30:24.201000
Lap 32 ALO on LAT for P17 at 0 days 01:40:25.461000
Lap 33 ALO on TSU for P16 at 0 days 01:41:43.533000
Lap 40 ALO on STR for P13 at 0 days 01:50:01.695000
Lap 61 ALO on STR for P13 at 0 days 02:15:54.252000
Lap 62 ALO on GAS for P12 at 0 days 02:16:50.720000
Lap 67 ALO on ALB for P11 at 0 days 02:22:36.883000
Lap 70 ALO on BOT for P10 at 0 days 02:26:12.466000

