In [7]:
from IPython.display import display, Markdown, HTML
import pandas as pd
import numpy as np
import fastf1 as ff1

pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

In [8]:
YEAR = 2022
RACE = 'Austria'

ff1.Cache.enable_cache('../data/cache') 

session = ff1.get_session(YEAR, RACE, 'Race')
session.load(telemetry=True, laps=True, messages=True, weather=False)

core           INFO 	Loading data for Austrian Grand Prix - Race [v2.2.9]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '1', '44', '63', '31', '47', '4', '20', '3', '14', '77', '23', '18', '24', '10', '22', '5', '55', '6', '11']


In [9]:
def flatten(mydict, sep = '.'):
    new_dict = {}
    for key,value in mydict.items():
        if isinstance(value, dict):
            _dict = {sep.join([key, _key]):_value for _key, _value in flatten(value).items()}
            new_dict.update(_dict)
        elif isinstance(value, list):
            _dict = {}
            for i, v in enumerate(value):
                if isinstance(v, dict):
                    _dict.update({sep.join([key, f'[{i}]', _key]):_value for _key, _value in flatten(v).items()})
                else:
                    _dict.update({sep.join([key, f'[{i}]']): v})
            new_dict.update(_dict)
        else:
            new_dict[key]=value
    return new_dict

test = {
    'a': {
        'b': 'hi',
        'c': ['hello', {'d': 'howdy'}]
    }
}
print(flatten(test))

{'a.b': 'hi', 'a.c.[0]': 'hello', 'a.c.[1].d': 'howdy'}


In [10]:
# Fetch data
obj = ff1.api.fetch_page(session.api_path, 'timing_data')

In [69]:
column_to_key = {
    "Line": "Line",
    "NumberOfLaps": "NumberOfLaps",
    "NumberOfPitStops": "NumberOfPitStops",
    "Stopped": "Stopped",
    "GapToLeader": "GapToLeader",
    "IntervalToPositionAhead": "IntervalToPositionAhead.Value",
    "Catching": "IntervalToPositionAhead.Catching",
    "Position": "Position",
    "ShowPosition": "ShowPosition",
    "Retired": "Retired",
    "InPit": "InPit",
    "PitOut": "PitOut",
    "Status": "Status",
    "Sector1_Time": "Sectors.[0].Value",
    "Sector1_Time_Previous": "Sectors.[0].PreviousValue",
    "Sector1_Stopped": "Sectors.[0].Stopped",
    "Sector1_Status": "Sectors.[0].Status",
    "Sector1_OverallFastest": "Sectors.[0].OverallFastest",
    "Sector1_PersonalFastest": "Sectors.[0].PersonalFastest",
    "Sector2_Time": "Sectors.[1].Value",
    "Sector2_Time_Previous": "Sectors.[1].PreviousValue",
    "Sector2_Stopped": "Sectors.[1].Stopped",
    "Sector2_Status": "Sectors.[1].Status",
    "Sector2_OverallFastest": "Sectors.[1].OverallFastest",
    "Sector2_PersonalFastest": "Sectors.[1].PersonalFastest",
    "Sector3_Time": "Sectors.[2].Value",
    "Sector3_Time_Previous": "Sectors.[2].PreviousValue",
    "Sector3_Stopped": "Sectors.[2].Stopped",
    "Sector3_Status": "Sectors.[2].Status",
    "Sector3_OverallFastest": "Sectors.[2].OverallFastest",
    "Sector3_PersonalFastest": "Sectors.[2].PersonalFastest",
    "SpeedI1": "Speeds.I1.Value",
    "SpeedI1_Status": "Speeds.I1.Status",
    "SpeedI1_OverallFastest": "Speeds.I1.OverallFastest",
    "SpeedI1_PersonalBest": "Speeds.I1.PersonalFastest",
    "SpeedI2": "Speeds.I2.Value",
    "SpeedI2_Status": "Speeds.I2.Status",
    "SpeedI2_OverallFastest": "Speeds.I2.OverallFastest",
    "SpeedI2_PersonalBest": "Speeds.I2.PersonalFastest",
    "SpeedFL": "Speeds.FL.Value",
    "SpeedFL_Status": "Speeds.FL.Status",
    "SpeedFL_OverallFastest": "Speeds.FL.OverallFastest",
    "SpeedFL_PersonalBest": "Speeds.FL.PersonalFastest",
    "SpeedST": "Speeds.ST.Value",
    "SpeedST_Status": "Speeds.ST.Status",
    "SpeedST_OverallFastest": "Speeds.ST.OverallFastest",
    "SpeedST_PersonalBest": "Speeds.ST.PersonalFastest",
    "BestLap": "BestLapTime.Lap",
    "BestLapTime": "BestLapTime.Value",
    "LastLapTime": "LastLapTime.Value",
    "LastLapTime_Status": "LastLapTime.Status",
    "LastLapTime_OverallFastest": "LastLapTime.OverallFastest",
    "LastLapTime_PersonalFastest": "LastLapTime.PersonalFastest",
}
for sector in range(0,3):
    for segment in range(0,7):
        column_to_key[f'Sector{sector+1}_Segment{segment+1}_Status'] = f'Sectors.{sector}.Segments.[{segment}].Status'
columns = ["Time", "Driver"] + list(column_to_key.keys())

def to_float(x):
    if not x:
        return np.nan
    return np.float64(x)

def to_int(x):
    if not x:
        return np.nan
    return np.int64(x)

def to_bool(x):
    if x:
        return pd.Series(True, dtype=bool)
    return pd.Series(False, dtype=bool)

def to_str(x):
    return str(x)

def to_timedelta(x):
    if not x:
        return pd.NaT
    return ff1.utils.to_timedelta(x)


column_data_transforms = {
    "Line": to_int,
    "NumberOfLaps": to_int,
    "NumberOfPitStops": to_int,
    "Status": to_int,
    "Sector1_Time": to_timedelta,
    "Sector1_Time_Previous": to_timedelta,
    "Sector1_OverallFastest": to_timedelta,
    "Sector1_PersonalFastest": to_timedelta,
    "Sector2_Time": to_timedelta,
    "Sector2_Time_Previous": to_timedelta,
    "Sector2_OverallFastest": to_timedelta,
    "Sector2_PersonalFastest": to_timedelta,
    "Sector3_Time": to_timedelta,
    "Sector3_Time_Previous": to_timedelta,
    "Sector3_OverallFastest": to_timedelta,
    "Sector3_PersonalFastest": to_timedelta,
    "SpeedI1": to_timedelta,
    "SpeedI1_OverallFastest": to_timedelta,
    "SpeedI1_PersonalBest": to_timedelta,
    "SpeedI2": to_timedelta,
    "SpeedI2_OverallFastest": to_timedelta,
    "SpeedI2_PersonalBest": to_timedelta,
    "SpeedI3": to_timedelta,
    "SpeedI3_OverallFastest": to_timedelta,
    "SpeedI3_PersonalBest": to_timedelta,
    "BestLapTime": to_timedelta,
    "LastLapTime": to_timedelta,
    "LastLapTime_OverallFastest": to_timedelta,
    "LastLapTime_PersonalFastest":to_timedelta,
    "SpeedI1": to_float,
    "SpeedI1_OverallFastest": to_float,
    "SpeedI1_PersonalBest": to_float, # this is just 1 if this is true
    "SpeedI2": to_float,
    "SpeedI2_OverallFastest": to_float,
    "SpeedI2_PersonalBest": to_float,
    "SpeedST": to_float,
    "SpeedST_OverallFastest": to_float,
    "SpeedST_PersonalBest": to_float,
    "SpeedFL": to_float,
    "SpeedFL_OverallFastest": to_float,
    "SpeedFL_PersonalBest": to_float,
    "InPit": to_bool,
    "PitOut": to_bool,
    "Catching": to_bool,
}
for sector in range(0, 3):
    column_data_transforms[f'Sector{sector+1}_Time'] = to_timedelta
    column_data_transforms[f'Sector{sector+1}_Time_Previous'] = to_timedelta
    column_data_transforms[f'Sector{sector+1}_Stopped'] = to_bool
    column_data_transforms[f'Sector{sector+1}_Status'] = to_int
    column_data_transforms[f'Sector{sector+1}_OverallFastest'] = to_bool
    column_data_transforms[f'Sector{sector+1}_PersonalFastest'] = to_bool
    for segment in range(0, 7):
        column_data_transforms[f'Sector{sector+1}_Segment{segment+1}_Status'] = to_int

key_to_column = {v: k for k, v in column_to_key.items()}
additional_keys = {
    "Sectors.0.Value": "Sector1_Time",
    "Sectors.0.PreviousValue": "Sector1_Time_Previous",
    "Sectors.0.Stopped": "Sector1_Stopped",
    "Sectors.0.Status": "Sector1_Status",
    "Sectors.0.OverallFastest": "Sector1_OverallFastest",
    "Sectors.0.PersonalFastest": "Sector1_PersonalFastest",
    "Sectors.1.Value": "Sector2_Time",
    "Sectors.1.PreviousValue": "Sector2_Time_Previous",
    "Sectors.1.Stopped": "Sector2_Stopped",
    "Sectors.1.Status": "Sector2_Status",
    "Sectors.1.OverallFastest": "Sector2_OverallFastest",
    "Sectors.1.PersonalFastest": "Sector2_PersonalFastest",
    "Sectors.2.Value": "Sector3_Time",
    "Sectors.2.PreviousValue": "Sector3_Time_Previous",
    "Sectors.2.Stopped": "Sector3_Stopped",
    "Sectors.2.Status": "Sector3_Status",
    "Sectors.2.OverallFastest": "Sector3_OverallFastest",
    "Sectors.2.PersonalFastest": "Sector3_PersonalFastest"
}

for sector in range(0,3):
    for segment in range(0,7):
        additional_keys[f'Sectors.{sector}.Segments.{segment}.Status'] = f'Sector{sector+1}_Segment{segment+1}_Status'
key_to_column.update(additional_keys)

from datetime import timedelta

timing_rows = []
bad_rows = []
idx = 0
default_row = {k: fn(None) for k, fn in column_data_transforms.items()}

for entry in obj:
    if (len(entry) < 2) or 'Lines' not in entry[1]:
        continue
    time = entry[0]
    lines = entry[1]['Lines']
    for drv in entry[1]['Lines']:
        data = flatten(entry[1]['Lines'][drv])
        
        row = default_row.copy()
        row['Time'] = to_timedelta(time)
        row['Driver'] = to_str(drv)
        
        if 'RacingNumber' in data:
            del data['RacingNumber']
            
        try:
            for k, v in data.items():
                column_key = key_to_column[k]
                if column_key in column_data_transforms:
                    v = column_data_transforms[column_key](v)
                else:
                    # everything is a still unless explicitly transformed
                    # TODO: this is not enough - need to set dtypes on all columns without transforms to str
                    v = to_str(drv)
                row[column_key] = v
            timing_rows.append(pd.Series(data=row, dtype=object))
        except KeyError:
            bad_rows.append((drv, data))
    idx += 1

missing_fields = set()
for drv, bad_row in bad_rows: 
    for k, v in bad_row.items():
        if k not in key_to_column:
            missing_fields.add(k)
if missing_fields:
    print(missing_fields)
        
df = pd.DataFrame(data=timing_rows)
display(df)

Unnamed: 0,Line,NumberOfLaps,NumberOfPitStops,Status,Sector1_Time,Sector1_Time_Previous,Sector1_OverallFastest,Sector1_PersonalFastest,Sector2_Time,Sector2_Time_Previous,Sector2_OverallFastest,Sector2_PersonalFastest,Sector3_Time,Sector3_Time_Previous,Sector3_OverallFastest,Sector3_PersonalFastest,SpeedI1,SpeedI1_OverallFastest,SpeedI1_PersonalBest,SpeedI2,SpeedI2_OverallFastest,SpeedI2_PersonalBest,SpeedI3,SpeedI3_OverallFastest,SpeedI3_PersonalBest,BestLapTime,LastLapTime,LastLapTime_OverallFastest,LastLapTime_PersonalFastest,SpeedST,SpeedST_OverallFastest,SpeedST_PersonalBest,SpeedFL,SpeedFL_OverallFastest,SpeedFL_PersonalBest,InPit,PitOut,Catching,Sector1_Stopped,Sector1_Status,Sector1_Segment1_Status,Sector1_Segment2_Status,Sector1_Segment3_Status,Sector1_Segment4_Status,Sector1_Segment5_Status,Sector1_Segment6_Status,Sector1_Segment7_Status,Sector2_Stopped,Sector2_Status,Sector2_Segment1_Status,Sector2_Segment2_Status,Sector2_Segment3_Status,Sector2_Segment4_Status,Sector2_Segment5_Status,Sector2_Segment6_Status,Sector2_Segment7_Status,Sector3_Stopped,Sector3_Status,Sector3_Segment1_Status,Sector3_Segment2_Status,Sector3_Segment3_Status,Sector3_Segment4_Status,Sector3_Segment5_Status,Sector3_Segment6_Status,Sector3_Segment7_Status,Time,Driver,GapToLeader,IntervalToPositionAhead,Position,ShowPosition,Retired,Stopped,SpeedI1_Status,SpeedI2_Status,SpeedFL_Status,SpeedST_Status,LastLapTime_Status,BestLap
0,1.0,,,80.0,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,0 True dtype: bool,0 False dtype: bool,0 False dtype: bool,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 days 00:00:03.773000,1,1,1,1,1,1,1,1,1,1,1,1,
1,2.0,,,80.0,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,0 True dtype: bool,0 False dtype: bool,0 False dtype: bool,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 days 00:00:03.773000,16,16,16,16,16,16,16,16,16,16,16,16,
2,3.0,,,80.0,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,0 True dtype: bool,0 False dtype: bool,0 False dtype: bool,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 days 00:00:03.773000,55,55,55,55,55,55,55,55,55,55,55,55,
3,4.0,,,80.0,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,0 True dtype: bool,0 False dtype: bool,0 False dtype: bool,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 days 00:00:03.773000,63,63,63,63,63,63,63,63,63,63,63,63,
4,5.0,,,80.0,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,0 True dtype: bool,0 False dtype: bool,0 False dtype: bool,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 days 00:00:03.773000,11,11,11,11,11,11,11,11,11,11,11,11,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63594,,,,,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,0 False dtype: bool,0 False dtype: bool,0 False dtype: bool,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 days 02:31:27.396000,22,22,,,,,,,,,,,
63595,,,,,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,0 False dtype: bool,0 False dtype: bool,0 False dtype: bool,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 days 02:31:27.396000,6,6,6,,,,,,,,,,
63596,,,,,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,0 False dtype: bool,0 False dtype: bool,0 True dtype: bool,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 days 02:31:27.396000,5,5,,,,,,,,,,,
63597,,,,,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,NaT,NaT,0 False dtype: bool,0 False dtype: bool,,,,,,,NaT,NaT,NaT,NaT,NaT,NaT,NaT,,,,,,,0 False dtype: bool,0 False dtype: bool,0 True dtype: bool,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 False dtype: bool,,,,,,,,,0 days 02:31:27.396000,14,14,,,,,,,,,,,


In [48]:
# Status
# 80 => InPit
# 64 => ?
# 96 => PitOut
# 608 => ? but something about pit
# 576
# 592
# 92 => Stopped
# 28 => Retired in Pit ?
# 4 => Retired in Crash ?
# 1088 => Sessison Finished ?
# display(df["Status"].unique())
# display(df[df["Status"] == 4])

# ShowPosition
# False - all the cars DNF'd
#   Latifi -> Status 28 @ 02:07:25.348
#   Perez -> Status 28 @ 01:35:29.890
#   Sainz -> Status 4 @ 02:18:36.262
# True - Only set at race start
# display(df["ShowPosition"].unique())
# display(df[df["ShowPosition"] == True])

lat = df.copy()
lat = df.query('Driver == "6"').reset_index(drop=True)

# display(entries.filter(items=columns_to_show).filter(regex='^(?!.*Stop|Overall|PersonalFastest).*$'))
# display(entries.filter(items=columns_to_show))

# Understanding sector status to see if we can get a better idea of when a driver is entering/exiting the pit lane.
# SectorX_SegmentY_Status
# 2048
# 2049
# 2064 => something about pit lane??
# so for overtake calculation, either want to use:
# For pit_in:
#   - First 2064
#   - Or the timestamp of last 2048 (would be sector 3 segment 4 time for AUS at least)
#   - Using the last 2048 seems to be the best - PIT_LANE_ENTERED
# For pit_out:
#   - First 2048
#   - One timestamp before 2048 (there seems to be single NaN entry for sector segments between 2048 and 2064, probably because they are not in "any sector")
#   - Last 2064
#   - Not 100% on this, first 2048 should be considered PIT_LANE_EXITED

# For reference: 01:16:44.358000 alonso passed latifi, but latifi was going into pit somewhere around that time.
# Last row is pit in
start = lat[lat['NumberOfLaps'] == 11]['Time']
end = lat[lat['NumberOfLaps'] == 12]['Time']
entries = lat[(lat['Time'] >=  start.item()) & (lat['Time'] < end.item())]
display(entries.filter(regex='^(NumberOfLaps|Sector3_Segment\d{1}_Status|InPit|PitOut|Time)$')[-15:-1])

# First row is pit out
start = lat[lat['NumberOfLaps'] == 12]['Time']
end= lat[lat['NumberOfLaps'] == 13]['Time']
entries = lat[(lat['Time'] >=  start.item()) & (lat['Time'] < end.item())]
display(entries.filter(regex='^(NumberOfLaps|Sector1_Segment\d{1}_Status|InPit|PitOut|Time)$')[4:15])


Unnamed: 0,NumberOfLaps,InPit,PitOut,Sector3_Segment1_Status,Sector3_Segment2_Status,Sector3_Segment3_Status,Sector3_Segment4_Status,Sector3_Segment5_Status,Sector3_Segment6_Status,Sector3_Segment7_Status,Time
668,,0 False dtype: bool,0 False dtype: bool,,,,,,,,0 days 01:16:31.255000
669,,0 False dtype: bool,0 False dtype: bool,,,,,,,,0 days 01:16:32.512000
670,,0 False dtype: bool,0 False dtype: bool,2048.0,,,,,,,0 days 01:16:32.538000
671,,0 False dtype: bool,0 False dtype: bool,,,,,,,,0 days 01:16:37.777000
672,,0 False dtype: bool,0 False dtype: bool,,2048.0,,,,,,0 days 01:16:37.800000
673,,0 False dtype: bool,0 False dtype: bool,,,,,,,,0 days 01:16:40.296000
674,,0 False dtype: bool,0 False dtype: bool,,,2048.0,,,,,0 days 01:16:40.386000
675,,0 False dtype: bool,0 False dtype: bool,,,,,,,,0 days 01:16:44.330000
676,,0 False dtype: bool,0 False dtype: bool,,,,,,,,0 days 01:16:44.358000
677,,0 False dtype: bool,0 False dtype: bool,,,,2048.0,,,,0 days 01:16:44.412000


Unnamed: 0,NumberOfLaps,InPit,PitOut,Sector1_Segment1_Status,Sector1_Segment2_Status,Sector1_Segment3_Status,Sector1_Segment4_Status,Sector1_Segment5_Status,Sector1_Segment6_Status,Sector1_Segment7_Status,Time
687,,0 False dtype: bool,0 True dtype: bool,,,,,,,,0 days 01:17:12.499000
688,,0 False dtype: bool,0 False dtype: bool,,,,,,,,0 days 01:17:12.521000
689,,0 False dtype: bool,0 False dtype: bool,,,,,,,,0 days 01:17:20.255000
690,,0 False dtype: bool,0 False dtype: bool,2064.0,,,,,,,0 days 01:17:20.385000
691,,0 False dtype: bool,0 False dtype: bool,,2064.0,,,,,,0 days 01:17:20.388000
692,,0 False dtype: bool,0 False dtype: bool,,,2064.0,,,,,0 days 01:17:20.389000
693,,0 False dtype: bool,0 False dtype: bool,,,,,,,,0 days 01:17:23.448000
694,,0 False dtype: bool,0 False dtype: bool,,,,2048.0,,,,0 days 01:17:23.484000
695,,0 False dtype: bool,0 False dtype: bool,,,,,,,,0 days 01:17:26.080000
696,,0 False dtype: bool,0 False dtype: bool,,,,,2048.0,,,0 days 01:17:26.165000


In [None]:
# ss = df.filter(regex='^(Sector\d_Status|Sector\d_Segment\d{1}_Status)$')
# for t in ss.columns.values.tolist():
#     print(t, ss[t].unique())

# Test assumption on junk data

drivers = df["Driver"].unique()

# driver: errors
check = {k: [] for k in drivers}

for drv in drivers:
    data = df.query(f'Driver == "{drv}"')
    
    # Sector Status - only seems to be populated for the first entry and the value and is always 0
    for i in range(1,4):
        values = data[f'Sector{i}_Status'].unique()
        if len(values) != 2 and not values.all(['0', np.NaN]):
            check[drv] = f'unexpected sector {i} status: {values}'
        d = data[data[f'Sector{i}_Status'] == '0']
        if len(d) != 1:
            check[drv] = f'expected only 1 status for sector {i}'

    # LapTimeStatus is the same deal
    values = data["LastLapTime_Status"].unique()
    if len(values) != 2 and not values.all(['0', np.NaN]):
        check[drv] = f'unexpected lap time status: {values}'
        

for d, err in check.items():
    if err:
        print(f'check failed for driver {d}')
        print(err)
print('done')

In [177]:
from enum import Enum, auto


class DriverStatus(Enum):
    OK = auto()
    RETIRED = auto()
    CRASHED = auto()
    STOPPED = auto()
    PIT_LANE_ENTERED = auto()
    PIT_IN = auto()
    PIT = auto()
    PIT_OUT = auto()
    PIT_LANE_EXITED = auto()


class TimingMarker(Enum):
    PURPLE = auto()
    GREEN = auto()
    YELLO = auto()


# Sanitized Dataframes
extra_timing_data_columns = [
    'Time',
    'Driver',
    'Catching',
    'DriverStatus',
    'RawStatus',
    'LapNumber',

    'Sector',
    'SectorStartTime',
    'SectorSegment',
    'SectorSegmentStartTime',
    'RawSectorSegmentStatus',
    'LastSectorTime',
    'LastSectorMarker',
]

extra_lap_data_columns = [
    'Time',
    'Driver',
    'LapStartTime',
    'LastLapTime',
    'LastLapTimeMarker',
    'BestLapTime',
    'BestLap',
]

drivers = df["Driver"].unique()

# driver -> data
timing_data = {}
sector_data = []

# only get data that are unavailable from ff1
for drv in drivers:
    current = df.query(f'Driver == "{drv}"').reset_index(drop=True)

    timing_data[drv] = []
    last_not_ok_status = None
    last_not_ok_status_idx = None
    lap_number = np.nan

    for i, row in current.iterrows():
        warning = np.nan

        status = DriverStatus.OK
        if row['InPit'].bool():
            status = DriverStatus.PIT_IN
        elif row['PitOut'].bool():
            status = DriverStatus.PIT_OUT
        
        if status != DriverStatus.OK:
            last_not_ok_status_idx = i
            last_not_ok_status = status

        if not pd.isnull(row['NumberOfLaps']) and lap_number != row['NumberOfLaps']:
            lap_number = row['NumberOfLaps']
        
        raw_status = np.nan
        if not pd.isnull(row['Status']):
            raw_status = row['Status']

        # Sector data
        sector = np.nan
        sector_segment = np.nan
        sector_segment_status = np.nan
        ssg = row.filter(regex='^Sector\d{1}_Segment\d{1}_Status').dropna()
        if len(ssg.keys()) > 1:
            warning = 'MULTIPLE_SEGMENTS'
        for k, v in ssg.items():
            sector_label, segment_label, _ = k.split('_')
            sector = int(sector_label.lstrip('Sector'))
            sector_segment = int(segment_label.lstrip('Segment'))
            sector_segment_status = v
        
        # Find when the driver entered pit lane or exiting
        if last_not_ok_status == DriverStatus.PIT_IN:
            for ii in range(last_not_ok_status_idx-1, -1, -1):
                if timing_data[drv][ii]['RawSectorSegmentStatus'] == 2048:
                    timing_data[drv][ii]['DriverStatus'] = DriverStatus.PIT_LANE_ENTERED.name
                    break
            last_not_ok_status = None
        elif sector_segment_status == 2048 and last_not_ok_status == DriverStatus.PIT_OUT:
            status = DriverStatus.PIT_LANE_EXITED
            last_not_ok_status = None

        timing = {
            'Time': row['Time'],
            'Driver': int(drv),
            'LapNumber': lap_number,
            'Catching': row['Catching'],
            'DriverStatus': status.name,
            'RawStatus': raw_status,
            'Sector': sector,
            'SectorSegment': sector_segment,
            'RawSectorSegmentStatus': sector_segment_status,
            'Warning': warning,
        }
        timing_data[drv].append(timing)

timing_df = pd.DataFrame([v for vlist in timing_data.values() for v in vlist])
display(timing_df[670:700])
# display(timing_df[timing_df['RawSectorSegmentStatus'] == 2064])

# drivers = df["DriverN


Unnamed: 0,Time,Driver,LapNumber,Catching,DriverStatus,RawStatus,Sector,SectorSegment,RawSectorSegmentStatus,Warning
670,0 days 01:22:40.628000,1,17.0,0 False dtype: bool,OK,,,,,
671,0 days 01:22:40.718000,1,17.0,0 False dtype: bool,OK,,1.0,1.0,2049.0,
672,0 days 01:22:42.356000,1,17.0,0 False dtype: bool,OK,,,,,
673,0 days 01:22:42.358000,1,17.0,0 False dtype: bool,OK,,1.0,2.0,2048.0,
674,0 days 01:22:46.520000,1,17.0,0 False dtype: bool,OK,,,,,
675,0 days 01:22:46.656000,1,17.0,0 False dtype: bool,OK,,1.0,3.0,2048.0,
676,0 days 01:22:49.531000,1,17.0,0 False dtype: bool,OK,,,,,
677,0 days 01:22:49.591000,1,17.0,0 False dtype: bool,OK,,1.0,4.0,2049.0,
678,0 days 01:22:52.001000,1,17.0,0 False dtype: bool,OK,,,,,
679,0 days 01:22:52.150000,1,17.0,0 False dtype: bool,OK,,1.0,5.0,2048.0,


In [174]:
# verstappen = timing_df.query('Driver == "1" and LapNumber == 12')
verstappen = timing_df[464:485]

target = pd.Timedelta(seconds=22, microseconds=22000)
print('Want:', target)
PIT_LANE_ENTERED = 464
PIT_IN = 467
PIT_OUT = 476
PIT_LANE_EXITED = 484

for i in range(PIT_LANE_ENTERED, PIT_OUT):
    for j in range(PIT_IN+1, PIT_LANE_EXITED+1):
        diff = timing_df.iloc[j]['Time'] - timing_df.iloc[i]['Time']
        if diff.seconds == target.seconds:
            print('Close:', i, j, diff)
        if diff == target:
            print('yeey')

print('Closest [PIT_OUT - PIT_IN - 1]:', timing_df.iloc[PIT_OUT]['Time'] - timing_df.iloc[PIT_IN - 1]['Time'])
display(verstappen)

Want: 0 days 00:00:22.022000
Close: 465 476 0 days 00:00:22.091000
Close: 465 477 0 days 00:00:22.121000
Close: 466 476 0 days 00:00:22.024000
Close: 466 477 0 days 00:00:22.054000
Close: 467 476 0 days 00:00:22.008000
Close: 467 477 0 days 00:00:22.038000
Close: 468 476 0 days 00:00:22.007000
Close: 468 477 0 days 00:00:22.037000
Closest [PIT_OUT - PIT_IN - 1]: 0 days 00:00:22.024000


Unnamed: 0,Time,Driver,LapNumber,Catching,DriverStatus,RawStatus,Sector,SectorSegment,RawSectorSegmentStatus,Warning
464,0 days 01:17:29.025000,1,12.0,0 False dtype: bool,PIT_LANE_ENTERED,,3.0,4.0,2048.0,
465,0 days 01:17:35.240000,1,12.0,0 False dtype: bool,OK,,3.0,7.0,2064.0,
466,0 days 01:17:35.307000,1,12.0,0 False dtype: bool,OK,,3.0,5.0,2064.0,
467,0 days 01:17:35.323000,1,12.0,0 False dtype: bool,PIT_IN,80.0,,,,
468,0 days 01:17:35.324000,1,12.0,0 False dtype: bool,OK,,3.0,6.0,2064.0,
469,0 days 01:17:36.946000,1,12.0,0 False dtype: bool,OK,,,,,
470,0 days 01:17:38.733000,1,13.0,0 False dtype: bool,OK,,,,,
471,0 days 01:17:41.378000,1,13.0,0 False dtype: bool,OK,,,,,
472,0 days 01:17:52.681000,1,13.0,0 False dtype: bool,OK,,,,,
473,0 days 01:17:54.477000,1,13.0,0 False dtype: bool,OK,,,,,
