In [43]:
from IPython.display import display, Markdown, HTML
import pandas as pd
import numpy as np

import fastf1 as ff1
from scrape.core import get_session

ff1.Cache.enable_cache("./data/cache")

YEAR = 2022
RACE = "Austria"

session = get_session(YEAR, RACE, "Race")
session.load(telemetry=True, laps=True, messages=True, weather=False)


core           INFO 	Loading data for Austrian Grand Prix - Race [v2.2.9]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['16', '1', '44', '63', '31', '47', '4', '20', '3', '14', '77', '23', '18', '24', '10', '22', '5', '55', '6', '11']
api            INFO 	Using cached data for get_timing_data
session        INFO 	Processing timing data...
api            INFO 	Using cached data for track_status_data


In [7]:
from scrape.core.driver import get_drivers
drivers = get_drivers(2022)

def get_test_stuff():
    with open("./notebooks/test.txt") as f:
        lines = f.readlines()

    woo = {}
    for l in lines:
        parts = l.strip().split(" ")
        lap_number = parts[1]
        driver_name = parts[2]
        passed_name = parts[4]
        position = parts[-1]

        driver = drivers[drivers["LastName"] == driver_name].iloc[0]
        passed = drivers[drivers["LastName"] == passed_name].iloc[0]
        driver_number = driver["DriverNumber"]

        if driver_number not in woo:
            woo[driver_number] = []
        woo[driver_number].append((lap_number, passed["Code"], position))
    return woo


display(get_test_stuff())


{'22': [('2', 'ZHO', '14'), ('32', 'LAT', '16')],
 '14': [('2', 'VET', '17'),
  ('24', 'ZHO', '8'),
  ('24', 'ZHO', '9'),
  ('32', 'LAT', '17'),
  ('33', 'TSU', '16'),
  ('40', 'STR', '12'),
  ('61', 'STR', '13'),
  ('62', 'GAS', '12'),
  ('67', 'ALB', '11'),
  ('70', 'BOT', '10')],
 '24': [('2', 'TSU', '14'),
  ('24', 'ALO', '9'),
  ('32', 'LAT', '18'),
  ('37', 'TSU', '17'),
  ('50', 'VET', '17'),
  ('54', 'TSU', '16'),
  ('67', 'GAS', '14')],
 '47': [('4', 'HAM', '7'),
  ('19', 'RIC', '13'),
  ('24', 'ZHO', '11'),
  ('25', 'ALO', '10'),
  ('27', 'NOR', '8'),
  ('32', 'MAG', '6'),
  ('45', 'STR', '8'),
  ('47', 'STR', '8')],
 '4': [('5', 'RIC', '9'),
  ('24', 'ALO', '9'),
  ('24', 'ZHO', '10'),
  ('41', 'MAG', '7'),
  ('46', 'TSU', '12'),
  ('62', 'MAG', '7')],
 '16': [('12', 'VER', '1'), ('33', 'VER', '1'), ('53', 'VER', '1')],
 '77': [('13', 'GAS', '16'), ('64', 'ALB', '10')],
 '44': [('14', 'MSC', '5'), ('15', 'MAG', '4'), ('30', 'OCO', '4')],
 '10': [('15', 'BOT', '16')],
 '63': 

In [4]:
from scrape.core import get_drivers
from scrape.core.driver import add_driver_code
from scrape.race import get_driver_overtakes
from scrape.race.timing import get_timing_data, add_lap_number_to_timing_data, merge_leader_lap
from scrape.race.overtake import (
    __get_position_changes,
    __get_driver_position_changes,
)

DRIVER = "22"

# TODO: DriverCode should just be named Driver (like ff1.core.Lap)

drivers = get_drivers(YEAR)
timing = get_timing_data(session)

driver_laps = session.laps.pick_driver(DRIVER)
position_changes = __get_position_changes(session.laps, timing)

overtakes = get_driver_overtakes(session, DRIVER, timing, position_changes)
overtakes = add_driver_code(overtakes, drivers)
overtakes = add_driver_code(overtakes, drivers, suffix="Behind")
overtakes = add_driver_code(overtakes, drivers, suffix="Ahead")
overtakes["LapNumber"] = overtakes["LapNumber"].astype("int64")
overtakes = overtakes.sort_values(by="LapNumber")
overtakes = overtakes[
    ["Time", "LapNumber", "PassingStatus", "Code", "Position", "CodeBehind", "CodeAhead"]
]
overtakes_filtered = overtakes[overtakes["PassingStatus"].isnull()]
display(overtakes_filtered)
display(overtakes)

drv_position_changes = position_changes.query(f'DriverNumber == "{DRIVER}"').reset_index(drop=True)
drv_position_changes = add_driver_code(drv_position_changes, drivers)
drv_position_changes = add_driver_code(drv_position_changes, drivers, suffix="Behind")
drv_position_changes = add_driver_code(drv_position_changes, drivers, suffix="Ahead")
drv_position_changes = add_driver_code(drv_position_changes, drivers, suffix="Passed")
# drv_position_changes["Time"] = position_changes["Time"].astype('timedelta64[s]')
drv_position_changes = drv_position_changes[
    [
        "Time",
        "LapNumber",
        "Code",
        "CodePassed",
        "Position",
        "PositionBefore",
        "PositionGained",
        "CodeBehind",
        "CodeAhead",
    ]
]

from scrape.data import df_timedelta_to_string

display(
    merge_leader_lap(driver_laps, session.laps, timing)[
        [
            "Time",
            "LapTime",
            "LapNumber",
            "LapLeader",
            "LapStartTime",
            "PitInTime",
            "PitOutTime",
            "Sector1Time",
            "Sector2Time",
            "Sector3Time",
            "Sector1SessionTime",
            "Sector2SessionTime",
            "Sector3SessionTime",
        ]
    ]
    .rename(
        {
            "Sector1SessionTime": "S1SessTime",
            "Sector2SessionTime": "S2SessTime",
            "Sector3SessionTime": "S3SessTime",
        }
    )
    .apply(df_timedelta_to_string, axis=1)
    .astype(str)
    .iloc[45:50]
)


display(drv_position_changes)


api            INFO 	Using cached data for timing_data


Unnamed: 0,Time,LapNumber,PassingStatus,Code,Position,CodeBehind,CodeAhead
1,0 days 01:04:00.967000,2,,TSU,14,ZHO,ALB
6,0 days 01:17:29.953000,13,,TSU,10,ZHO,STR
12,0 days 01:40:19.316000,32,,TSU,16,LAT,STR


Unnamed: 0,Time,LapNumber,PassingStatus,Code,Position,CodeBehind,CodeAhead
0,0 days 01:03:50.038000,2,Duplicate,TSU,14,ZHO,ALB
1,0 days 01:04:00.967000,2,,TSU,14,ZHO,ALB
2,0 days 01:15:37.025000,11,Pit,TSU,13,ALB,GAS
3,0 days 01:15:40.557000,12,Pit,TSU,12,RUS,GAS
4,0 days 01:16:49.545000,12,Pit,TSU,11,GAS,RIC
5,0 days 01:16:53.057000,13,Pit,TSU,10,RIC,STR
6,0 days 01:17:29.953000,13,,TSU,10,ZHO,STR
7,0 days 01:19:18.038000,15,Pit,TSU,9,NOR,STR
8,0 days 01:20:30.188000,16,Pit,TSU,8,MAG,STR
9,0 days 01:21:42.246000,17,Pit,TSU,6,OCO,STR


Unnamed: 0,Time,LapTime,LapNumber,LapLeader,LapStartTime,PitInTime,PitOutTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,Sector2SessionTime,Sector3SessionTime
1100,1:58:03.579,01:15.742,46,48,1:56:47.837,1:58:00.254,NaT,17.71,32.629,25.403,1:57:05.547,1:57:38.176,1:58:03.579
1101,1:59:32.307,01:28.728,47,49,1:58:03.579,NaT,1:58:21.708,35.012,32.084,21.632,1:58:38.591,1:59:10.675,1:59:32.307
1102,2:00:43.008,01:10.701,48,50,1:59:32.307,NaT,NaT,17.637,31.55,21.514,1:59:49.944,2:00:21.494,2:00:43.008
1103,2:01:53.667,01:10.659,49,51,2:00:43.008,NaT,NaT,17.607,31.473,21.579,2:01:00.615,2:01:32.088,2:01:53.667
1104,2:03:03.940,01:10.273,50,52,2:01:53.667,NaT,NaT,17.506,31.304,21.463,2:02:11.173,2:02:42.477,2:03:03.940


Unnamed: 0,Time,LapNumber,Code,CodePassed,Position,PositionBefore,PositionGained,CodeBehind,CodeAhead
0,0 days 00:00:03.773000,,TSU,,16,,,,
1,0 days 01:02:23.743000,,TSU,ALB,15,16.0,1.0,ALB,ZHO
2,0 days 01:02:29.106000,,TSU,,16,15.0,-1.0,VET,ALB
3,0 days 01:03:01.606000,,TSU,PER,15,16.0,1.0,PER,ALB
4,0 days 01:03:50.038000,2.0,TSU,ZHO,14,15.0,1.0,ZHO,ALB
5,0 days 01:03:55.186000,2.0,TSU,,15,14.0,-1.0,LAT,ZHO
6,0 days 01:04:00.967000,2.0,TSU,ZHO,14,15.0,1.0,ZHO,ALB
7,0 days 01:15:37.025000,11.0,TSU,ALB,13,14.0,1.0,ALB,GAS
8,0 days 01:15:40.557000,12.0,TSU,RUS,12,13.0,1.0,RUS,GAS
9,0 days 01:16:49.545000,12.0,TSU,GAS,11,12.0,1.0,GAS,RIC


In [52]:
from scrape.core.position import driver_position_data
from datetime import timedelta
from scrape.core import timing


pos_df = driver_position_data(session.api_path)
overtakes = []


def is_overtake(pos_df, idx, overtaker, overtaken):
    overtaker_pos_df = pos_df[["Time", overtaker]].dropna()

    overtaker_pos = pos_df.iloc[idx][overtaker]
    time = pos_df.iloc[idx]["Time"]

    # 1. is not the first lap
    lap = timing.get_lap_at_time(time, session.laps.pick_driver(overtaker))
    if lap is None or lap.LapNumber == 1:
        return "FIRST_LAP"

    # 2. overtaken driver is not retired

    # 3. overtaken driver is not pitted
    if timing.is_in_pit(time, session.timings.pick_driver(overtaken)):
        return "IN_PIT"

    # 4. overtaken driver is not off track

    # 5. overtaken driver driving slower than 30km/h are considered maybe off track

    # 6. position is maintained for at least 5 seconds
    positions_next_5_seconds = overtaker_pos_df[
        (overtaker_pos_df["Time"] > time)
        & (overtaker_pos_df["Time"] <= time + timedelta(seconds=5))
    ]
    for ii, row in positions_next_5_seconds.iterrows():
        if row[drv] > overtaker_pos:
            return 'LOST_TOO_QUICK'


for idx, row in pos_df[:5].iterrows():
    if idx == 0:
        continue

    time = row["Time"]
    position_changes = row.drop(labels="Time").dropna()

    for drv, pos in position_changes.items():
        previous = pos_df.fillna(method='ffill').iloc[idx - 1][drv] # not idx - 1 but need to be previous known pos.
        if pos < previous:  # position improved

            # now check against the other drivers who's positions changed.  if
            # this driver's position is better run through our "overtake
            # criteria" to see if it was a valid overtake.
            for other_driver, other_driver_pos in position_changes.drop(labels=drv).items():
                print(f'{drv}@{pos} vs {other_driver}@{other_driver_pos}')
                if other_driver_pos > pos:  # this driver has better position, no need to check
                    reason = is_overtake(pos_df, idx, drv, other_driver)
                    overtakes.append(
                        {"Time": time, "Overtaker": drv, "Overtaken": other_driver, "Reason": reason}
                    )
display(pos_df)
display(pd.DataFrame(overtakes))


api            INFO 	Using cached data for driver_position_data


1
20@6.0 vs 31@7.0
2
3@10.0 vs 4@11.0
3
10@13.0 vs 24@14.0
4
31@5.0 vs 11@6.0
31@5.0 vs 20@7.0


Unnamed: 0,Time,1,16,55,63,11,31,20,44,47,...,3,18,24,10,23,22,6,5,14,77
0,0 days 00:00:03.773000,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,11.0,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0
1,0 days 01:02:36.323000,,,,,,7.0,6.0,,,...,,,,,,,,,,
2,0 days 01:02:36.701000,,,,,,,,,,...,10.0,,,,,,,,,
3,0 days 01:02:37.381000,,,,,,,,,,...,,,14.0,13.0,,,,,,
4,0 days 01:03:30.941000,,,,,6.0,5.0,7.0,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
249,0 days 02:23:38.701000,,,,,,,,,,...,,,14.0,15.0,,,,,,
250,0 days 02:23:44.920000,,,,,,,,,,...,,,,,,17.0,,16.0,,
251,0 days 02:26:56.858000,,,,,,,,,,...,,,,,,,,,10.0,11.0
252,0 days 02:27:15.292000,,,,,,,,,,...,,,,16.0,,,,15.0,,


Unnamed: 0,Time,Overtaker,Overtaken,Reason
0,0 days 01:02:36.323000,20,31,FIRST_LAP
1,0 days 01:02:36.701000,3,4,FIRST_LAP
2,0 days 01:02:37.381000,10,24,FIRST_LAP
3,0 days 01:03:30.941000,31,11,
4,0 days 01:03:30.941000,31,20,


In [167]:
from scrape.race import get_driver_overtakes
display(get_driver_overtakes(session, "20").query("PassingStatus == 'OK'"))

Unnamed: 0,Time,LapNumber,Position,DriverNumber,DriverNumberAgainst,PassingStatus
1,0 days 01:04:00.967000,2,14,22,24,OK
6,0 days 01:17:29.953000,13,10,22,24,OK
13,0 days 01:40:19.316000,32,16,22,6,OK


In [133]:
# 01:16:44.35

session.timings.pick_driver(24)[550:600]

Unnamed: 0,Time,Position,GapToLeader,IntervalToPositionAhead,Retired,DriverNumber,LapNumber,Status,LastSectorSegmentNumber,LastSectorSegmentStatus,RawStatus
32280,0 days 01:16:09.987000,13,24.735,0.181,,24,12,,,,
32281,0 days 01:16:10.026000,13,,,,24,12,,2.4,2049.0,
32282,0 days 01:16:17.537000,13,25.023,0.323,,24,12,,,,
32283,0 days 01:16:17.680000,13,,,,24,12,,2.5,2048.0,
32284,0 days 01:16:21.791000,13,25.046,0.309,,24,12,,,,
32285,0 days 01:16:21.881000,13,,,,24,12,,2.6,2048.0,
32286,0 days 01:16:27.296000,13,25.424,0.34,,24,12,,,,
32287,0 days 01:16:27.891000,13,,,,24,12,,2.7,2048.0,
32288,0 days 01:16:31.644000,13,25.79,0.511,,24,12,,,,
32289,0 days 01:16:31.712000,13,,,,24,12,,3.1,2048.0,


In [41]:
display(session.session_status)

Unnamed: 0,Time,Status
0,0 days 00:00:08.046000,Inactive
1,0 days 01:02:13.798000,Started
2,0 days 02:26:38.367000,Finished
3,0 days 02:31:27.396000,Finalised
4,0 days 02:31:27.396000,Ends
