In [1]:
import pickle
import pandas as pd
import numpy as np
from train import (
    run_preprocessing_pipeline,
    make_sector_to_lap_sequences,
    split_features,
    predict,
)
from sklearn.metrics import confusion_matrix, classification_report

In [19]:
data_path = "data/f1_sector_data.pkl"

base_numeric_cols = [
    'LapNumber', 'SectorTime', 'TyreLife',
    'AirTemp', 'Humidity', 'Pressure', 'TrackTemp',
    'WindDirection', 'WindSpeed'
]
telemetry_numeric_cols = [
    'LapNumber', 'SectorTime', 'TyreLife', 'Speed_P10',
    'Throttle_Median', 'Throttle_ZeroPct', 
    'Gear_Range', 'DRS_ActivePct', 'TrackStatus_Mean',
    'AirTemp', 'Humidity', 'Pressure', 'TrackTemp',
    'WindDirection', 'WindSpeed'
]
base_categorical_cols = ['EventName', 'Team', 'Compound', 'Stint']
telemetry_categorical_cols = base_categorical_cols + ['TrackStatus_Mode']

In [20]:
model_paths = {
    "base_rnn":  "models/rnn/model_base.pkl",
    "tel_rnn":   "models/rnn/model_telemetry.pkl",
    "base_gru":  "models/gru/model_base.pkl",
    "tel_gru":   "models/gru/model_telemetry.pkl",
    "base_lstm": "models/lstm/model_base.pkl",
    "tel_lstm":  "models/lstm/model_telemetry.pkl",
}

models = {}
for name, path in model_paths.items():
    with open(path, "rb") as f:
        models[name] = pickle.load(f)


## Base

In [21]:
features_to_keep = base_numeric_cols + base_categorical_cols + ['Year', 'Driver', 'SectorNumber', 'LapsTilPit']

In [22]:
train_df, val_df, test_df, label_col, scaler_dict, n_classes, encoders = run_preprocessing_pipeline(
    filepath=data_path,
    features_to_keep=features_to_keep,
    numeric_cols=base_numeric_cols,
    categorical_cols=base_categorical_cols,
    laps_label_window=5
)

In [23]:
feature_cols = base_categorical_cols + base_numeric_cols
window_size = 15
X_test, y_test = make_sector_to_lap_sequences(test_df, feature_cols, label_col, window_size)

In [24]:
cat_idx = [feature_cols.index(c) for c in base_categorical_cols]
num_idx = [feature_cols.index(n) for n in base_numeric_cols]
X_test_cat, X_test_num = split_features(X_test, cat_idx, num_idx)

In [25]:
# Only include 'base' models
base_model_keys = [k for k in models.keys() if 'base' in k]

base_predictions = {}
for name in base_model_keys:
    y_prob, y_pred = predict(models[name], X_test_num, X_test_cat)
    base_predictions[name] = {
        "y_prob": y_prob,
        "y_pred": y_pred
    }

In [27]:
for name, preds in base_predictions.items():
    y_pred = preds["y_pred"]
    print(f"\n=== Results for {name} ===\n")
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred, digits=2)
    print("Confusion Matrix:")
    print(cm)
    print("\nClassification Report:")
    print(cr)


=== Results for base_rnn ===

Confusion Matrix:
[[4245 2224]
 [ 932 1682]]

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.66      0.73      6469
           1       0.43      0.64      0.52      2614

    accuracy                           0.65      9083
   macro avg       0.63      0.65      0.62      9083
weighted avg       0.71      0.65      0.67      9083


=== Results for base_gru ===

Confusion Matrix:
[[4223 2246]
 [ 932 1682]]

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.65      0.73      6469
           1       0.43      0.64      0.51      2614

    accuracy                           0.65      9083
   macro avg       0.62      0.65      0.62      9083
weighted avg       0.71      0.65      0.67      9083


=== Results for base_lstm ===

Confusion Matrix:
[[4191 2278]
 [ 839 1775]]

Classification Report:
              precision    recall  f1-score  

## Telemetry

In [28]:
features_to_keep = telemetry_numeric_cols + telemetry_categorical_cols + ['Year', 'Driver', 'SectorNumber', 'LapsTilPit']

In [29]:
train_df, val_df, test_df, label_col, scaler_dict, n_classes, encoders = run_preprocessing_pipeline(
    filepath=data_path,
    features_to_keep=features_to_keep,
    numeric_cols=telemetry_numeric_cols,
    categorical_cols=telemetry_categorical_cols,
    laps_label_window=5
)

In [30]:
feature_cols = telemetry_categorical_cols + telemetry_numeric_cols
window_size = 15
X_test, y_test = make_sector_to_lap_sequences(test_df, feature_cols, label_col, window_size)

In [31]:
cat_idx = [feature_cols.index(c) for c in telemetry_categorical_cols]
num_idx = [feature_cols.index(n) for n in telemetry_numeric_cols]
X_test_cat, X_test_num = split_features(X_test, cat_idx, num_idx)

In [32]:
# Only include 'base' models
tel_model_keys = [k for k in models.keys() if 'tel' in k]

tel_predictions = {}
for name in tel_model_keys:
    y_prob, y_pred = predict(models[name], X_test_num, X_test_cat)
    tel_predictions[name] = {
        "y_prob": y_prob,
        "y_pred": y_pred
    }

In [33]:
for name, preds in tel_predictions.items():
    y_pred = preds["y_pred"]
    print(f"\n=== Results for {name} ===\n")
    cm = confusion_matrix(y_test, y_pred)
    cr = classification_report(y_test, y_pred, digits=2)
    print("Confusion Matrix:")
    print(cm)
    print("\nClassification Report:")
    print(cr)


=== Results for tel_rnn ===

Confusion Matrix:
[[4326 2143]
 [ 803 1811]]

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.67      0.75      6469
           1       0.46      0.69      0.55      2614

    accuracy                           0.68      9083
   macro avg       0.65      0.68      0.65      9083
weighted avg       0.73      0.68      0.69      9083


=== Results for tel_gru ===

Confusion Matrix:
[[4715 1754]
 [ 971 1643]]

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.73      0.78      6469
           1       0.48      0.63      0.55      2614

    accuracy                           0.70      9083
   macro avg       0.66      0.68      0.66      9083
weighted avg       0.73      0.70      0.71      9083


=== Results for tel_lstm ===

Confusion Matrix:
[[4839 1630]
 [1087 1527]]

Classification Report:
              precision    recall  f1-score   su

## Test Data

In [24]:
import fastf1
from datetime import datetime, timezone
import json
import os

fastf1.Cache.enable_cache('./cache')

In [25]:
schedule = fastf1.get_event_schedule(2025)
today = datetime.now(timezone.utc).date()

filtered = schedule[
    schedule['EventName'].notnull() &
    schedule['EventName'].str.contains('Grand Prix') &
    (schedule['Session5DateUtc'].dt.date < today)
]

filtered[['EventName', 'EventDate']]

Unnamed: 0,EventName,EventDate
1,Australian Grand Prix,2025-03-16
2,Chinese Grand Prix,2025-03-23
3,Japanese Grand Prix,2025-04-06
4,Bahrain Grand Prix,2025-04-13
5,Saudi Arabian Grand Prix,2025-04-20
6,Miami Grand Prix,2025-05-04
7,Emilia Romagna Grand Prix,2025-05-18
8,Monaco Grand Prix,2025-05-25
9,Spanish Grand Prix,2025-06-01
10,Canadian Grand Prix,2025-06-15


In [26]:
year = 2025
event_name = 'Bahrain Grand Prix'

session = fastf1.get_session(year, event_name, 'R')
session.load(telemetry=True, weather=True)

laps_df = session.laps.copy()
laps_df['Year'] = year
laps_df['EventName'] = event_name

weather_df = session.weather_data.copy()
weather_df['Year'] = year
weather_df['EventName'] = event_name

telemetry_dfs = []
for idx, lap in session.laps.iterlaps():
    tel = lap.get_car_data().add_track_status()
    tel['Year'] = year
    tel['EventName'] = event_name
    tel['Driver'] = lap.Driver
    tel['LapNumber'] = lap.LapNumber
    telemetry_dfs.append(tel)
if telemetry_dfs:
    telemetry_df = pd.concat(telemetry_dfs, ignore_index=True)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.5.3]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
req            INFO 	Using cached data for weather_data
req            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['81', '63', '4', '16', '44', '1', '10', '31', '22', '87', '12', '23', '6', '7', '14', '30', '18', '5', '55', '27']


In [27]:
print(laps_df.shape)
print(weather_df.shape)
print(telemetry_df.shape)

(1128, 33)
(158, 10)
(425181, 15)


In [28]:
laps_df.columns

Index(['Time', 'Driver', 'DriverNumber', 'LapTime', 'LapNumber', 'Stint',
       'PitOutTime', 'PitInTime', 'Sector1Time', 'Sector2Time', 'Sector3Time',
       'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime',
       'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST', 'IsPersonalBest',
       'Compound', 'TyreLife', 'FreshTyre', 'Team', 'LapStartTime',
       'LapStartDate', 'TrackStatus', 'Position', 'Deleted', 'DeletedReason',
       'FastF1Generated', 'IsAccurate', 'Year', 'EventName'],
      dtype='object')

In [29]:
laps_df = laps_df[[
    'Year', 'EventName', 'Team', 'Driver', 
    'Stint', 'LapNumber', 'LapStartTime',
    'Sector1Time', 'Sector2Time', 'Sector3Time',
    'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime',
    'Compound', 'TyreLife', 'FreshTyre'
]]

In [30]:
telemetry_df.columns

Index(['Date', 'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'Source',
       'Time', 'SessionTime', 'TrackStatus', 'Year', 'EventName', 'Driver',
       'LapNumber'],
      dtype='object')

In [31]:
telemetry_df = telemetry_df[[
    'Year', 'EventName', 'Driver', 'LapNumber', 'SessionTime', 
    'RPM', 'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'TrackStatus'
]]

In [32]:
weather_df.columns

Index(['Time', 'AirTemp', 'Humidity', 'Pressure', 'Rainfall', 'TrackTemp',
       'WindDirection', 'WindSpeed', 'Year', 'EventName'],
      dtype='object')

In [33]:
weather_df.rename(columns={'Time': 'SessionTime'}, inplace=True)

In [34]:
weather_df = weather_df[[
    'Year', 'EventName', 'SessionTime',
    'AirTemp', 'Humidity', 'Pressure', 'Rainfall',
    'TrackTemp', 'WindDirection', 'WindSpeed'
]]

In [35]:
folder = f"data/{event_name} {year}/"
os.makedirs(folder, exist_ok=True)

In [36]:
dataframes = {
    "laps": laps_df,
    "weather": weather_df,
    "telemetry": telemetry_df
}

for name, df in dataframes.items():
    with open(os.path.join(folder, f"{name}.json"), "w") as f:
        json.dump(df.to_dict(orient="records"), f, indent=2, default=str)

In [87]:
with open(os.path.join(folder, "laps.json"), "r") as f:
    laps_df = pd.DataFrame(json.load(f))

with open(os.path.join(folder, "telemetry.json"), "r") as f:
    telemetry_df = pd.DataFrame(json.load(f))

with open(os.path.join(folder, "weather.json"), "r") as f:
    weather_df = pd.DataFrame(json.load(f))

In [38]:
def parse_laps_to_sector(laps_df_copy):
    # Build per-sector DataFrame
    sector_list = []
    for sec_num in [1, 2, 3]:
        sec_df = laps_df_copy.copy()
        sec_df['SectorNumber'] = sec_num
        sec_df['SectorTime'] = sec_df[f'Sector{sec_num}Time']
        sec_df['SectorSessionTime_Start'] = (
            sec_df['LapStartTime'] if sec_num == 1 else sec_df[f'Sector{sec_num-1}SessionTime']
        )
        sec_df['SectorSessionTime_End'] = sec_df[f'Sector{sec_num}SessionTime']
        sector_list.append(sec_df)
    per_sector = pd.concat(sector_list, ignore_index=True)

    # Build sector interval table
    cols_to_copy = [
        'Year', 'EventName', 'Team', 'Driver',
        'Stint', 'LapNumber', 'SectorNumber',
        'Compound', 'TyreLife', 'SectorTime', 
        'SectorSessionTime_Start', 'SectorSessionTime_End'
        ]
    sector_intervals = per_sector[cols_to_copy].copy()
    return sector_intervals

In [39]:
sector_df = parse_laps_to_sector(laps_df)

In [40]:
with open(os.path.join(folder, "sector.json"), "w") as f:
    json.dump(sector_df.to_dict(orient="records"), f, indent=2, default=str)

In [41]:
sector_df

Unnamed: 0,Year,EventName,Team,Driver,Stint,LapNumber,SectorNumber,Compound,TyreLife,SectorTime,SectorSessionTime_Start,SectorSessionTime_End
0,2025,Bahrain Grand Prix,McLaren,PIA,1.0,1.0,1,SOFT,4.0,NaT,0 days 00:55:34.907000,NaT
1,2025,Bahrain Grand Prix,McLaren,PIA,1.0,2.0,1,SOFT,5.0,0 days 00:00:31.139000,0 days 00:57:13.827000,0 days 00:57:45.021000
2,2025,Bahrain Grand Prix,McLaren,PIA,1.0,3.0,1,SOFT,6.0,0 days 00:00:31.306000,0 days 00:58:51.319000,0 days 00:59:22.680000
3,2025,Bahrain Grand Prix,McLaren,PIA,1.0,4.0,1,SOFT,7.0,0 days 00:00:31.326000,0 days 01:00:29.402000,0 days 01:01:00.783000
4,2025,Bahrain Grand Prix,McLaren,PIA,1.0,5.0,1,SOFT,8.0,0 days 00:00:31.305000,0 days 01:02:07.535000,0 days 01:02:38.895000
...,...,...,...,...,...,...,...,...,...,...,...,...
3379,2025,Bahrain Grand Prix,Kick Sauber,HUL,3.0,53.0,3,MEDIUM,26.0,0 days 00:00:24.184000,0 days 02:25:09.156000,0 days 02:25:33.340000
3380,2025,Bahrain Grand Prix,Kick Sauber,HUL,3.0,54.0,3,MEDIUM,27.0,0 days 00:00:24.118000,0 days 02:26:47.720000,0 days 02:27:11.838000
3381,2025,Bahrain Grand Prix,Kick Sauber,HUL,3.0,55.0,3,MEDIUM,28.0,0 days 00:00:24.117000,0 days 02:28:26.145000,0 days 02:28:50.262000
3382,2025,Bahrain Grand Prix,Kick Sauber,HUL,3.0,56.0,3,MEDIUM,29.0,0 days 00:00:24.393000,0 days 02:30:04.867000,0 days 02:30:29.260000


In [42]:
sector_df.columns

Index(['Year', 'EventName', 'Team', 'Driver', 'Stint', 'LapNumber',
       'SectorNumber', 'Compound', 'TyreLife', 'SectorTime',
       'SectorSessionTime_Start', 'SectorSessionTime_End'],
      dtype='object')

In [43]:
telemetry_df.columns

Index(['Year', 'EventName', 'Driver', 'LapNumber', 'SessionTime', 'RPM',
       'Speed', 'nGear', 'Throttle', 'Brake', 'DRS', 'TrackStatus'],
      dtype='object')

In [44]:
weather_df.columns

Index(['Year', 'EventName', 'SessionTime', 'AirTemp', 'Humidity', 'Pressure',
       'Rainfall', 'TrackTemp', 'WindDirection', 'WindSpeed'],
      dtype='object')

In [57]:
pd.to_timedelta(sector_df[sector_df['LapNumber']==6]['SectorSessionTime_Start']).dt.total_seconds().describe()

count      60.000000
mean     3872.352667
std        32.265284
min      3825.578000
25%      3841.768750
50%      3867.186000
75%      3905.382250
max      3940.445000
Name: SectorSessionTime_Start, dtype: float64

In [58]:
pd.to_timedelta(sector_df['SectorSessionTime_Start']).dt.total_seconds().describe()

count    3320.000000
mean     6186.749305
std      1664.096296
min      3334.907000
25%      4745.744750
50%      6131.000500
75%      7634.954500
max      9116.986000
Name: SectorSessionTime_Start, dtype: float64

In [61]:
sector_df['SectorSessionTime_Start'].value_counts()

SectorSessionTime_Start
NaT                       64
0 days 00:55:34.907000    20
0 days 01:14:51.773000     1
0 days 00:58:27.364000     1
0 days 01:00:05.407000     1
                          ..
0 days 02:22:17.013000     1
0 days 02:23:55.038000     1
0 days 02:25:33.298000     1
0 days 02:27:11.796000     1
0 days 02:31:43.475000     1
Name: count, Length: 3302, dtype: int64

In [66]:
sector_df['SectorSessionTime_End'].value_counts()

SectorSessionTime_End
NaT                       84
0 days 01:16:54.882000     1
0 days 01:00:29.457000     1
0 days 01:02:07.590000     1
0 days 01:03:45.633000     1
                          ..
0 days 01:26:42.488000     1
0 days 01:28:20.087000     1
0 days 01:29:57.736000     1
0 days 01:31:34.945000     1
0 days 02:32:08.083000     1
Name: count, Length: 3301, dtype: int64

In [157]:
with open(os.path.join(folder, "sector.json"), "r") as f:
    sector_df = pd.DataFrame(json.load(f))

In [158]:
sector_df.isna().sum()

Year                       0
EventName                  0
Team                       0
Driver                     0
Stint                      0
LapNumber                  0
SectorNumber               0
Compound                   0
TyreLife                   0
SectorTime                 0
SectorSessionTime_Start    0
SectorSessionTime_End      0
dtype: int64

In [159]:
sector_df['SectorSessionTime_End'].value_counts()

SectorSessionTime_End
NaT                       84
0 days 01:16:54.882000     1
0 days 01:00:29.457000     1
0 days 01:02:07.590000     1
0 days 01:03:45.633000     1
                          ..
0 days 01:26:42.488000     1
0 days 01:28:20.087000     1
0 days 01:29:57.736000     1
0 days 01:31:34.945000     1
0 days 02:32:08.083000     1
Name: count, Length: 3301, dtype: int64

In [160]:
sector_df['SectorSessionTime_End'] = pd.to_timedelta(sector_df['SectorSessionTime_End'], errors='coerce')

In [161]:
sector_df['SectorSessionTime_End'].isna().sum()

np.int64(84)

In [162]:
sector_df.dtypes

Year                                 int64
EventName                           object
Team                                object
Driver                              object
Stint                              float64
LapNumber                          float64
SectorNumber                         int64
Compound                            object
TyreLife                           float64
SectorTime                          object
SectorSessionTime_Start             object
SectorSessionTime_End      timedelta64[ns]
dtype: object

In [163]:
telemetry_df.dtypes

Year                     int64
EventName               object
Driver                  object
LapNumber                int64
SessionTime    timedelta64[ns]
RPM                    float64
Speed                  float64
nGear                    int64
Throttle               float64
Brake                     bool
DRS                      int64
TrackStatus              int64
dtype: object

In [164]:
weather_df.dtypes

Year                       int64
EventName                 object
SessionTime      timedelta64[ns]
AirTemp                  float64
Humidity                 float64
Pressure                 float64
Rainfall                    bool
TrackTemp                float64
WindDirection              int64
WindSpeed                float64
dtype: object

In [206]:
# For sector
sector_df['SectorTime'] = pd.to_timedelta(sector_df['SectorTime'], errors='coerce').dt.total_seconds()
sector_df['SectorSessionTime_Start'] = pd.to_timedelta(sector_df['SectorSessionTime_Start'], errors='coerce')
sector_df['SectorSessionTime_End'] = pd.to_timedelta(sector_df['SectorSessionTime_End'], errors='coerce')
sector_df['LapNumber'] = sector_df['LapNumber'].astype(int)

# For telemetry
telemetry_df['SessionTime'] = pd.to_timedelta(telemetry_df['SessionTime'], errors='coerce')
telemetry_df['LapNumber'] = telemetry_df['LapNumber'].astype(int)
telemetry_df['TrackStatus'] = telemetry_df['TrackStatus'].astype(int)

# For weather
weather_df['SessionTime'] = pd.to_timedelta(weather_df['SessionTime'], errors='coerce')

In [209]:
test = sector_df.copy()

In [210]:
test['SectorSessionTime_Start']

0      0 days 00:55:34.907000
1      0 days 00:57:13.827000
2      0 days 00:58:51.319000
3      0 days 01:00:29.402000
4      0 days 01:02:07.535000
                ...          
3379   0 days 02:25:09.156000
3380   0 days 02:26:47.720000
3381   0 days 02:28:26.145000
3382   0 days 02:30:04.867000
3383   0 days 02:31:43.475000
Name: SectorSessionTime_Start, Length: 3384, dtype: timedelta64[ns]

In [211]:
pd.to_timedelta(pd.to_timedelta(test['SectorSessionTime_Start']))

0      0 days 00:55:34.907000
1      0 days 00:57:13.827000
2      0 days 00:58:51.319000
3      0 days 01:00:29.402000
4      0 days 01:02:07.535000
                ...          
3379   0 days 02:25:09.156000
3380   0 days 02:26:47.720000
3381   0 days 02:28:26.145000
3382   0 days 02:30:04.867000
3383   0 days 02:31:43.475000
Name: SectorSessionTime_Start, Length: 3384, dtype: timedelta64[ns]

In [223]:
test['SectorSessionTime_Start'].dt.total_seconds()

0       3334.907
1       3433.827
2       3531.319
3       3629.402
4       3727.535
          ...   
3379    8709.156
3380    8807.720
3381    8906.145
3382    9004.867
3383    9103.475
Name: SectorSessionTime_Start, Length: 3384, dtype: float64

In [215]:
sector_df.head()

Unnamed: 0,Year,EventName,Team,Driver,Stint,LapNumber,SectorNumber,Compound,TyreLife,SectorTime,SectorSessionTime_Start,SectorSessionTime_End
0,2025,Bahrain Grand Prix,McLaren,PIA,1.0,1,1,SOFT,4.0,,0 days 00:55:34.907000,NaT
1,2025,Bahrain Grand Prix,McLaren,PIA,1.0,2,1,SOFT,5.0,3.1e-08,0 days 00:57:13.827000,0 days 00:57:45.021000
2,2025,Bahrain Grand Prix,McLaren,PIA,1.0,3,1,SOFT,6.0,3.1e-08,0 days 00:58:51.319000,0 days 00:59:22.680000
3,2025,Bahrain Grand Prix,McLaren,PIA,1.0,4,1,SOFT,7.0,3.1e-08,0 days 01:00:29.402000,0 days 01:01:00.783000
4,2025,Bahrain Grand Prix,McLaren,PIA,1.0,5,1,SOFT,8.0,3.1e-08,0 days 01:02:07.535000,0 days 01:02:38.895000


In [166]:
sector_df.dtypes

Year                                 int64
EventName                           object
Team                                object
Driver                              object
Stint                              float64
LapNumber                            int64
SectorNumber                         int64
Compound                            object
TyreLife                           float64
SectorTime                         float64
SectorSessionTime_Start    timedelta64[ns]
SectorSessionTime_End      timedelta64[ns]
dtype: object

In [180]:
telemetry_df.dtypes

Year                     int64
EventName               object
Driver                  object
LapNumber                int64
SessionTime    timedelta64[ns]
RPM                    float64
Speed                  float64
nGear                    int64
Throttle               float64
Brake                     bool
DRS                      int64
TrackStatus              int64
dtype: object

In [168]:
weather_df.dtypes

Year                       int64
EventName                 object
SessionTime      timedelta64[ns]
AirTemp                  float64
Humidity                 float64
Pressure                 float64
Rainfall                    bool
TrackTemp                float64
WindDirection              int64
WindSpeed                float64
dtype: object

In [169]:
sector_df.isna().sum()

Year                        0
EventName                   0
Team                        0
Driver                      0
Stint                       0
LapNumber                   0
SectorNumber                0
Compound                    0
TyreLife                    0
SectorTime                 80
SectorSessionTime_Start    64
SectorSessionTime_End      84
dtype: int64

In [174]:
sector_df[sector_df['SectorTime'].isna()][['Driver', 'LapNumber']].value_counts()

Driver  LapNumber
RUS     42           3
        50           3
        43           3
        46           3
        41           3
        40           3
        39           3
        38           3
        47           3
        48           3
        49           3
        51           3
        45           3
        52           3
        53           3
        54           3
        55           3
        56           3
        57           3
        44           3
SAI     1            1
STR     1            1
TSU     1            1
ALB     1            1
ALO     1            1
RUS     1            1
PIA     1            1
OCO     1            1
NOR     1            1
LEC     1            1
LAW     1            1
HUL     1            1
HAM     1            1
HAD     1            1
GAS     1            1
DOO     1            1
BOR     1            1
BEA     1            1
ANT     1            1
VER     1            1
Name: count, dtype: int64

In [175]:
sector_df[sector_df['SectorSessionTime_Start'].isna()][['Driver', 'LapNumber']].value_counts()

Driver  LapNumber
RUS     40           2
        50           2
        39           2
        38           2
        43           2
        44           2
        45           2
        46           2
        47           2
        48           2
        49           2
        51           2
        41           2
        52           2
        53           2
        54           2
        55           2
        56           2
        57           2
        42           2
SAI     1            1
STR     1            1
TSU     1            1
ALB     1            1
ALO     1            1
RUS     1            1
ANT     1            1
BEA     1            1
BOR     1            1
DOO     1            1
        28           1
GAS     1            1
        41           1
        48           1
HAD     1            1
HAM     1            1
HUL     1            1
LAW     1            1
        22           1
LEC     1            1
NOR     1            1
OCO     1            1
PIA     1       

In [176]:
sector_df[sector_df['SectorSessionTime_End'].isna()][['Driver', 'LapNumber']].value_counts()

Driver  LapNumber
RUS     40           3
        50           3
        39           3
        38           3
        43           3
        44           3
        45           3
        46           3
        47           3
        48           3
        49           3
        51           3
        41           3
        52           3
        53           3
        54           3
        55           3
        56           3
        57           3
        42           3
SAI     1            1
STR     1            1
TSU     1            1
ALB     1            1
ALO     1            1
RUS     1            1
ANT     1            1
BEA     1            1
BOR     1            1
DOO     1            1
        28           1
GAS     1            1
        41           1
        48           1
HAD     1            1
HAM     1            1
HUL     1            1
LAW     1            1
        22           1
LEC     1            1
NOR     1            1
OCO     1            1
PIA     1       

In [193]:
telemetry_df[telemetry_df['LapNumber']<13]['LapNumber'].value_counts()

LapNumber
1     7979
11    7627
9     7546
2     7523
10    7508
5     7503
12    7441
7     7425
6     7413
8     7399
3     7395
4     7364
Name: count, dtype: int64

In [195]:
telemetry_df[telemetry_df['LapNumber']<12]['LapNumber'].shape

(82682,)

In [204]:
telemetry_df.groupby(['Driver', 'LapNumber'])[['Driver', 'LapNumber']].value_counts()

Driver  LapNumber
ALB     1            405
        2            381
        3            368
        4            369
        5            372
                    ... 
VER     53           367
        54           354
        55           362
        56           370
        57           359
Name: count, Length: 1128, dtype: int64

In [233]:
sector_df[sector_df.isna().any(axis=1)]['LapNumber']

0        1
57       1
94      38
95      39
96      40
        ..
2365    53
2366    54
2367    55
2368    56
2369    57
Name: LapNumber, Length: 108, dtype: int64