In [1]:
import pandas as pd
import numpy as np
import fastf1 as ff1
from datetime import datetime as dt

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', None)

In [2]:
# get the schedule df's by year

def get_dataframe_schedule(year):
    df = ff1.get_event_schedule(year)
    return (
        df
        .drop(columns=["Location", "OfficialEventName", "Session1Date", "Session1DateUtc", "Session2Date", "Session2DateUtc", "Session3Date", "Session3DateUtc",
                       "Session4Date", "Session4DateUtc", "Session5Date", "Session5DateUtc", "F1ApiSupport"])
        .loc[~df['EventName'].str.contains('Pre-Season', na=False)]
        .reset_index(drop=True)
    )

In [3]:
df1 = get_dataframe_schedule(2024)
df1



Unnamed: 0,RoundNumber,Country,EventDate,EventName,EventFormat,Session1,Session2,Session3,Session4,Session5
0,1,Bahrain,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
1,2,Saudi Arabia,2024-03-09,Saudi Arabian Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
2,3,Australia,2024-03-24,Australian Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
3,4,Japan,2024-04-07,Japanese Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
4,5,China,2024-04-21,Chinese Grand Prix,sprint_qualifying,Practice 1,Sprint Qualifying,Sprint,Qualifying,Race
5,6,United States,2024-05-05,Miami Grand Prix,sprint_qualifying,Practice 1,Sprint Qualifying,Sprint,Qualifying,Race
6,7,Italy,2024-05-19,Emilia Romagna Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
7,8,Monaco,2024-05-26,Monaco Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
8,9,Canada,2024-06-09,Canadian Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
9,10,Spain,2024-06-23,Spanish Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race


In [4]:
# get the race results df's

def get_race(year, gp):
    race = ff1.get_session(year, gp, "R")
    race.load(laps=False, telemetry=False, weather=False, messages=False, livedata=False)
    race_results_df = race.results
    race_results_df = race_results_df.drop(columns=["BroadcastName", "Abbreviation", "DriverId", "TeamColor", "TeamId", "FirstName", "LastName", "HeadshotUrl", "CountryCode", "Q1", "Q2", "Q3", "Time"])
    df_event = get_dataframe_schedule(year)
    event_name = df_event.at[gp-1, "EventName"]
    race_results_df["EventName"] = event_name
    return race_results_df

In [None]:
# concat the results df's

race_dataframes = []
for race in range(1, df1["RoundNumber"].max() + 1):
    df_race = get_race(2023, race)
    race_dataframes.append(df_race)

df_race_concat = pd.concat(race_dataframes, axis=0, ignore_index=True)

In [5]:
# get laps data from race

def get_laps_data(year, gp):
    race = ff1.get_session(year, gp, "R")
    race.load(telemetry=False, weather=False, messages=False, livedata=False)
    df_laps = race.laps
    df = df_laps.drop(columns=["Time", "PitOutTime", "PitInTime", "Sector1SessionTime", "Sector2SessionTime", "Sector3SessionTime", "SpeedFL", "SpeedST",
                           "IsPersonalBest", "LapStartTime", "LapStartDate", "TrackStatus", "DeletedReason", "FastF1Generated", "IsAccurate"])
    return df

In [None]:
laps_dataframe = []
for race in range(1, df1["RoundNumber"].max() + 1):
    lap = get_laps_data(2023, race)
    laps_dataframe.append(lap)


laps_df_concat = pd.concat(laps_dataframe, axis=0, ignore_index=True)

In [68]:
season = dt.now().year
events = ff1.get_event_schedule(season)
today = pd.Timestamp.now()
past_events = events[events["EventDate"] <= today]

In [69]:
past_events

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,...,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport
0,0,Bahrain,Sakhir,FORMULA 1 ARAMCO PRE-SEASON TESTING 2024,2024-02-23,Pre-Season Testing,testing,Practice 1,2024-02-21 10:00:00+03:00,2024-02-21 07:00:00,...,Practice 3,2024-02-23 10:00:00+03:00,2024-02-23 07:00:00,,NaT,NaT,,NaT,NaT,True
1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,...,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True
2,2,Saudi Arabia,Jeddah,FORMULA 1 STC SAUDI ARABIAN GRAND PRIX 2024,2024-03-09,Saudi Arabian Grand Prix,conventional,Practice 1,2024-03-07 16:30:00+03:00,2024-03-07 13:30:00,...,Practice 3,2024-03-08 16:30:00+03:00,2024-03-08 13:30:00,Qualifying,2024-03-08 20:00:00+03:00,2024-03-08 17:00:00,Race,2024-03-09 20:00:00+03:00,2024-03-09 17:00:00,True
3,3,Australia,Melbourne,FORMULA 1 ROLEX AUSTRALIAN GRAND PRIX 2024,2024-03-24,Australian Grand Prix,conventional,Practice 1,2024-03-22 12:30:00+11:00,2024-03-22 01:30:00,...,Practice 3,2024-03-23 12:30:00+11:00,2024-03-23 01:30:00,Qualifying,2024-03-23 16:00:00+11:00,2024-03-23 05:00:00,Race,2024-03-24 15:00:00+11:00,2024-03-24 04:00:00,True
4,4,Japan,Suzuka,FORMULA 1 MSC CRUISES JAPANESE GRAND PRIX 2024,2024-04-07,Japanese Grand Prix,conventional,Practice 1,2024-04-05 11:30:00+09:00,2024-04-05 02:30:00,...,Practice 3,2024-04-06 11:30:00+09:00,2024-04-06 02:30:00,Qualifying,2024-04-06 15:00:00+09:00,2024-04-06 06:00:00,Race,2024-04-07 14:00:00+09:00,2024-04-07 05:00:00,True
5,5,China,Shanghai,FORMULA 1 LENOVO CHINESE GRAND PRIX 2024,2024-04-21,Chinese Grand Prix,sprint_qualifying,Practice 1,2024-04-19 11:30:00+08:00,2024-04-19 03:30:00,...,Sprint,2024-04-20 11:00:00+08:00,2024-04-20 03:00:00,Qualifying,2024-04-20 15:00:00+08:00,2024-04-20 07:00:00,Race,2024-04-21 15:00:00+08:00,2024-04-21 07:00:00,True
6,6,United States,Miami,FORMULA 1 CRYPTO.COM MIAMI GRAND PRIX 2024,2024-05-05,Miami Grand Prix,sprint_qualifying,Practice 1,2024-05-03 12:30:00-04:00,2024-05-03 16:30:00,...,Sprint,2024-05-04 12:00:00-04:00,2024-05-04 16:00:00,Qualifying,2024-05-04 16:00:00-04:00,2024-05-04 20:00:00,Race,2024-05-05 16:00:00-04:00,2024-05-05 20:00:00,True
7,7,Italy,Imola,FORMULA 1 MSC CRUISES GRAN PREMIO DEL MADE IN ...,2024-05-19,Emilia Romagna Grand Prix,conventional,Practice 1,2024-05-17 13:30:00+02:00,2024-05-17 11:30:00,...,Practice 3,2024-05-18 12:30:00+02:00,2024-05-18 10:30:00,Qualifying,2024-05-18 16:00:00+02:00,2024-05-18 14:00:00,Race,2024-05-19 15:00:00+02:00,2024-05-19 13:00:00,True
8,8,Monaco,Monaco,FORMULA 1 GRAND PRIX DE MONACO 2024,2024-05-26,Monaco Grand Prix,conventional,Practice 1,2024-05-24 13:30:00+02:00,2024-05-24 11:30:00,...,Practice 3,2024-05-25 12:30:00+02:00,2024-05-25 10:30:00,Qualifying,2024-05-25 16:00:00+02:00,2024-05-25 14:00:00,Race,2024-05-26 15:00:00+02:00,2024-05-26 13:00:00,True
9,9,Canada,Montréal,FORMULA 1 AWS GRAND PRIX DU CANADA 2024,2024-06-09,Canadian Grand Prix,conventional,Practice 1,2024-06-07 13:30:00-04:00,2024-06-07 17:30:00,...,Practice 3,2024-06-08 12:30:00-04:00,2024-06-08 16:30:00,Qualifying,2024-06-08 16:00:00-04:00,2024-06-08 20:00:00,Race,2024-06-09 14:00:00-04:00,2024-06-09 18:00:00,True


In [None]:
race_dataframes = []
laps_dataframe = []
for race in range(1, past_events["RoundNumber"].max() + 1):
    df_race = get_race(2024, race)
    lap = get_laps_data(2024, race)

    if pd.isna(df_race["Position"].iloc[0]):
        max_lap_number = lap['LapNumber'].max()
        last_lap = lap[lap["LapNumber"] == max_lap_number]
        last_positions = last_lap.set_index('DriverNumber')['Position']
        df_race["Position"] = df_race["DriverNumber"].map(last_positions).fillna(0).astype(int)
        df_race['IsZero'] = df_race['Position'] == 0
        df_race = df_race.sort_values(by=['IsZero', 'Position']).reset_index(drop=True)
        df_race = df_race.drop(columns=['IsZero'])

    race_dataframes.append(df_race)
    laps_dataframe.append(lap)

df_race_concat = pd.concat(race_dataframes, axis=0, ignore_index=True)
laps_df_concat = pd.concat(laps_dataframe, axis=0, ignore_index=True)

df_merge_schedule_results = pd.merge(past_events, df_race_concat, on="EventName")
df_merge_all = pd.merge(df_merge_schedule_results, laps_df_concat, on="DriverNumber")

In [71]:
df_merge_all

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,...,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Team,Position_y,Deleted
0,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,...,0 days 00:00:41.266000,0 days 00:00:23.616000,234.0,250.0,SOFT,4.0,False,Red Bull Racing,1.0,
1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,...,0 days 00:00:41.661000,0 days 00:00:23.719000,232.0,248.0,SOFT,5.0,False,Red Bull Racing,1.0,
2,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,...,0 days 00:00:41.966000,0 days 00:00:23.788000,231.0,243.0,SOFT,6.0,False,Red Bull Racing,1.0,
3,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,...,0 days 00:00:41.892000,0 days 00:00:23.824000,233.0,253.0,SOFT,7.0,False,Red Bull Racing,1.0,
4,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,...,0 days 00:00:42.056000,0 days 00:00:23.862000,231.0,245.0,SOFT,8.0,False,Red Bull Racing,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191133,13,Hungary,Budapest,FORMULA 1 HUNGARIAN GRAND PRIX 2024,2024-07-21,Hungarian Grand Prix,conventional,Practice 1,2024-07-19 13:30:00+02:00,2024-07-19 11:30:00,...,0 days 00:00:30.206000,0 days 00:00:23.563000,279.0,235.0,MEDIUM,1.0,True,Alpine,19.0,
191134,13,Hungary,Budapest,FORMULA 1 HUNGARIAN GRAND PRIX 2024,2024-07-21,Hungarian Grand Prix,conventional,Practice 1,2024-07-19 13:30:00+02:00,2024-07-19 11:30:00,...,0 days 00:00:30.081000,0 days 00:00:23.638000,276.0,237.0,MEDIUM,2.0,True,Alpine,17.0,
191135,13,Hungary,Budapest,FORMULA 1 HUNGARIAN GRAND PRIX 2024,2024-07-21,Hungarian Grand Prix,conventional,Practice 1,2024-07-19 13:30:00+02:00,2024-07-19 11:30:00,...,0 days 00:00:30.046000,0 days 00:00:23.540000,277.0,240.0,MEDIUM,3.0,True,Alpine,16.0,
191136,13,Hungary,Budapest,FORMULA 1 HUNGARIAN GRAND PRIX 2024,2024-07-21,Hungarian Grand Prix,conventional,Practice 1,2024-07-19 13:30:00+02:00,2024-07-19 11:30:00,...,0 days 00:00:30.176000,0 days 00:00:23.656000,,241.0,MEDIUM,4.0,True,Alpine,15.0,


In [72]:
df_merge_all.to_csv("race_results_laps_2024.csv")

In [6]:
df = pd.read_csv("race_results_laps_2024.csv")

  df = pd.read_csv("race_results_laps_2024.csv")


In [7]:
df.head()

Unnamed: 0.1,Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,Session2,Session2Date,Session2DateUtc,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport,DriverNumber,TeamName,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,Driver,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Team,Position_y,Deleted
0,0,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:37.284000,1.0,1.0,,0 days 00:00:41.266000,0 days 00:00:23.616000,234.0,250.0,SOFT,4.0,False,Red Bull Racing,1.0,
1,1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.296000,2.0,1.0,0 days 00:00:30.916000,0 days 00:00:41.661000,0 days 00:00:23.719000,232.0,248.0,SOFT,5.0,False,Red Bull Racing,1.0,
2,2,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.753000,3.0,1.0,0 days 00:00:30.999000,0 days 00:00:41.966000,0 days 00:00:23.788000,231.0,243.0,SOFT,6.0,False,Red Bull Racing,1.0,
3,3,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.647000,4.0,1.0,0 days 00:00:30.931000,0 days 00:00:41.892000,0 days 00:00:23.824000,233.0,253.0,SOFT,7.0,False,Red Bull Racing,1.0,
4,4,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:37.173000,5.0,1.0,0 days 00:00:31.255000,0 days 00:00:42.056000,0 days 00:00:23.862000,231.0,245.0,SOFT,8.0,False,Red Bull Racing,1.0,


In [8]:
df.isnull().sum()

Unnamed: 0                 0
RoundNumber                0
Country                    0
Location                   0
OfficialEventName          0
EventDate                  0
EventName                  0
EventFormat                0
Session1                   0
Session1Date               0
Session1DateUtc            0
Session2                   0
Session2Date               0
Session2DateUtc            0
Session3                   0
Session3Date               0
Session3DateUtc            0
Session4                   0
Session4Date               0
Session4DateUtc            0
Session5                   0
Session5Date               0
Session5DateUtc            0
F1ApiSupport               0
DriverNumber               0
TeamName                   0
FullName                   0
Position_x                 0
ClassifiedPosition     29612
GridPosition           29612
Status                 29612
Points                 29612
Driver                     0
LapTime                 1949
LapNumber     

In [10]:
df_subset = df[["EventName", "FullName", "LapTime", "Sector1Time", "Sector2Time", "Sector3Time", "SpeedI1", "SpeedI2", "LapNumber"]]

In [11]:
df_filtered = df_subset[df_subset.isna().any(axis=1)]
df_filtered

Unnamed: 0,EventName,FullName,LapTime,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,LapNumber
0,Bahrain Grand Prix,Max Verstappen,0 days 00:01:37.284000,,0 days 00:00:41.266000,0 days 00:00:23.616000,234.0,250.0,1.0
5,Bahrain Grand Prix,Max Verstappen,0 days 00:01:37.092000,0 days 00:00:31.041000,0 days 00:00:42.187000,0 days 00:00:23.864000,,247.0,6.0
8,Bahrain Grand Prix,Max Verstappen,0 days 00:01:37.229000,0 days 00:00:31.100000,0 days 00:00:42.166000,0 days 00:00:23.963000,,243.0,9.0
10,Bahrain Grand Prix,Max Verstappen,0 days 00:01:37.085000,0 days 00:00:30.974000,0 days 00:00:42.142000,0 days 00:00:23.969000,,246.0,11.0
13,Bahrain Grand Prix,Max Verstappen,0 days 00:01:37.028000,0 days 00:00:30.938000,0 days 00:00:42.042000,0 days 00:00:24.048000,,253.0,14.0
...,...,...,...,...,...,...,...,...,...
191123,Hungarian Grand Prix,Pierre Gasly,0 days 00:01:25.612000,0 days 00:00:30.439000,0 days 00:00:30.986000,0 days 00:00:24.187000,,232.0,19.0
191124,Hungarian Grand Prix,Pierre Gasly,0 days 00:01:25.468000,0 days 00:00:30.412000,0 days 00:00:30.856000,0 days 00:00:24.200000,,232.0,20.0
191125,Hungarian Grand Prix,Pierre Gasly,0 days 00:01:26.002000,0 days 00:00:30.699000,0 days 00:00:31.058000,0 days 00:00:24.245000,,232.0,21.0
191128,Hungarian Grand Prix,Pierre Gasly,0 days 00:01:25.974000,0 days 00:00:30.535000,0 days 00:00:31.156000,0 days 00:00:24.283000,,232.0,24.0


In [12]:
# mean speed

def fill_na_speed(df, cols):
    for col in cols:
        mean_values = df.groupby(["EventName", "FullName"])[col].transform('mean')
        df[col].fillna(mean_values, inplace=True)
    return df

In [13]:
columns = ["SpeedI1", "SpeedI2"]

In [14]:
df_copy = df.copy()

In [None]:
df_filled = fill_na_speed(df_copy, columns)

In [20]:
df_filled.head(6)

Unnamed: 0.1,Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,Session2,Session2Date,Session2DateUtc,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport,DriverNumber,TeamName,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,Driver,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Team,Position_y,Deleted
0,0,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:37.284000,1.0,1.0,,0 days 00:00:41.266000,0 days 00:00:23.616000,234.0,250.0,SOFT,4.0,False,Red Bull Racing,1.0,
1,1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.296000,2.0,1.0,0 days 00:00:30.916000,0 days 00:00:41.661000,0 days 00:00:23.719000,232.0,248.0,SOFT,5.0,False,Red Bull Racing,1.0,
2,2,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.753000,3.0,1.0,0 days 00:00:30.999000,0 days 00:00:41.966000,0 days 00:00:23.788000,231.0,243.0,SOFT,6.0,False,Red Bull Racing,1.0,
3,3,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.647000,4.0,1.0,0 days 00:00:30.931000,0 days 00:00:41.892000,0 days 00:00:23.824000,233.0,253.0,SOFT,7.0,False,Red Bull Racing,1.0,
4,4,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:37.173000,5.0,1.0,0 days 00:00:31.255000,0 days 00:00:42.056000,0 days 00:00:23.862000,231.0,245.0,SOFT,8.0,False,Red Bull Racing,1.0,
5,5,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:37.092000,6.0,1.0,0 days 00:00:31.041000,0 days 00:00:42.187000,0 days 00:00:23.864000,245.586031,247.0,SOFT,9.0,False,Red Bull Racing,1.0,


In [19]:
df_filled[["SpeedI1", "SpeedI2"]].isna().sum()

SpeedI1    0
SpeedI2    0
dtype: int64

In [21]:
df_filled["Sector1Time"] = pd.to_timedelta(df["Sector1Time"])
df_filled["Sector2Time"] = pd.to_timedelta(df["Sector2Time"])
df_filled["Sector3Time"] = pd.to_timedelta(df["Sector3Time"])

In [23]:
def fill_na_sectorTime(df, cols):
    for col in cols:
        mean_values = df.groupby(["EventName", "FullName"])[col].transform(lambda x: x.dropna().mean())
        df[col].fillna(mean_values, inplace=True)
    return df

In [24]:
columns_sectorTime = ['Sector1Time', 'Sector2Time', 'Sector3Time']

In [None]:
df_filled = fill_na_sectorTime(df_filled, columns_sectorTime)

In [26]:
df_filled[['Sector1Time', 'Sector2Time', 'Sector3Time']].isna().sum()

Sector1Time    0
Sector2Time    0
Sector3Time    0
dtype: int64

In [27]:
df_filled.isna().sum()

Unnamed: 0                 0
RoundNumber                0
Country                    0
Location                   0
OfficialEventName          0
EventDate                  0
EventName                  0
EventFormat                0
Session1                   0
Session1Date               0
Session1DateUtc            0
Session2                   0
Session2Date               0
Session2DateUtc            0
Session3                   0
Session3Date               0
Session3DateUtc            0
Session4                   0
Session4Date               0
Session4DateUtc            0
Session5                   0
Session5Date               0
Session5DateUtc            0
F1ApiSupport               0
DriverNumber               0
TeamName                   0
FullName                   0
Position_x                 0
ClassifiedPosition     29612
GridPosition           29612
Status                 29612
Points                 29612
Driver                     0
LapTime                 1949
LapNumber     

In [238]:
def get_telemetry_by_driver(year, gp, driver):
    telemetry = ff1.get_session(year, gp, "R")
    telemetry.load(weather=False, messages=False, livedata=False)
    start_date = telemetry.session_info["StartDate"]
    start_date_timestamp = pd.Timestamp(start_date)
    telemetry = telemetry.laps.pick_driver(driver).get_telemetry()
    telemetry = telemetry.drop(columns=["Date", "DriverAhead", "DistanceToDriverAhead", "RPM", "Source", "Distance", "RelativeDistance", "Status", "Brake", "X", "Y", "Z"])
    telemetry["Time"] = start_date_timestamp + pd.to_timedelta(telemetry["Time"])
    telemetry.set_index('Time', inplace=True)
    resampled_data = telemetry.resample('60s').agg({
    'Speed': 'mean',
    'Throttle': 'mean',
    }).reset_index()
    return resampled_data

In [263]:
teste = ff1.get_session(2024, 2, "R")
teste.load(weather=False, messages=False, livedata=False)

core           INFO 	Loading data for Saudi Arabian Grand Prix - Race [v3.3.9]
req            INFO 	No cached data found for session_info. Loading data...
req            INFO 	No cached data found for driver_info. Loading data...


KeyError: 'DriverNumber'

In [251]:
teste.results["Abbreviation"].values

array(['VER', 'PER', 'SAI', 'LEC', 'NOR', 'ALO', 'RUS', 'PIA', 'HAM',
       'TSU', 'HUL', 'STR', 'MAG', 'BOT', 'OCO', 'GAS', 'SAR', 'ZHO',
       'RIC', 'ALB'], dtype=object)

In [244]:
telemetry_dataframe = []
#past_events["RoundNumber"].max() + 1
for race in range(1, 5):
    session = ff1.get_session(season, race, "R")
    session.load(weather=False, messages=False, livedata=False)
    drivers = session.results["Abbreviation"].values
    for driver in drivers:
        telemetry_data = get_telemetry_by_driver(season, race, driver)
        telemetry_data["Driver"] = driver
        telemetry_dataframe.append(telemetry_data)
            
df_telemetry_concat = pd.concat(telemetry_dataframe, axis=0, ignore_index=True)

core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.3.9]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info
req            INFO 	Using cached data for session_status_data
req            INFO 	Using cached data for lap_count
req            INFO 	Using cached data for track_status_data
req            INFO 	Using cached data for _extended_timing_data
req            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
req            INFO 	Using cached data for car_data
req            INFO 	Using cached data for position_data
core           INFO 	Finished loading data for 20 drivers: ['1', '11', '55', '16', '63', '4', '44', '81', '14', '18', '24', '20', '3', '22', '23', '27', '31', '10', '77', '2']
core           INFO 	Loading data for Bahrain Grand Prix - Race [v3.3.9]
req            INFO 	Using cached data for session_info
req            INFO 	Using cached data for driver_info

KeyError: 'DriverNumber'

In [243]:
df_telemetry_concat

Unnamed: 0,Time,Speed,Throttle,Driver
0,2024-03-02 18:00:00,169.807947,58.779249,VER
1,2024-03-02 18:01:00,217.495690,72.168103,VER
2,2024-03-02 18:02:00,189.088795,61.513742,VER
3,2024-03-02 18:03:00,198.646154,63.479121,VER
4,2024-03-02 18:04:00,214.455947,66.577093,VER
...,...,...,...,...
1851,2024-03-02 19:28:00,215.702174,67.860870,SAR
1852,2024-03-02 19:29:00,170.134956,50.962389,SAR
1853,2024-03-02 19:30:00,212.665208,70.214442,SAR
1854,2024-03-02 19:31:00,193.910675,60.413943,SAR


In [237]:
race_dataframes = []
laps_dataframe = []
for race in range(1, past_events["RoundNumber"].max() + 1):
    df_race = get_race(2023, race)
    race_dataframes.append(df_race)
    lap = get_laps_data(2023, race)
    laps_dataframe.append(lap)

df_race_concat = pd.concat(race_dataframes, axis=0, ignore_index=True)
laps_df_concat = pd.concat(laps_dataframe, axis=0, ignore_index=True)

df_merge_schedule_results = pd.merge(past_events, df_race_concat, on="EventName")
df_merge_all = pd.merge(df_merge_schedule_results, laps_df_concat, on="DriverNumber")

Unnamed: 0,Time,Speed,Throttle
0,2024-07-07 15:00:00,201.831140,68.353070
1,2024-07-07 15:01:00,217.517467,67.401747
2,2024-07-07 15:02:00,249.019565,74.697826
3,2024-07-07 15:03:00,208.305195,66.948052
4,2024-07-07 15:04:00,228.416300,66.900881
...,...,...,...
78,2024-07-07 16:18:00,233.611111,75.792735
79,2024-07-07 16:19:00,230.358079,76.482533
80,2024-07-07 16:20:00,238.923077,78.002137
81,2024-07-07 16:21:00,233.297593,75.483589


In [174]:
def get_weather_data(year, gp):
    weather = ff1.get_session(year, gp, "R")
    weather.load(telemetry=False, messages=False, livedata=False)
    df_weather = weather.laps.get_weather_data()
    df_weather = df_weather.drop(columns=["Humidity", "Pressure", "WindDirection", "WindSpeed"])
    return df_weather

In [104]:
start_date = race.session_info["StartDate"]
start_date_timestamp = pd.Timestamp(start_date)

In [105]:
start_date_timestamp

Timestamp('2023-03-05 18:00:00')

In [106]:
teste.head(2)

Unnamed: 0,Time,AirTemp,Rainfall,TrackTemp
62,0 days 01:02:45.730000,27.3,False,31.2
64,0 days 01:04:45.759000,27.3,False,31.1


In [107]:
teste["Time"] = pd.to_timedelta(teste["Time"])

In [108]:
teste.head(2)

Unnamed: 0,Time,AirTemp,Rainfall,TrackTemp
62,0 days 01:02:45.730000,27.3,False,31.2
64,0 days 01:04:45.759000,27.3,False,31.1


In [109]:
teste["Datetime"] = start_date_timestamp + teste["Time"]

In [110]:
teste.head(2)

Unnamed: 0,Time,AirTemp,Rainfall,TrackTemp,Datetime
62,0 days 01:02:45.730000,27.3,False,31.2,2023-03-05 19:02:45.730
64,0 days 01:04:45.759000,27.3,False,31.1,2023-03-05 19:04:45.759


In [111]:
teste.set_index("Datetime", inplace=True)

In [127]:
teste

Unnamed: 0_level_0,Time,AirTemp,Rainfall,TrackTemp
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-03-05 19:02:45.730,0 days 01:02:45.730000,27.3,False,31.2
2023-03-05 19:04:45.759,0 days 01:04:45.759000,27.3,False,31.1
2023-03-05 19:06:45.757,0 days 01:06:45.757000,27.3,False,31.2
2023-03-05 19:07:45.756,0 days 01:07:45.756000,27.3,False,31.2
2023-03-05 19:09:45.754,0 days 01:09:45.754000,27.2,False,31.0
...,...,...,...,...
2023-03-05 19:16:45.809,0 days 01:16:45.809000,27.2,False,30.8
2023-03-05 19:18:45.822,0 days 01:18:45.822000,27.1,False,30.8
2023-03-05 19:19:45.836,0 days 01:19:45.836000,27.2,False,30.6
2023-03-05 19:21:45.834,0 days 01:21:45.834000,27.1,False,30.6


In [146]:
teste.to_csv("teste.csv")

In [143]:
resampled_teste = teste.resample('2s').agg({
    'AirTemp': 'mean',
    'TrackTemp': 'mean',
    "Rainfall": 'mean',
}).reset_index()

In [147]:
resampled_teste

Unnamed: 0,Datetime,AirTemp,TrackTemp,Rainfall
0,2023-03-05 19:02:44,27.3,31.2,0.0
1,2023-03-05 19:02:46,0.0,0.0,0.0
2,2023-03-05 19:02:48,0.0,0.0,0.0
3,2023-03-05 19:02:50,0.0,0.0,0.0
4,2023-03-05 19:02:52,0.0,0.0,0.0
...,...,...,...,...
2817,2023-03-05 20:36:38,0.0,0.0,0.0
2818,2023-03-05 20:36:40,0.0,0.0,0.0
2819,2023-03-05 20:36:42,0.0,0.0,0.0
2820,2023-03-05 20:36:44,0.0,0.0,0.0


In [141]:
resampled_teste["Rainfall"] = resampled_teste["Rainfall"].fillna(0)
resampled_teste["Rainfall"] = np.where(resampled_teste["Rainfall"] > 0, 1, 0)

In [142]:
resampled_teste

Unnamed: 0,Datetime,AirTemp,TrackTemp,Rainfall
0,2023-03-05 19:02:45,27.3,31.2,0
1,2023-03-05 19:02:46,0.0,0.0,0
2,2023-03-05 19:02:47,0.0,0.0,0
3,2023-03-05 19:02:48,0.0,0.0,0
4,2023-03-05 19:02:49,0.0,0.0,0
...,...,...,...,...
5637,2023-03-05 20:36:42,0.0,0.0,0
5638,2023-03-05 20:36:43,0.0,0.0,0
5639,2023-03-05 20:36:44,0.0,0.0,0
5640,2023-03-05 20:36:45,0.0,0.0,0


In [116]:
df_weather = pd.read_csv("weather_round1_2023.csv")
df_telemetry = pd.read_csv("telemetry_ver_2023")

In [121]:
df_weather.head()

Unnamed: 0,Time,AirTemp,Rainfall,TrackTemp
0,0 days 01:02:45.730000,27.3,False,31.2
1,0 days 01:04:45.759000,27.3,False,31.1
2,0 days 01:06:45.757000,27.3,False,31.2
3,0 days 01:07:45.756000,27.3,False,31.2
4,0 days 01:09:45.754000,27.2,False,31.0


In [92]:
df_merge_3_4 = pd.merge(df_weather, df_telemetry, on="Time", how="outer")

In [93]:
df_merge_3_4

Unnamed: 0,Time,AirTemp,Rainfall,TrackTemp,RPM,Speed,Throttle,Brake,X,Y,Z
0,2023-03-05 18:00:00,,,,8697.111111,3.111111,16.000000,0.0,-279.940578,3560.866992,-156.981553
1,2023-03-05 18:00:01,,,,5061.777778,34.555556,22.888889,0.0,-277.852467,3622.587074,-158.078033
2,2023-03-05 18:00:02,,,,8579.142857,75.000000,60.571429,0.0,-272.291164,3767.698060,-158.063964
3,2023-03-05 18:00:03,,,,11837.500000,106.375000,88.125000,0.0,-264.214239,3981.448717,-158.350138
4,2023-03-05 18:00:04,,,,11345.666667,148.000000,100.000000,0.0,-251.227163,4341.312059,-158.195231
...,...,...,...,...,...,...,...,...,...,...,...
6689,2023-03-05 20:36:46.050,26.3,False,28.7,,,,,,,
6690,2023-03-05 20:36:46.050,26.3,False,28.7,,,,,,,
6691,2023-03-05 20:36:46.050,26.3,False,28.7,,,,,,,
6692,2023-03-05 20:36:46.050,26.3,False,28.7,,,,,,,
