In [1]:
import pandas as pd
import numpy as np
import fastf1 as ff1
from datetime import datetime as dt

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', None)

In [2]:
# get the schedule df's by year

def get_dataframe_schedule(year):
    df = ff1.get_event_schedule(year)
    return (
        df
        .drop(columns=["Location", "OfficialEventName", "Session1Date", "Session1DateUtc", "Session2Date", "Session2DateUtc", "Session3Date", "Session3DateUtc",
                       "Session4Date", "Session4DateUtc", "Session5Date", "Session5DateUtc", "F1ApiSupport"])
        .loc[~df['EventName'].str.contains('Pre-Season', na=False)]
        .reset_index(drop=True)
    )

In [3]:
df1 = get_dataframe_schedule(2024)
df1



Unnamed: 0,RoundNumber,Country,EventDate,EventName,EventFormat,Session1,Session2,Session3,Session4,Session5
0,1,Bahrain,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
1,2,Saudi Arabia,2024-03-09,Saudi Arabian Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
2,3,Australia,2024-03-24,Australian Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
3,4,Japan,2024-04-07,Japanese Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
4,5,China,2024-04-21,Chinese Grand Prix,sprint_qualifying,Practice 1,Sprint Qualifying,Sprint,Qualifying,Race
5,6,United States,2024-05-05,Miami Grand Prix,sprint_qualifying,Practice 1,Sprint Qualifying,Sprint,Qualifying,Race
6,7,Italy,2024-05-19,Emilia Romagna Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
7,8,Monaco,2024-05-26,Monaco Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
8,9,Canada,2024-06-09,Canadian Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race
9,10,Spain,2024-06-23,Spanish Grand Prix,conventional,Practice 1,Practice 2,Practice 3,Qualifying,Race


In [4]:
# get the race results df's

def get_race(year, gp):
    race = ff1.get_session(year, gp, "R")
    race.load(laps=False, telemetry=False, weather=False, messages=False, livedata=False)
    race_results_df = race.results
    race_results_df = race_results_df.drop(columns=["BroadcastName", "Abbreviation", "DriverId", "TeamColor", "TeamId", "FirstName", "LastName", "HeadshotUrl", "CountryCode", "Q1", "Q2", "Q3", "Time"])
    df_event = get_dataframe_schedule(year)
    event_name = df_event.at[gp-1, "EventName"]
    race_results_df["EventName"] = event_name
    return race_results_df

In [None]:
# concat the results df's

race_dataframes = []
for race in range(1, df1["RoundNumber"].max() + 1):
    df_race = get_race(2023, race)
    race_dataframes.append(df_race)

df_race_concat = pd.concat(race_dataframes, axis=0, ignore_index=True)

In [5]:
# get laps data from race

def get_laps_data(year, gp):
    race = ff1.get_session(year, gp, "R")
    race.load(telemetry=False, weather=False, messages=False, livedata=False)
    df_laps = race.laps
    df = df_laps.drop(columns=["Time", "PitOutTime", "PitInTime", "Sector1SessionTime", "Sector2SessionTime", "Sector3SessionTime", "SpeedFL", "SpeedST",
                           "IsPersonalBest", "LapStartTime", "LapStartDate", "TrackStatus", "DeletedReason", "FastF1Generated", "IsAccurate"])
    return df

In [None]:
laps_dataframe = []
for race in range(1, df1["RoundNumber"].max() + 1):
    lap = get_laps_data(2023, race)
    laps_dataframe.append(lap)


laps_df_concat = pd.concat(laps_dataframe, axis=0, ignore_index=True)

In [6]:
season = dt.now().year
events = ff1.get_event_schedule(season)
today = pd.Timestamp.now()
past_events = events[events["EventDate"] <= today]

In [7]:
past_events

Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,Session2,Session2Date,Session2DateUtc,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport
0,0,Bahrain,Sakhir,FORMULA 1 ARAMCO PRE-SEASON TESTING 2024,2024-02-23,Pre-Season Testing,testing,Practice 1,2024-02-21 10:00:00+03:00,2024-02-21 07:00:00,Practice 2,2024-02-22 10:00:00+03:00,2024-02-22 07:00:00,Practice 3,2024-02-23 10:00:00+03:00,2024-02-23 07:00:00,,NaT,NaT,,NaT,NaT,True
1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True
2,2,Saudi Arabia,Jeddah,FORMULA 1 STC SAUDI ARABIAN GRAND PRIX 2024,2024-03-09,Saudi Arabian Grand Prix,conventional,Practice 1,2024-03-07 16:30:00+03:00,2024-03-07 13:30:00,Practice 2,2024-03-07 20:10:00+03:00,2024-03-07 17:10:00,Practice 3,2024-03-08 16:30:00+03:00,2024-03-08 13:30:00,Qualifying,2024-03-08 20:00:00+03:00,2024-03-08 17:00:00,Race,2024-03-09 20:00:00+03:00,2024-03-09 17:00:00,True
3,3,Australia,Melbourne,FORMULA 1 ROLEX AUSTRALIAN GRAND PRIX 2024,2024-03-24,Australian Grand Prix,conventional,Practice 1,2024-03-22 12:30:00+11:00,2024-03-22 01:30:00,Practice 2,2024-03-22 16:00:00+11:00,2024-03-22 05:00:00,Practice 3,2024-03-23 12:30:00+11:00,2024-03-23 01:30:00,Qualifying,2024-03-23 16:00:00+11:00,2024-03-23 05:00:00,Race,2024-03-24 15:00:00+11:00,2024-03-24 04:00:00,True
4,4,Japan,Suzuka,FORMULA 1 MSC CRUISES JAPANESE GRAND PRIX 2024,2024-04-07,Japanese Grand Prix,conventional,Practice 1,2024-04-05 11:30:00+09:00,2024-04-05 02:30:00,Practice 2,2024-04-05 15:00:00+09:00,2024-04-05 06:00:00,Practice 3,2024-04-06 11:30:00+09:00,2024-04-06 02:30:00,Qualifying,2024-04-06 15:00:00+09:00,2024-04-06 06:00:00,Race,2024-04-07 14:00:00+09:00,2024-04-07 05:00:00,True
5,5,China,Shanghai,FORMULA 1 LENOVO CHINESE GRAND PRIX 2024,2024-04-21,Chinese Grand Prix,sprint_qualifying,Practice 1,2024-04-19 11:30:00+08:00,2024-04-19 03:30:00,Sprint Qualifying,2024-04-19 15:30:00+08:00,2024-04-19 07:30:00,Sprint,2024-04-20 11:00:00+08:00,2024-04-20 03:00:00,Qualifying,2024-04-20 15:00:00+08:00,2024-04-20 07:00:00,Race,2024-04-21 15:00:00+08:00,2024-04-21 07:00:00,True
6,6,United States,Miami,FORMULA 1 CRYPTO.COM MIAMI GRAND PRIX 2024,2024-05-05,Miami Grand Prix,sprint_qualifying,Practice 1,2024-05-03 12:30:00-04:00,2024-05-03 16:30:00,Sprint Qualifying,2024-05-03 16:30:00-04:00,2024-05-03 20:30:00,Sprint,2024-05-04 12:00:00-04:00,2024-05-04 16:00:00,Qualifying,2024-05-04 16:00:00-04:00,2024-05-04 20:00:00,Race,2024-05-05 16:00:00-04:00,2024-05-05 20:00:00,True
7,7,Italy,Imola,FORMULA 1 MSC CRUISES GRAN PREMIO DEL MADE IN ITALY E DELL'EMILIA-ROMAGNA 2024,2024-05-19,Emilia Romagna Grand Prix,conventional,Practice 1,2024-05-17 13:30:00+02:00,2024-05-17 11:30:00,Practice 2,2024-05-17 17:00:00+02:00,2024-05-17 15:00:00,Practice 3,2024-05-18 12:30:00+02:00,2024-05-18 10:30:00,Qualifying,2024-05-18 16:00:00+02:00,2024-05-18 14:00:00,Race,2024-05-19 15:00:00+02:00,2024-05-19 13:00:00,True
8,8,Monaco,Monaco,FORMULA 1 GRAND PRIX DE MONACO 2024,2024-05-26,Monaco Grand Prix,conventional,Practice 1,2024-05-24 13:30:00+02:00,2024-05-24 11:30:00,Practice 2,2024-05-24 17:00:00+02:00,2024-05-24 15:00:00,Practice 3,2024-05-25 12:30:00+02:00,2024-05-25 10:30:00,Qualifying,2024-05-25 16:00:00+02:00,2024-05-25 14:00:00,Race,2024-05-26 15:00:00+02:00,2024-05-26 13:00:00,True
9,9,Canada,Montréal,FORMULA 1 AWS GRAND PRIX DU CANADA 2024,2024-06-09,Canadian Grand Prix,conventional,Practice 1,2024-06-07 13:30:00-04:00,2024-06-07 17:30:00,Practice 2,2024-06-07 17:00:00-04:00,2024-06-07 21:00:00,Practice 3,2024-06-08 12:30:00-04:00,2024-06-08 16:30:00,Qualifying,2024-06-08 16:00:00-04:00,2024-06-08 20:00:00,Race,2024-06-09 14:00:00-04:00,2024-06-09 18:00:00,True


In [None]:
race_dataframes = []
laps_dataframe = []
for race in range(1, past_events["RoundNumber"].max() + 1):
    df_race = get_race(2024, race)
    lap = get_laps_data(2024, race)

    if pd.isna(df_race["Position"].iloc[0]):
        max_lap_number = lap['LapNumber'].max()
        last_lap = lap[lap["LapNumber"] == max_lap_number]
        last_positions = last_lap.set_index('DriverNumber')['Position']
        df_race["Position"] = df_race["DriverNumber"].map(last_positions).fillna(0).astype(int)
        df_race['IsZero'] = df_race['Position'] == 0
        df_race = df_race.sort_values(by=['IsZero', 'Position']).reset_index(drop=True)
        df_race = df_race.drop(columns=['IsZero'])

    race_dataframes.append(df_race)
    laps_dataframe.append(lap)

df_race_concat = pd.concat(race_dataframes, axis=0, ignore_index=True)
laps_df_concat = pd.concat(laps_dataframe, axis=0, ignore_index=True)

df_merge_schedule_results = pd.merge(past_events, df_race_concat, on="EventName")
df_merge_all = pd.merge(df_merge_schedule_results, laps_df_concat, on="DriverNumber")

In [None]:
df_merge_all

In [None]:
df_merge_all.to_csv("race_results_laps_2024.csv")

In [2]:
df = pd.read_csv("race_results_laps_2024.csv")

  df = pd.read_csv("race_results_laps_2024.csv")


In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,Session2,Session2Date,Session2DateUtc,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport,DriverNumber,TeamName,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,Driver,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Team,Position_y,Deleted
0,0,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:37.284000,1.0,1.0,,0 days 00:00:41.266000,0 days 00:00:23.616000,234.0,250.0,SOFT,4.0,False,Red Bull Racing,1.0,
1,1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.296000,2.0,1.0,0 days 00:00:30.916000,0 days 00:00:41.661000,0 days 00:00:23.719000,232.0,248.0,SOFT,5.0,False,Red Bull Racing,1.0,
2,2,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.753000,3.0,1.0,0 days 00:00:30.999000,0 days 00:00:41.966000,0 days 00:00:23.788000,231.0,243.0,SOFT,6.0,False,Red Bull Racing,1.0,
3,3,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.647000,4.0,1.0,0 days 00:00:30.931000,0 days 00:00:41.892000,0 days 00:00:23.824000,233.0,253.0,SOFT,7.0,False,Red Bull Racing,1.0,
4,4,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:37.173000,5.0,1.0,0 days 00:00:31.255000,0 days 00:00:42.056000,0 days 00:00:23.862000,231.0,245.0,SOFT,8.0,False,Red Bull Racing,1.0,


In [4]:
df.isnull().sum()

Unnamed: 0                 0
RoundNumber                0
Country                    0
Location                   0
OfficialEventName          0
EventDate                  0
EventName                  0
EventFormat                0
Session1                   0
Session1Date               0
Session1DateUtc            0
Session2                   0
Session2Date               0
Session2DateUtc            0
Session3                   0
Session3Date               0
Session3DateUtc            0
Session4                   0
Session4Date               0
Session4DateUtc            0
Session5                   0
Session5Date               0
Session5DateUtc            0
F1ApiSupport               0
DriverNumber               0
TeamName                   0
FullName                   0
Position_x                 0
ClassifiedPosition     29612
GridPosition           29612
Status                 29612
Points                 29612
Driver                     0
LapTime                 1949
LapNumber     

In [5]:
# mean speed

def fill_na_speed(df, cols):
    for col in cols:
        mean_values = df.groupby(["EventName", "FullName"])[col].transform('mean')
        df[col].fillna(mean_values, inplace=True)
    return df

In [6]:
columns = ["SpeedI1", "SpeedI2"]

In [7]:
df_copy = df.copy()

In [8]:
df_filled = fill_na_speed(df_copy, columns)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(mean_values, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(mean_values, inplace=True)


In [9]:
df_filled.head(6)

Unnamed: 0.1,Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,Session2,Session2Date,Session2DateUtc,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport,DriverNumber,TeamName,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,Driver,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Team,Position_y,Deleted
0,0,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:37.284000,1.0,1.0,,0 days 00:00:41.266000,0 days 00:00:23.616000,234.0,250.0,SOFT,4.0,False,Red Bull Racing,1.0,
1,1,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.296000,2.0,1.0,0 days 00:00:30.916000,0 days 00:00:41.661000,0 days 00:00:23.719000,232.0,248.0,SOFT,5.0,False,Red Bull Racing,1.0,
2,2,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.753000,3.0,1.0,0 days 00:00:30.999000,0 days 00:00:41.966000,0 days 00:00:23.788000,231.0,243.0,SOFT,6.0,False,Red Bull Racing,1.0,
3,3,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:36.647000,4.0,1.0,0 days 00:00:30.931000,0 days 00:00:41.892000,0 days 00:00:23.824000,233.0,253.0,SOFT,7.0,False,Red Bull Racing,1.0,
4,4,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:37.173000,5.0,1.0,0 days 00:00:31.255000,0 days 00:00:42.056000,0 days 00:00:23.862000,231.0,245.0,SOFT,8.0,False,Red Bull Racing,1.0,
5,5,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,0 days 00:01:37.092000,6.0,1.0,0 days 00:00:31.041000,0 days 00:00:42.187000,0 days 00:00:23.864000,245.586031,247.0,SOFT,9.0,False,Red Bull Racing,1.0,


In [10]:
df_filled[["SpeedI1", "SpeedI2"]].isna().sum()

SpeedI1    0
SpeedI2    0
dtype: int64

In [11]:
df_filled["Sector1Time"] = pd.to_timedelta(df["Sector1Time"])
df_filled["Sector2Time"] = pd.to_timedelta(df["Sector2Time"])
df_filled["Sector3Time"] = pd.to_timedelta(df["Sector3Time"])

In [12]:
def fill_na_sectorTime(df, cols):
    for col in cols:
        mean_values = df.groupby(["EventName", "FullName"])[col].transform(lambda x: x.dropna().mean())
        df[col].fillna(mean_values, inplace=True)
    return df

In [13]:
columns_sectorTime = ['Sector1Time', 'Sector2Time', 'Sector3Time']

In [14]:
df_filled = fill_na_sectorTime(df_filled, columns_sectorTime)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(mean_values, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(mean_values, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always be

In [15]:
df_filled[['Sector1Time', 'Sector2Time', 'Sector3Time']].isna().sum()

Sector1Time    0
Sector2Time    0
Sector3Time    0
dtype: int64

In [16]:
df_filled.isna().sum()

Unnamed: 0                 0
RoundNumber                0
Country                    0
Location                   0
OfficialEventName          0
EventDate                  0
EventName                  0
EventFormat                0
Session1                   0
Session1Date               0
Session1DateUtc            0
Session2                   0
Session2Date               0
Session2DateUtc            0
Session3                   0
Session3Date               0
Session3DateUtc            0
Session4                   0
Session4Date               0
Session4DateUtc            0
Session5                   0
Session5Date               0
Session5DateUtc            0
F1ApiSupport               0
DriverNumber               0
TeamName                   0
FullName                   0
Position_x                 0
ClassifiedPosition     29612
GridPosition           29612
Status                 29612
Points                 29612
Driver                     0
LapTime                 1949
LapNumber     

In [17]:
df_filled[df_filled["LapTime"].isnull()]

Unnamed: 0.1,Unnamed: 0,RoundNumber,Country,Location,OfficialEventName,EventDate,EventName,EventFormat,Session1,Session1Date,Session1DateUtc,Session2,Session2Date,Session2DateUtc,Session3,Session3Date,Session3DateUtc,Session4,Session4Date,Session4DateUtc,Session5,Session5Date,Session5DateUtc,F1ApiSupport,DriverNumber,TeamName,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,Driver,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Team,Position_y,Deleted
64,64,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,,8.0,2.0,0 days 00:01:13.947000,0 days 00:00:57.475000,0 days 00:00:57.661000,158.000000,96.00000,HARD,1.0,True,Red Bull Racing,2.0,
65,65,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,,9.0,2.0,0 days 00:00:56.642000,0 days 00:00:49.042000,0 days 00:00:55.311000,183.000000,131.00000,HARD,2.0,True,Red Bull Racing,2.0,
110,110,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,,4.0,1.0,0 days 00:00:28.756000,0 days 00:00:18.353000,0 days 00:00:27.164187919,270.000000,308.00000,MEDIUM,4.0,True,Red Bull Racing,19.0,
112,112,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,,2.0,2.0,0 days 00:00:27.778819672,0 days 00:00:56.617000,0 days 00:00:39.032000,179.000000,245.00000,MEDIUM,2.0,False,Red Bull Racing,1.0,
113,113,1,Bahrain,Sakhir,FORMULA 1 GULF AIR BAHRAIN GRAND PRIX 2024,2024-03-02 00:00:00,Bahrain Grand Prix,conventional,Practice 1,2024-02-29 14:30:00+03:00,2024-02-29 11:30:00,Practice 2,2024-02-29 18:00:00+03:00,2024-02-29 15:00:00,Practice 3,2024-03-01 15:30:00+03:00,2024-03-01 12:30:00,Qualifying,2024-03-01 19:00:00+03:00,2024-03-01 16:00:00,Race,2024-03-02 18:00:00+03:00,2024-03-02 15:00:00,True,1,Red Bull Racing,Max Verstappen,1.0,1,1.0,Finished,26.0,VER,,3.0,2.0,0 days 00:00:27.778819672,0 days 00:00:42.768000,0 days 00:00:18.603000,279.000000,291.00000,MEDIUM,3.0,False,Red Bull Racing,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190225,190225,13,Hungary,Budapest,FORMULA 1 HUNGARIAN GRAND PRIX 2024,2024-07-21 00:00:00,Hungarian Grand Prix,conventional,Practice 1,2024-07-19 13:30:00+02:00,2024-07-19 11:30:00,Practice 2,2024-07-19 17:00:00+02:00,2024-07-19 15:00:00,Practice 3,2024-07-20 12:30:00+02:00,2024-07-20 10:30:00,Qualifying,2024-07-20 16:00:00+02:00,2024-07-20 14:00:00,Race,2024-07-21 15:00:00+02:00,2024-07-21 13:00:00,True,24,Kick Sauber,Guanyu Zhou,19.0,19,18.0,+1 Lap,0.0,ZHO,,1.0,1.0,0 days 00:00:28.227723433,0 days 00:00:35.143000,0 days 00:00:25.168000,243.000000,240.00000,SOFT,1.0,True,Kick Sauber,17.0,
190594,190594,13,Hungary,Budapest,FORMULA 1 HUNGARIAN GRAND PRIX 2024,2024-07-21 00:00:00,Hungarian Grand Prix,conventional,Practice 1,2024-07-19 13:30:00+02:00,2024-07-19 11:30:00,Practice 2,2024-07-19 17:00:00+02:00,2024-07-19 15:00:00,Practice 3,2024-07-20 12:30:00+02:00,2024-07-20 10:30:00,Qualifying,2024-07-20 16:00:00+02:00,2024-07-20 14:00:00,Race,2024-07-21 15:00:00+02:00,2024-07-21 13:00:00,True,10,Alpine,Pierre Gasly,20.0,R,0.0,Hydraulics,0.0,GAS,,2.0,2.0,0 days 00:00:27.410062015,0 days 00:00:48.342000,0 days 00:00:45.231000,261.000000,273.00000,HARD,1.0,True,Alpine,13.0,
190897,190897,13,Hungary,Budapest,FORMULA 1 HUNGARIAN GRAND PRIX 2024,2024-07-21 00:00:00,Hungarian Grand Prix,conventional,Practice 1,2024-07-19 13:30:00+02:00,2024-07-19 11:30:00,Practice 2,2024-07-19 17:00:00+02:00,2024-07-19 15:00:00,Practice 3,2024-07-20 12:30:00+02:00,2024-07-20 10:30:00,Qualifying,2024-07-20 16:00:00+02:00,2024-07-20 14:00:00,Race,2024-07-21 15:00:00+02:00,2024-07-21 13:00:00,True,10,Alpine,Pierre Gasly,20.0,R,0.0,Hydraulics,0.0,GAS,,1.0,1.0,0 days 00:00:27.410062015,0 days 00:00:34.159000,0 days 00:00:42.473000,163.000000,227.00000,INTERMEDIATE,1.0,True,Alpine,18.0,
190967,190967,13,Hungary,Budapest,FORMULA 1 HUNGARIAN GRAND PRIX 2024,2024-07-21 00:00:00,Hungarian Grand Prix,conventional,Practice 1,2024-07-19 13:30:00+02:00,2024-07-19 11:30:00,Practice 2,2024-07-19 17:00:00+02:00,2024-07-19 15:00:00,Practice 3,2024-07-20 12:30:00+02:00,2024-07-20 10:30:00,Qualifying,2024-07-20 16:00:00+02:00,2024-07-20 14:00:00,Race,2024-07-21 15:00:00+02:00,2024-07-21 13:00:00,True,10,Alpine,Pierre Gasly,20.0,R,0.0,Hydraulics,0.0,GAS,,1.0,1.0,0 days 00:00:27.410062015,0 days 00:00:33.021000,0 days 00:00:24.482000,247.000000,263.00000,SOFT,4.0,False,Alpine,7.0,


In [18]:
def fill_na_laptime(df, cols):
    for col in cols:
        mean_values = df.groupby(["EventName", "FullName"])[col].transform(lambda x: x.dropna().mean())
        df[col].fillna(mean_values, inplace=True)
    return df

In [19]:
df_filled["LapTime"] = pd.to_timedelta(df_filled["LapTime"])

In [20]:
laptime_column = ["LapTime"]

In [21]:
df_filled = fill_na_laptime(df_filled, laptime_column)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(mean_values, inplace=True)


In [22]:
df_filled.isna().sum()

Unnamed: 0                 0
RoundNumber                0
Country                    0
Location                   0
OfficialEventName          0
EventDate                  0
EventName                  0
EventFormat                0
Session1                   0
Session1Date               0
Session1DateUtc            0
Session2                   0
Session2Date               0
Session2DateUtc            0
Session3                   0
Session3Date               0
Session3DateUtc            0
Session4                   0
Session4Date               0
Session4DateUtc            0
Session5                   0
Session5Date               0
Session5DateUtc            0
F1ApiSupport               0
DriverNumber               0
TeamName                   0
FullName                   0
Position_x                 0
ClassifiedPosition     29612
GridPosition           29612
Status                 29612
Points                 29612
Driver                     0
LapTime                    0
LapNumber     

In [23]:
df_filled = df_filled.drop(columns=["Unnamed: 0", "RoundNumber", "Country", "Location", "OfficialEventName", "EventDate", "EventFormat", "Session1", "Session1Date", "Session1DateUtc",
                                    "Session2", "Session2Date", "Session2DateUtc", "Session3", "Session3Date", "Session3DateUtc", "Session4", "Session4Date", "Session4DateUtc", "Session5", "Session5Date", "F1ApiSupport",
                                    "DriverNumber", "TeamName", "Driver", "Team", "Deleted"])

In [24]:
df_filled

Unnamed: 0,EventName,Session5DateUtc,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Position_y
0,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,Finished,26.0,0 days 00:01:37.284000,1.0,1.0,0 days 00:00:27.778819672,0 days 00:00:41.266000,0 days 00:00:23.616000,234.000000,250.0,SOFT,4.0,False,1.0
1,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,Finished,26.0,0 days 00:01:36.296000,2.0,1.0,0 days 00:00:30.916000,0 days 00:00:41.661000,0 days 00:00:23.719000,232.000000,248.0,SOFT,5.0,False,1.0
2,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,Finished,26.0,0 days 00:01:36.753000,3.0,1.0,0 days 00:00:30.999000,0 days 00:00:41.966000,0 days 00:00:23.788000,231.000000,243.0,SOFT,6.0,False,1.0
3,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,Finished,26.0,0 days 00:01:36.647000,4.0,1.0,0 days 00:00:30.931000,0 days 00:00:41.892000,0 days 00:00:23.824000,233.000000,253.0,SOFT,7.0,False,1.0
4,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,Finished,26.0,0 days 00:01:37.173000,5.0,1.0,0 days 00:00:31.255000,0 days 00:00:42.056000,0 days 00:00:23.862000,231.000000,245.0,SOFT,8.0,False,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191133,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,R,0.0,Hydraulics,0.0,0 days 00:01:41.671000,29.0,2.0,0 days 00:00:47.902000,0 days 00:00:30.206000,0 days 00:00:23.563000,279.000000,235.0,MEDIUM,1.0,True,19.0
191134,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,R,0.0,Hydraulics,0.0,0 days 00:01:23.446000,30.0,2.0,0 days 00:00:29.727000,0 days 00:00:30.081000,0 days 00:00:23.638000,276.000000,237.0,MEDIUM,2.0,True,17.0
191135,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,R,0.0,Hydraulics,0.0,0 days 00:01:23.340000,31.0,2.0,0 days 00:00:29.754000,0 days 00:00:30.046000,0 days 00:00:23.540000,277.000000,240.0,MEDIUM,3.0,True,16.0
191136,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,R,0.0,Hydraulics,0.0,0 days 00:01:23.484000,32.0,2.0,0 days 00:00:29.652000,0 days 00:00:30.176000,0 days 00:00:23.656000,241.721925,241.0,MEDIUM,4.0,True,15.0


In [25]:
cols_to_fill = ["ClassifiedPosition", "GridPosition", "Points"]

for col in cols_to_fill:
    df_filled.loc[df_filled["Status"] != "Finished", col] = df_filled.loc[df_filled["Status"] != "Finished", col].fillna(0)

In [26]:
df_filled.isnull().sum()

EventName                 0
Session5DateUtc           0
FullName                  0
Position_x                0
ClassifiedPosition        0
GridPosition              0
Status                29612
Points                    0
LapTime                   0
LapNumber                 0
Stint                     0
Sector1Time               0
Sector2Time               0
Sector3Time               0
SpeedI1                   0
SpeedI2                   0
Compound                  0
TyreLife                  0
FreshTyre                 0
Position_y              102
dtype: int64

In [27]:
df_filled["Status"] = np.where(df_filled["Status"] == "Finished", 1, 0)

In [28]:
df_filled.ClassifiedPosition.unique()

array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
       20, '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13',
       '14', '15', '16', '17', '18', 'R', '1', '2', '19', 0, 'W'],
      dtype=object)

In [29]:
values_to_replace = ["R", "D", "E", "W", "F", "N"]

In [30]:
df_filled["ClassifiedPosition"] = np.where(df_filled["ClassifiedPosition"].isin(values_to_replace), 0, df_filled["ClassifiedPosition"])

In [31]:
df_filled

Unnamed: 0,EventName,Session5DateUtc,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Position_y
0,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,1,26.0,0 days 00:01:37.284000,1.0,1.0,0 days 00:00:27.778819672,0 days 00:00:41.266000,0 days 00:00:23.616000,234.000000,250.0,SOFT,4.0,False,1.0
1,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,1,26.0,0 days 00:01:36.296000,2.0,1.0,0 days 00:00:30.916000,0 days 00:00:41.661000,0 days 00:00:23.719000,232.000000,248.0,SOFT,5.0,False,1.0
2,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,1,26.0,0 days 00:01:36.753000,3.0,1.0,0 days 00:00:30.999000,0 days 00:00:41.966000,0 days 00:00:23.788000,231.000000,243.0,SOFT,6.0,False,1.0
3,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,1,26.0,0 days 00:01:36.647000,4.0,1.0,0 days 00:00:30.931000,0 days 00:00:41.892000,0 days 00:00:23.824000,233.000000,253.0,SOFT,7.0,False,1.0
4,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,1,26.0,0 days 00:01:37.173000,5.0,1.0,0 days 00:00:31.255000,0 days 00:00:42.056000,0 days 00:00:23.862000,231.000000,245.0,SOFT,8.0,False,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191133,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,0,0.0,0,0.0,0 days 00:01:41.671000,29.0,2.0,0 days 00:00:47.902000,0 days 00:00:30.206000,0 days 00:00:23.563000,279.000000,235.0,MEDIUM,1.0,True,19.0
191134,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,0,0.0,0,0.0,0 days 00:01:23.446000,30.0,2.0,0 days 00:00:29.727000,0 days 00:00:30.081000,0 days 00:00:23.638000,276.000000,237.0,MEDIUM,2.0,True,17.0
191135,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,0,0.0,0,0.0,0 days 00:01:23.340000,31.0,2.0,0 days 00:00:29.754000,0 days 00:00:30.046000,0 days 00:00:23.540000,277.000000,240.0,MEDIUM,3.0,True,16.0
191136,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,0,0.0,0,0.0,0 days 00:01:23.484000,32.0,2.0,0 days 00:00:29.652000,0 days 00:00:30.176000,0 days 00:00:23.656000,241.721925,241.0,MEDIUM,4.0,True,15.0


In [32]:
df_filled['Sector1Time'] = df_filled['Sector1Time'].dt.total_seconds()
df_filled['Sector2Time'] = df_filled['Sector2Time'].dt.total_seconds()
df_filled['Sector3Time'] = df_filled['Sector3Time'].dt.total_seconds()

In [33]:
df_filled['LapTime'] = df_filled['LapTime'].dt.total_seconds()

In [34]:
df_filled

Unnamed: 0,EventName,Session5DateUtc,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Position_y
0,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,1,26.0,97.284,1.0,1.0,27.77882,41.266,23.616,234.000000,250.0,SOFT,4.0,False,1.0
1,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,1,26.0,96.296,2.0,1.0,30.91600,41.661,23.719,232.000000,248.0,SOFT,5.0,False,1.0
2,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,1,26.0,96.753,3.0,1.0,30.99900,41.966,23.788,231.000000,243.0,SOFT,6.0,False,1.0
3,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,1,26.0,96.647,4.0,1.0,30.93100,41.892,23.824,233.000000,253.0,SOFT,7.0,False,1.0
4,Bahrain Grand Prix,2024-03-02 15:00:00,Max Verstappen,1.0,1,1.0,1,26.0,97.173,5.0,1.0,31.25500,42.056,23.862,231.000000,245.0,SOFT,8.0,False,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191133,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,0,0.0,0,0.0,101.671,29.0,2.0,47.90200,30.206,23.563,279.000000,235.0,MEDIUM,1.0,True,19.0
191134,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,0,0.0,0,0.0,83.446,30.0,2.0,29.72700,30.081,23.638,276.000000,237.0,MEDIUM,2.0,True,17.0
191135,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,0,0.0,0,0.0,83.340,31.0,2.0,29.75400,30.046,23.540,277.000000,240.0,MEDIUM,3.0,True,16.0
191136,Hungarian Grand Prix,2024-07-21 13:00:00,Pierre Gasly,20.0,0,0.0,0,0.0,83.484,32.0,2.0,29.65200,30.176,23.656,241.721925,241.0,MEDIUM,4.0,True,15.0


In [35]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

categorical_columns = ["EventName", "FullName", "Compound"]
for col in categorical_columns:
    df_filled[col] = label_encoder.fit_transform(df_filled[col])

In [36]:
df_filled["FreshTyre"] = df_filled["FreshTyre"].astype(int)

In [37]:
df_filled

Unnamed: 0,EventName,Session5DateUtc,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Position_y
0,2,2024-03-02 15:00:00,13,1.0,1,1.0,1,26.0,97.284,1.0,1.0,27.77882,41.266,23.616,234.000000,250.0,3,4.0,0,1.0
1,2,2024-03-02 15:00:00,13,1.0,1,1.0,1,26.0,96.296,2.0,1.0,30.91600,41.661,23.719,232.000000,248.0,3,5.0,0,1.0
2,2,2024-03-02 15:00:00,13,1.0,1,1.0,1,26.0,96.753,3.0,1.0,30.99900,41.966,23.788,231.000000,243.0,3,6.0,0,1.0
3,2,2024-03-02 15:00:00,13,1.0,1,1.0,1,26.0,96.647,4.0,1.0,30.93100,41.892,23.824,233.000000,253.0,3,7.0,0,1.0
4,2,2024-03-02 15:00:00,13,1.0,1,1.0,1,26.0,97.173,5.0,1.0,31.25500,42.056,23.862,231.000000,245.0,3,8.0,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191133,7,2024-07-21 13:00:00,17,20.0,0,0.0,0,0.0,101.671,29.0,2.0,47.90200,30.206,23.563,279.000000,235.0,2,1.0,1,19.0
191134,7,2024-07-21 13:00:00,17,20.0,0,0.0,0,0.0,83.446,30.0,2.0,29.72700,30.081,23.638,276.000000,237.0,2,2.0,1,17.0
191135,7,2024-07-21 13:00:00,17,20.0,0,0.0,0,0.0,83.340,31.0,2.0,29.75400,30.046,23.540,277.000000,240.0,2,3.0,1,16.0
191136,7,2024-07-21 13:00:00,17,20.0,0,0.0,0,0.0,83.484,32.0,2.0,29.65200,30.176,23.656,241.721925,241.0,2,4.0,1,15.0


In [38]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

df_filled[['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'SpeedI1', 'SpeedI2', 'TyreLife']] = scaler.fit_transform(
    df_filled[['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'SpeedI1', 'SpeedI2', 'TyreLife']]
)

In [39]:
df_filled = df_filled.dropna()

In [40]:
df_filled.isnull().sum()

EventName             0
Session5DateUtc       0
FullName              0
Position_x            0
ClassifiedPosition    0
GridPosition          0
Status                0
Points                0
LapTime               0
LapNumber             0
Stint                 0
Sector1Time           0
Sector2Time           0
Sector3Time           0
SpeedI1               0
SpeedI2               0
Compound              0
TyreLife              0
FreshTyre             0
Position_y            0
dtype: int64

In [41]:
df_filled

Unnamed: 0,EventName,Session5DateUtc,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Position_y
0,2,2024-03-02 15:00:00,13,1.0,1,1.0,1,26.0,0.070413,1.0,1.0,-0.062816,1.197880,-0.532516,-0.436472,0.027647,3,-0.900151,0,1.0
1,2,2024-03-02 15:00:00,13,1.0,1,1.0,1,26.0,0.058025,2.0,1.0,0.413454,1.256385,-0.519926,-0.490961,-0.021169,3,-0.816672,0,1.0
2,2,2024-03-02 15:00:00,13,1.0,1,1.0,1,26.0,0.063755,3.0,1.0,0.426054,1.301560,-0.511492,-0.518206,-0.143208,3,-0.733194,0,1.0
3,2,2024-03-02 15:00:00,13,1.0,1,1.0,1,26.0,0.062426,4.0,1.0,0.415731,1.290600,-0.507091,-0.463717,0.100871,3,-0.649715,0,1.0
4,2,2024-03-02 15:00:00,13,1.0,1,1.0,1,26.0,0.069021,5.0,1.0,0.464919,1.314890,-0.502447,-0.518206,-0.094392,3,-0.566236,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191133,7,2024-07-21 13:00:00,17,20.0,0,0.0,0,0.0,0.125421,29.0,2.0,2.992176,-0.440264,-0.538994,0.789534,-0.338472,2,-1.150587,1,19.0
191134,7,2024-07-21 13:00:00,17,20.0,0,0.0,0,0.0,-0.103098,30.0,2.0,0.232946,-0.458778,-0.529827,0.707800,-0.289656,2,-1.067108,1,17.0
191135,7,2024-07-21 13:00:00,17,20.0,0,0.0,0,0.0,-0.104428,31.0,2.0,0.237045,-0.463962,-0.541805,0.735045,-0.216432,2,-0.983629,1,16.0
191136,7,2024-07-21 13:00:00,17,20.0,0,0.0,0,0.0,-0.102622,32.0,2.0,0.221560,-0.444708,-0.527626,-0.226092,-0.192024,2,-0.900151,1,15.0


In [43]:
df_filled["Session5DateUtc"] = pd.to_datetime(df["Session5DateUtc"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filled["Session5DateUtc"] = pd.to_datetime(df["Session5DateUtc"])


In [44]:
df_filled["Session5DateUtc"] = df_filled["Session5DateUtc"].apply(lambda x: 1 if 6 <= x.hour < 18 else 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filled["Session5DateUtc"] = df_filled["Session5DateUtc"].apply(lambda x: 1 if 6 <= x.hour < 18 else 0)


In [45]:
df_filled

Unnamed: 0,EventName,Session5DateUtc,FullName,Position_x,ClassifiedPosition,GridPosition,Status,Points,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,Position_y
0,2,1,13,1.0,1,1.0,1,26.0,0.070413,1.0,1.0,-0.062816,1.197880,-0.532516,-0.436472,0.027647,3,-0.900151,0,1.0
1,2,1,13,1.0,1,1.0,1,26.0,0.058025,2.0,1.0,0.413454,1.256385,-0.519926,-0.490961,-0.021169,3,-0.816672,0,1.0
2,2,1,13,1.0,1,1.0,1,26.0,0.063755,3.0,1.0,0.426054,1.301560,-0.511492,-0.518206,-0.143208,3,-0.733194,0,1.0
3,2,1,13,1.0,1,1.0,1,26.0,0.062426,4.0,1.0,0.415731,1.290600,-0.507091,-0.463717,0.100871,3,-0.649715,0,1.0
4,2,1,13,1.0,1,1.0,1,26.0,0.069021,5.0,1.0,0.464919,1.314890,-0.502447,-0.518206,-0.094392,3,-0.566236,0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191133,7,1,17,20.0,0,0.0,0,0.0,0.125421,29.0,2.0,2.992176,-0.440264,-0.538994,0.789534,-0.338472,2,-1.150587,1,19.0
191134,7,1,17,20.0,0,0.0,0,0.0,-0.103098,30.0,2.0,0.232946,-0.458778,-0.529827,0.707800,-0.289656,2,-1.067108,1,17.0
191135,7,1,17,20.0,0,0.0,0,0.0,-0.104428,31.0,2.0,0.237045,-0.463962,-0.541805,0.735045,-0.216432,2,-0.983629,1,16.0
191136,7,1,17,20.0,0,0.0,0,0.0,-0.102622,32.0,2.0,0.221560,-0.444708,-0.527626,-0.226092,-0.192024,2,-0.900151,1,15.0


In [47]:
spa = ff1.get_session(2024, "Belgium Grand Prix", "R")
spa.load()

core           INFO 	Loading data for Belgian Grand Prix - Race [v3.3.9]
req            INFO 	Using cached data for session_info


req            INFO 	Using cached data for driver_info


KeyboardInterrupt: 

In [41]:
spa.results

Unnamed: 0,DriverNumber,BroadcastName,Abbreviation,DriverId,TeamName,TeamColor,TeamId,FirstName,LastName,FullName,HeadshotUrl,CountryCode,Position,ClassifiedPosition,GridPosition,Q1,Q2,Q3,Time,Status,Points
44,44,L HAMILTON,HAM,hamilton,Mercedes,27F4D2,mercedes,Lewis,Hamilton,Lewis Hamilton,https://media.formula1.com/d_driver_fallback_image.png/content/dam/fom-website/drivers/L/LEWHAM01_Lewis_Hamilton/lewham01.png.transform/1col/image.png,GBR,1.0,1,3.0,NaT,NaT,NaT,0 days 00:00:00.526000,Finished,25.0
81,81,O PIASTRI,PIA,piastri,McLaren,FF8000,mclaren,Oscar,Piastri,Oscar Piastri,https://media.formula1.com/d_driver_fallback_image.png/content/dam/fom-website/drivers/O/OSCPIA01_Oscar_Piastri/oscpia01.png.transform/1col/image.png,AUS,2.0,2,5.0,NaT,NaT,NaT,0 days 00:00:01.173000,Finished,18.0
16,16,C LECLERC,LEC,leclerc,Ferrari,E80020,ferrari,Charles,Leclerc,Charles Leclerc,https://media.formula1.com/d_driver_fallback_image.png/content/dam/fom-website/drivers/C/CHALEC01_Charles_Leclerc/chalec01.png.transform/1col/image.png,MON,3.0,3,1.0,NaT,NaT,NaT,0 days 00:00:08.549000,Finished,15.0
1,1,M VERSTAPPEN,VER,max_verstappen,Red Bull Racing,3671C6,red_bull,Max,Verstappen,Max Verstappen,https://media.formula1.com/d_driver_fallback_image.png/content/dam/fom-website/drivers/M/MAXVER01_Max_Verstappen/maxver01.png.transform/1col/image.png,NED,4.0,4,11.0,NaT,NaT,NaT,0 days 00:00:09.226000,Finished,12.0
4,4,L NORRIS,NOR,norris,McLaren,FF8000,mclaren,Lando,Norris,Lando Norris,https://media.formula1.com/d_driver_fallback_image.png/content/dam/fom-website/drivers/L/LANNOR01_Lando_Norris/lannor01.png.transform/1col/image.png,GBR,5.0,5,4.0,NaT,NaT,NaT,0 days 00:00:09.850000,Finished,10.0
55,55,C SAINZ,SAI,sainz,Ferrari,E80020,ferrari,Carlos,Sainz,Carlos Sainz,https://media.formula1.com/d_driver_fallback_image.png/content/dam/fom-website/drivers/C/CARSAI01_Carlos_Sainz/carsai01.png.transform/1col/image.png,ESP,6.0,6,7.0,NaT,NaT,NaT,0 days 00:00:19.795000,Finished,8.0
11,11,S PEREZ,PER,perez,Red Bull Racing,3671C6,red_bull,Sergio,Perez,Sergio Perez,https://media.formula1.com/d_driver_fallback_image.png/content/dam/fom-website/drivers/S/SERPER01_Sergio_Perez/serper01.png.transform/1col/image.png,MEX,7.0,7,2.0,NaT,NaT,NaT,0 days 00:00:43.195000,Finished,7.0
14,14,F ALONSO,ALO,alonso,Aston Martin,229971,aston_martin,Fernando,Alonso,Fernando Alonso,https://media.formula1.com/d_driver_fallback_image.png/content/dam/fom-website/drivers/F/FERALO01_Fernando_Alonso/feralo01.png.transform/1col/image.png,ESP,8.0,8,8.0,NaT,NaT,NaT,0 days 00:00:49.963000,Finished,4.0
31,31,E OCON,OCO,ocon,Alpine,0093cc,alpine,Esteban,Ocon,Esteban Ocon,https://media.formula1.com/d_driver_fallback_image.png/content/dam/fom-website/drivers/E/ESTOCO01_Esteban_Ocon/estoco01.png.transform/1col/image.png,FRA,9.0,9,9.0,NaT,NaT,NaT,0 days 00:00:52.552000,Finished,2.0
3,3,D RICCIARDO,RIC,ricciardo,RB,6692FF,rb,Daniel,Ricciardo,Daniel Ricciardo,https://media.formula1.com/d_driver_fallback_image.png/content/dam/fom-website/drivers/D/DANRIC01_Daniel_Ricciardo/danric01.png.transform/1col/image.png,AUS,10.0,10,13.0,NaT,NaT,NaT,0 days 00:00:54.926000,Finished,1.0


In [42]:
spa.laps

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,Sector2SessionTime,Sector3SessionTime,SpeedI1,SpeedI2,SpeedFL,SpeedST,IsPersonalBest,Compound,TyreLife,FreshTyre,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate
0,0 days 00:57:07.801000,HAM,44,0 days 00:01:52.700000,1.0,1.0,NaT,NaT,NaT,0 days 00:00:49.406000,0 days 00:00:29.478000,NaT,0 days 00:56:38.468000,0 days 00:57:07.904000,326.0,204.0,218.0,309.0,False,MEDIUM,1.0,True,Mercedes,0 days 00:55:14.764000,2024-07-28 13:03:52.742,1,2.0,False,,False,False
1,0 days 00:58:57.650000,HAM,44,0 days 00:01:49.849000,2.0,1.0,NaT,NaT,0 days 00:00:31.284000,0 days 00:00:49.211000,0 days 00:00:29.354000,0 days 00:57:39.019000,0 days 00:58:28.230000,0 days 00:58:57.584000,329.0,202.0,220.0,307.0,True,MEDIUM,2.0,True,Mercedes,0 days 00:57:07.801000,2024-07-28 13:05:45.779,1,2.0,False,,False,True
2,0 days 01:00:46.759000,HAM,44,0 days 00:01:49.109000,3.0,1.0,NaT,NaT,0 days 00:00:31.215000,0 days 00:00:48.486000,0 days 00:00:29.408000,0 days 00:59:28.799000,0 days 01:00:17.285000,0 days 01:00:46.693000,326.0,204.0,216.0,310.0,True,MEDIUM,3.0,True,Mercedes,0 days 00:58:57.650000,2024-07-28 13:07:35.628,1,1.0,False,,False,True
3,0 days 01:02:36.567000,HAM,44,0 days 00:01:49.808000,4.0,1.0,NaT,NaT,0 days 00:00:31.754000,0 days 00:00:48.605000,0 days 00:00:29.449000,0 days 01:01:18.447000,0 days 01:02:07.052000,0 days 01:02:36.501000,305.0,203.0,216.0,305.0,False,MEDIUM,4.0,True,Mercedes,0 days 01:00:46.759000,2024-07-28 13:09:24.737,1,1.0,False,,False,True
4,0 days 01:04:25.943000,HAM,44,0 days 00:01:49.376000,5.0,1.0,NaT,NaT,0 days 00:00:31.592000,0 days 00:00:48.320000,0 days 00:00:29.464000,0 days 01:03:08.093000,0 days 01:03:56.413000,0 days 01:04:25.877000,307.0,201.0,215.0,307.0,False,MEDIUM,5.0,True,Mercedes,0 days 01:02:36.567000,2024-07-28 13:11:14.545,1,1.0,False,,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
836,0 days 02:08:02.508000,RUS,63,0 days 00:01:47.631000,40.0,2.0,NaT,NaT,0 days 00:00:30.989000,0 days 00:00:47.555000,0 days 00:00:29.087000,0 days 02:06:45.826000,0 days 02:07:33.381000,0 days 02:08:02.468000,317.0,208.0,222.0,314.0,False,HARD,30.0,True,Mercedes,0 days 02:06:14.877000,2024-07-28 14:14:52.855,1,1.0,False,,False,True
837,0 days 02:09:50.262000,RUS,63,0 days 00:01:47.754000,41.0,2.0,NaT,NaT,0 days 00:00:30.926000,0 days 00:00:47.796000,0 days 00:00:29.032000,0 days 02:08:33.394000,0 days 02:09:21.190000,0 days 02:09:50.222000,,208.0,221.0,312.0,False,HARD,31.0,True,Mercedes,0 days 02:08:02.508000,2024-07-28 14:16:40.486,1,1.0,False,,False,True
838,0 days 02:11:37.727000,RUS,63,0 days 00:01:47.465000,42.0,2.0,NaT,NaT,0 days 00:00:31.014000,0 days 00:00:47.525000,0 days 00:00:28.926000,0 days 02:10:21.236000,0 days 02:11:08.761000,0 days 02:11:37.687000,324.0,209.0,222.0,314.0,False,HARD,32.0,True,Mercedes,0 days 02:09:50.262000,2024-07-28 14:18:28.240,1,1.0,False,,False,True
839,0 days 02:13:25.028000,RUS,63,0 days 00:01:47.301000,43.0,2.0,NaT,NaT,0 days 00:00:30.990000,0 days 00:00:47.479000,0 days 00:00:28.832000,0 days 02:12:08.677000,0 days 02:12:56.156000,0 days 02:13:24.988000,313.0,209.0,222.0,,True,HARD,33.0,True,Mercedes,0 days 02:11:37.727000,2024-07-28 14:20:15.705,1,1.0,False,,False,True


In [None]:
def get_telemetry_by_driver(year, gp, driver):
    telemetry = ff1.get_session(year, gp, "R")
    telemetry.load(weather=False, messages=False, livedata=False)
    start_date = telemetry.session_info["StartDate"]
    start_date_timestamp = pd.Timestamp(start_date)
    telemetry = telemetry.laps.pick_driver(driver).get_telemetry()
    telemetry = telemetry.drop(columns=["Date", "DriverAhead", "DistanceToDriverAhead", "RPM", "Source", "Distance", "RelativeDistance", "Status", "Brake", "X", "Y", "Z"])
    telemetry["Time"] = start_date_timestamp + pd.to_timedelta(telemetry["Time"])
    telemetry.set_index('Time', inplace=True)
    resampled_data = telemetry.resample('60s').agg({
    'Speed': 'mean',
    'Throttle': 'mean',
    }).reset_index()
    return resampled_data

In [None]:
teste = ff1.get_session(2024, 2, "R")
teste.load(weather=False, messages=False, livedata=False)

In [None]:
teste.results["Abbreviation"].values

In [None]:
telemetry_dataframe = []
#past_events["RoundNumber"].max() + 1
for race in range(1, 5):
    session = ff1.get_session(season, race, "R")
    session.load(weather=False, messages=False, livedata=False)
    drivers = session.results["Abbreviation"].values
    for driver in drivers:
        telemetry_data = get_telemetry_by_driver(season, race, driver)
        telemetry_data["Driver"] = driver
        telemetry_dataframe.append(telemetry_data)
            
df_telemetry_concat = pd.concat(telemetry_dataframe, axis=0, ignore_index=True)

In [None]:
df_telemetry_concat

In [None]:
race_dataframes = []
laps_dataframe = []
for race in range(1, past_events["RoundNumber"].max() + 1):
    df_race = get_race(2023, race)
    race_dataframes.append(df_race)
    lap = get_laps_data(2023, race)
    laps_dataframe.append(lap)

df_race_concat = pd.concat(race_dataframes, axis=0, ignore_index=True)
laps_df_concat = pd.concat(laps_dataframe, axis=0, ignore_index=True)

df_merge_schedule_results = pd.merge(past_events, df_race_concat, on="EventName")
df_merge_all = pd.merge(df_merge_schedule_results, laps_df_concat, on="DriverNumber")

In [None]:
def get_weather_data(year, gp):
    weather = ff1.get_session(year, gp, "R")
    weather.load(telemetry=False, messages=False, livedata=False)
    df_weather = weather.laps.get_weather_data()
    df_weather = df_weather.drop(columns=["Humidity", "Pressure", "WindDirection", "WindSpeed"])
    return df_weather

In [None]:
start_date = race.session_info["StartDate"]
start_date_timestamp = pd.Timestamp(start_date)

In [None]:
start_date_timestamp

In [None]:
teste.head(2)

In [None]:
teste["Time"] = pd.to_timedelta(teste["Time"])

In [None]:
teste.head(2)

In [None]:
teste["Datetime"] = start_date_timestamp + teste["Time"]

In [None]:
teste.head(2)

In [None]:
teste.set_index("Datetime", inplace=True)

In [None]:
teste

In [None]:
teste.to_csv("teste.csv")

In [None]:
resampled_teste = teste.resample('2s').agg({
    'AirTemp': 'mean',
    'TrackTemp': 'mean',
    "Rainfall": 'mean',
}).reset_index()

In [None]:
resampled_teste

In [None]:
resampled_teste["Rainfall"] = resampled_teste["Rainfall"].fillna(0)
resampled_teste["Rainfall"] = np.where(resampled_teste["Rainfall"] > 0, 1, 0)

In [None]:
resampled_teste

In [None]:
df_weather = pd.read_csv("weather_round1_2023.csv")
df_telemetry = pd.read_csv("telemetry_ver_2023")

In [None]:
df_weather.head()

In [None]:
df_merge_3_4 = pd.merge(df_weather, df_telemetry, on="Time", how="outer")

In [None]:
df_merge_3_4