# Pre-processing and Feature Selection

## 1. Preprocessing

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.compose import ColumnTransformer
from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline
from sklearn.pipeline import make_union
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the dataset
# The CSV location can be overridden through the F1_RACES_CSV environment variable so
# the notebook is not tied to a single machine; the original absolute path is kept as
# the fallback, so behaviour is unchanged when the variable is not set.
DEFAULT_RACES_CSV = '/home/diego_nbotelho/code/diegonbotelho/f1-tire-prediction/raw_data/df_all_races.csv'
file_path = os.environ.get('F1_RACES_CSV', DEFAULT_RACES_CSV)
all_races_df = pd.read_csv(file_path)

In [3]:
# Lift the column display limit so wide frames are rendered without "..." columns,
# then preview the raw frame as the cell output.
pd.options.display.max_columns = None
all_races_df

Unnamed: 0,Time,Driver,DriverNumber,LapTime,LapNumber,Stint,PitOutTime,PitInTime,Sector1Time,Sector2Time,Sector3Time,Sector1SessionTime,Sector2SessionTime,Sector3SessionTime,SpeedI1,SpeedI2,SpeedFL,SpeedST,IsPersonalBest,Compound,TyreLife,FreshTyre,Team,LapStartTime,LapStartDate,TrackStatus,Position,Deleted,DeletedReason,FastF1Generated,IsAccurate,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,WindDirection,WindSpeed,Event_Year,GrandPrix,Delta_Lap
0,0 days 01:04:15.340000,VER,1,100.236,1.0,1.0,,,,42.325,24.389,,3831.046,3855.427,230.0,254.0,274.0,250.0,False,SOFT,4.0,False,Red Bull Racing,0 days 01:02:34.872000,2022-03-20 15:03:34.889,1,2.0,False,,False,False,23.9,25.0,1010.2,False,29.1,20,0.5,2022,Bahrain,0.000
1,0 days 01:05:53.220000,VER,1,97.880,2.0,1.0,,,31.285,42.269,24.326,3886.662,3928.931,3953.257,,252.0,276.0,296.0,True,SOFT,5.0,False,Red Bull Racing,0 days 01:04:15.340000,2022-03-20 15:05:15.357,1,2.0,False,,False,True,23.8,26.0,1010.4,False,29.0,357,0.5,2022,Bahrain,-2.356
2,0 days 01:07:31.577000,VER,1,98.357,3.0,1.0,,,31.499,42.474,24.384,3984.756,4027.230,4051.614,229.0,254.0,276.0,294.0,False,SOFT,6.0,False,Red Bull Racing,0 days 01:05:53.220000,2022-03-20 15:06:53.237,1,2.0,False,,False,True,23.8,27.0,1010.4,False,29.0,12,0.5,2022,Bahrain,0.477
3,0 days 01:09:10.143000,VER,1,98.566,4.0,1.0,,,31.342,42.674,24.550,4082.956,4125.630,4150.180,231.0,250.0,276.0,297.0,False,SOFT,7.0,False,Red Bull Racing,0 days 01:07:31.577000,2022-03-20 15:08:31.594,1,2.0,False,,False,True,23.8,29.0,1010.4,False,28.7,12,0.4,2022,Bahrain,0.209
4,0 days 01:10:49.020000,VER,1,98.877,5.0,1.0,,,31.498,42.854,24.525,4181.678,4224.532,4249.057,229.0,256.0,276.0,293.0,False,SOFT,8.0,False,Red Bull Racing,0 days 01:09:10.143000,2022-03-20 15:10:10.160,1,2.0,False,,False,True,23.8,33.0,1010.4,False,28.5,316,0.4,2022,Bahrain,0.311
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67074,0 days 02:18:53.038000,PIA,81,87.731,54.0,3.0,,,17.591,38.078,32.062,8262.900,8300.978,8333.040,,293.0,215.0,304.0,False,HARD,22.0,True,McLaren,0 days 02:17:25.307000,2024-12-08 14:24:12.524,1,10.0,False,,False,True,25.9,60.0,1017.8,False,29.3,109,2.0,2024,Abu Dhabi,-0.437
67075,0 days 02:20:20.819000,PIA,81,87.781,55.0,3.0,,,17.639,38.014,32.128,8350.679,8388.693,8420.821,290.0,295.0,214.0,307.0,False,HARD,23.0,True,McLaren,0 days 02:18:53.038000,2024-12-08 14:25:40.255,1,10.0,False,,False,True,25.9,60.0,1017.9,False,29.3,117,3.2,2024,Abu Dhabi,0.050
67076,0 days 02:21:48.635000,PIA,81,87.816,56.0,3.0,,,17.603,38.000,32.213,8438.424,8476.424,8508.637,293.0,296.0,214.0,309.0,False,HARD,24.0,True,McLaren,0 days 02:20:20.819000,2024-12-08 14:27:08.036,1,10.0,False,,False,True,25.9,60.0,1018.0,False,29.4,110,2.1,2024,Abu Dhabi,0.035
67077,0 days 02:23:17.189000,PIA,81,88.554,57.0,3.0,,,17.650,38.546,32.358,8526.287,8564.833,8597.191,295.0,306.0,213.0,310.0,False,HARD,25.0,True,McLaren,0 days 02:21:48.635000,2024-12-08 14:28:35.852,12,10.0,False,,False,True,25.9,60.0,1018.0,False,29.4,104,2.4,2024,Abu Dhabi,0.738


In [4]:
# (rows, columns) of the raw dataset
print(f"Dataset shape: {all_races_df.shape}")

Dataset shape: (67079, 41)


In [5]:
# Number of null entries in each column of the raw dataset
print("Missing data per column:", all_races_df.isna().sum(), sep="\n")

Missing data per column:
Time                      0
Driver                    0
DriverNumber              0
LapTime                1004
LapNumber                 0
Stint                     0
PitOutTime            64732
PitInTime             64708
Sector1Time            1437
Sector2Time             122
Sector3Time             230
Sector1SessionTime     1598
Sector2SessionTime      122
Sector3SessionTime      230
SpeedI1               10095
SpeedI2                 119
SpeedFL                2483
SpeedST                6014
IsPersonalBest           80
Compound                  0
TyreLife                  0
FreshTyre                 0
Team                      0
LapStartTime              0
LapStartDate             80
TrackStatus               0
Position                109
Deleted                   0
DeletedReason         66192
FastF1Generated           0
IsAccurate                0
AirTemp                   0
Humidity                  0
Pressure                  0
Rainfall               

In [6]:
# Columns that are not needed for modelling and are dropped from the raw frame.
# NOTE(review): the output stored under this cell (17 columns) also lacks
# 'TrackStatus' and 'Event_Year', which are not listed here — the saved output may
# come from a different version of this list; confirm with Restart & Run All.
columns_to_remove = [
    'Time', 'DriverNumber',
    'PitOutTime', 'PitInTime',
    'Sector1SessionTime', 'Sector2SessionTime', 'Sector3SessionTime',
    'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST',
    'IsPersonalBest', 'FreshTyre', 'Team',
    'LapStartTime', 'LapStartDate',
    'Deleted', 'DeletedReason',
    'FastF1Generated', 'IsAccurate',
    'WindDirection', 'WindSpeed',
]

# drop() returns a new frame, so all_races_df itself is left untouched
new_df = all_races_df.drop(labels=columns_to_remove, axis='columns')
new_df.head(n=1)

Unnamed: 0,Driver,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,Compound,TyreLife,Position,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,GrandPrix,Delta_Lap
0,VER,100.236,1.0,1.0,,42.325,24.389,SOFT,4.0,2.0,23.9,25.0,1010.2,False,29.1,Bahrain,0.0


In [7]:
# (rows, columns) after removing the unused columns
print(f"Dataset shape: {new_df.shape}")

Dataset shape: (67079, 17)


In [8]:
# Number of null entries per column of the reduced frame
print("Missing data per column:", new_df.isna().sum(), sep="\n")

Missing data per column:
Driver            0
LapTime        1004
LapNumber         0
Stint             0
Sector1Time    1437
Sector2Time     122
Sector3Time     230
Compound          0
TyreLife          0
Position        109
AirTemp           0
Humidity          0
Pressure          0
Rainfall          0
TrackTemp         0
GrandPrix         0
Delta_Lap         0
dtype: int64


In [9]:
# Discard the rows that have no recorded Position (the index is not reset, so gaps remain)
new_df = new_df.dropna(axis='index', subset=['Position'])

In [10]:
# Re-check the null counts now that rows without a Position are gone
remaining_nulls = new_df.isna().sum()
print("Missing data per column:")
print(remaining_nulls)

Missing data per column:
Driver            0
LapTime         895
LapNumber         0
Stint             0
Sector1Time    1328
Sector2Time      13
Sector3Time     121
Compound          0
TyreLife          0
Position          0
AirTemp           0
Humidity          0
Pressure          0
Rainfall          0
TrackTemp         0
GrandPrix         0
Delta_Lap         0
dtype: int64


In [11]:
# Count fully duplicated rows
duplicate_count = new_df.duplicated().sum()
print(f"\nNumber of duplicates:\n{duplicate_count}")


Number of duplicates:
0


In [12]:
# Strategy for missing values

# Replace every remaining gap in a numeric column with that column's median.
# NOTE(review): the scaling pipelines defined further down also start with a median
# SimpleImputer, so after this step those imputers have nothing left to fill.
numerical_columns = new_df.select_dtypes(include=[np.number]).columns
column_medians = new_df[numerical_columns].median()
new_df.loc[:, numerical_columns] = new_df[numerical_columns].fillna(column_medians)

# Confirm that no nulls are left
print("\nMissing values after treatment:", new_df.isnull().sum(), sep="\n")


Missing values after treatment:
Driver         0
LapTime        0
LapNumber      0
Stint          0
Sector1Time    0
Sector2Time    0
Sector3Time    0
Compound       0
TyreLife       0
Position       0
AirTemp        0
Humidity       0
Pressure       0
Rainfall       0
TrackTemp      0
GrandPrix      0
Delta_Lap      0
dtype: int64


In [13]:
# Number of laps recorded on each tyre compound
new_df["Compound"].value_counts(sort=True, ascending=False)

Compound
HARD            29837
MEDIUM          23344
SOFT             8669
INTERMEDIATE     4903
WET               217
Name: count, dtype: int64

## 2. Scale the features

In [15]:
# Numerical variables
# NOTE(review): 'WindSpeed', 'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST' and
# 'WindDirection' are listed in columns_to_remove, so they are not present in new_df.
numerical_features = [
    'LapTime',        # lap time, in seconds
    'TyreLife',       # tyre life
    'AirTemp',        # air temperature
    'TrackTemp',      # track temperature
    'WindSpeed',      # wind speed
    'SpeedI1',        # speed in the first sector
    'SpeedI2',        # speed in the second sector
    'SpeedFL',        # speed on the main straight
    'SpeedST',        # speed in the timing sector
    'Position',       # race position
    'Humidity',       # relative air humidity
    'Pressure',       # atmospheric pressure
    'WindDirection',  # wind direction
    'Sector1Time',    # sector 1 time
    'Sector2Time',    # sector 2 time
    'Sector3Time',    # sector 3 time
    'LapNumber',      # lap number
    'Delta_Lap',      # time difference between two consecutive laps of the same driver
]

In [16]:
# Categorical variables
# NOTE(review): 'FreshTyre' is listed in columns_to_remove and 'TrackStatus' does not
# appear in the new_df output shown earlier — confirm both still exist before use.
categorical_features = [
    'Compound',     # tyre compound (SOFT, MEDIUM, HARD, INTERMEDIATE, WET)
    'TrackStatus',  # track status (green flag, yellow flag, etc.)
    'FreshTyre',    # boolean flag
    'Rainfall',     # boolean flag
]

In [18]:
# Summary statistics of the numeric columns before any scaling
new_df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,TyreLife,Position,AirTemp,Humidity,Pressure,TrackTemp,Delta_Lap
count,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0
mean,90.525069,31.072704,2.146036,28.203706,35.667176,26.812733,14.805405,9.735165,24.301,53.226594,980.547689,35.876464,0.276833
std,39.764341,18.737846,0.989543,7.859066,8.443752,6.790808,10.825625,5.399382,4.457512,18.001104,57.816682,7.539795,45.947747
min,67.012,1.0,1.0,16.835,17.488,16.913,1.0,1.0,15.7,5.0,778.5,18.1,-2414.122
25%,79.952,15.0,1.0,21.89425,30.205,21.952,6.0,5.0,20.8,38.0,971.0,30.1,-0.316
50%,86.64,30.0,2.0,28.833,33.283,25.452,13.0,10.0,24.0,56.0,1007.7,35.0,0.0
75%,96.966,46.0,3.0,31.67,41.981,30.16075,21.0,14.0,27.3,65.0,1013.4,42.6,0.305
max,2526.253,78.0,8.0,98.794,90.308,99.74,78.0,20.0,37.2,92.0,1020.9,51.3,2433.269


In [19]:
# Columns for the Robust Scaler
columns_for_robust_scaler = [
    'LapTime', 'TyreLife',
    'AirTemp', 'TrackTemp', 'Humidity', 'Pressure',
    'Sector1Time', 'Sector2Time', 'Sector3Time',
]

# Columns for the MinMax Scaler
columns_for_minmax_scaler = ['Position', 'LapNumber']

# Columns for the Standard Scaler
columns_for_standard_scaler = ['Delta_Lap']

In [20]:
# Median imputation followed by RobustScaler
distribution_pipeline = Pipeline(
    steps=[
        ('Median_Imputer', SimpleImputer(strategy='median')),
        ('Robust_Scaler', RobustScaler()),
    ]
)

In [21]:
# Median imputation followed by MinMaxScaler
range_pipeline = Pipeline(
    steps=[
        ('Median_Imputer', SimpleImputer(strategy='median')),
        ('Minmax_Scaler', MinMaxScaler()),
    ]
)

In [22]:
# Median imputation followed by StandardScaler
normal_pipeline = Pipeline(
    steps=[
        ('Median_imputer', SimpleImputer(strategy='median')),
        ('Standard_Scaler', StandardScaler()),
    ]
)

In [23]:
# Route each group of numeric columns to its own scaling pipeline.
# The groups are the columns_for_*_scaler lists defined earlier in this section rather
# than re-typed literals, so the lists and the transformer cannot drift apart.
# No `remainder` is given, so columns outside the three groups are not part of the output.
numeric_preprocessor = ColumnTransformer([
    ('robust_transformer', distribution_pipeline, columns_for_robust_scaler),
    ('range_transformer', range_pipeline, columns_for_minmax_scaler),
    ('normalizator', normal_pipeline, columns_for_standard_scaler),
])

In [24]:
# Show the assembled ColumnTransformer as the cell output
numeric_preprocessor

In [26]:
# Fit the preprocessor on new_df and wrap the scaled array in a DataFrame.
# new_df's index has gaps (rows were removed with dropna and the index was never
# reset), so it is passed on explicitly; without it the result gets a fresh 0..n-1
# index and index-aligned operations against new_df (concat/join/assignment) would
# pair up the wrong rows.
scaled_values = numeric_preprocessor.fit_transform(new_df)
transformed_dataframe = pd.DataFrame(
    scaled_values,
    index=new_df.index,
    columns=numeric_preprocessor.get_feature_names_out(),
)

In [27]:
# Summary statistics of the scaled columns
transformed_dataframe.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,robust_transformer__LapTime,robust_transformer__TyreLife,robust_transformer__AirTemp,robust_transformer__TrackTemp,robust_transformer__Humidity,robust_transformer__Pressure,robust_transformer__Sector1Time,robust_transformer__Sector2Time,robust_transformer__Sector3Time,range_transformer__Position,range_transformer__LapNumber,normalizator__Delta_Lap
count,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0
mean,0.228345,0.12036,0.046308,0.070117,-0.102719,-0.640385,-0.064373,0.202461,0.165766,0.459746,0.390555,7.692152999999999e-19
std,2.337154,0.721708,0.685771,0.603184,0.666708,1.363601,0.803935,0.717031,0.827265,0.284178,0.243349,1.000007
min,-1.153638,-0.8,-1.276923,-1.352,-1.888889,-5.40566,-1.227323,-1.341287,-1.040231,0.0,0.0,-52.54701
25%,-0.393088,-0.466667,-0.492308,-0.392,-0.666667,-0.865566,-0.709792,-0.261379,-0.426374,0.210526,0.181818,-0.01290244
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.473684,0.376623,-0.006025008
75%,0.606912,0.533333,0.507692,0.608,0.333333,0.134434,0.290208,0.738621,0.573626,0.684211,0.584416,0.0006130169
max,143.388562,4.333333,2.030769,1.304,1.333333,0.311321,7.156586,4.842476,9.049855,1.0,1.0,52.95168


In [29]:
# Unscaled statistics again, for comparison with the scaled frame
new_df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,TyreLife,Position,AirTemp,Humidity,Pressure,TrackTemp,Delta_Lap
count,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0,66970.0
mean,90.525069,31.072704,2.146036,28.203706,35.667176,26.812733,14.805405,9.735165,24.301,53.226594,980.547689,35.876464,0.276833
std,39.764341,18.737846,0.989543,7.859066,8.443752,6.790808,10.825625,5.399382,4.457512,18.001104,57.816682,7.539795,45.947747
min,67.012,1.0,1.0,16.835,17.488,16.913,1.0,1.0,15.7,5.0,778.5,18.1,-2414.122
25%,79.952,15.0,1.0,21.89425,30.205,21.952,6.0,5.0,20.8,38.0,971.0,30.1,-0.316
50%,86.64,30.0,2.0,28.833,33.283,25.452,13.0,10.0,24.0,56.0,1007.7,35.0,0.0
75%,96.966,46.0,3.0,31.67,41.981,30.16075,21.0,14.0,27.3,65.0,1013.4,42.6,0.305
max,2526.253,78.0,8.0,98.794,90.308,99.74,78.0,20.0,37.2,92.0,1020.9,51.3,2433.269


## 3. Encode features (OneHotEncoder)

In [30]:
# One-hot encode 'Driver': append one indicator column per driver code,
# then remove the original text column.
ohe = OneHotEncoder(sparse_output=False)
driver_indicators = ohe.fit_transform(new_df[['Driver']])

new_df[ohe.get_feature_names_out()] = driver_indicators
new_df = new_df.drop(labels=["Driver"], axis="columns")

new_df.head(n=3)

Unnamed: 0,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,Compound,TyreLife,Position,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,GrandPrix,Delta_Lap,Driver_ALB,Driver_ALO,Driver_BEA,Driver_BOT,Driver_COL,Driver_DEV,Driver_DOO,Driver_GAS,Driver_HAM,Driver_HUL,Driver_LAT,Driver_LAW,Driver_LEC,Driver_MAG,Driver_MSC,Driver_NOR,Driver_OCO,Driver_PER,Driver_PIA,Driver_RIC,Driver_RUS,Driver_SAI,Driver_SAR,Driver_STR,Driver_TSU,Driver_VER,Driver_VET,Driver_ZHO
0,100.236,1.0,1.0,28.833,42.325,24.389,SOFT,4.0,2.0,23.9,25.0,1010.2,False,29.1,Bahrain,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,97.88,2.0,1.0,31.285,42.269,24.326,SOFT,5.0,2.0,23.8,26.0,1010.4,False,29.0,Bahrain,-2.356,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,98.357,3.0,1.0,31.499,42.474,24.384,SOFT,6.0,2.0,23.8,27.0,1010.4,False,29.0,Bahrain,0.477,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [31]:
# One-hot encode 'GrandPrix': append one indicator column per event,
# then remove the original text column.
ohe = OneHotEncoder(sparse_output=False)
grandprix_indicators = ohe.fit_transform(new_df[['GrandPrix']])

new_df[ohe.get_feature_names_out()] = grandprix_indicators
new_df = new_df.drop(labels=["GrandPrix"], axis="columns")

new_df.head(n=3)

Unnamed: 0,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,Compound,TyreLife,Position,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,Delta_Lap,Driver_ALB,Driver_ALO,Driver_BEA,Driver_BOT,Driver_COL,Driver_DEV,Driver_DOO,Driver_GAS,Driver_HAM,Driver_HUL,Driver_LAT,Driver_LAW,Driver_LEC,Driver_MAG,Driver_MSC,Driver_NOR,Driver_OCO,Driver_PER,Driver_PIA,Driver_RIC,Driver_RUS,Driver_SAI,Driver_SAR,Driver_STR,Driver_TSU,Driver_VER,Driver_VET,Driver_ZHO,GrandPrix_Abu Dhabi,GrandPrix_Australia,GrandPrix_Austria,GrandPrix_Azerbaijan,GrandPrix_Bahrain,GrandPrix_Belgium,GrandPrix_Brazil,GrandPrix_Canada,GrandPrix_Great Britain,GrandPrix_Hungary,GrandPrix_Italy,GrandPrix_Japan,GrandPrix_Mexico,GrandPrix_Miami,GrandPrix_Monaco,GrandPrix_Netherlands,GrandPrix_Saudi Arabia,GrandPrix_Singapore,GrandPrix_Spain,GrandPrix_USA
0,100.236,1.0,1.0,28.833,42.325,24.389,SOFT,4.0,2.0,23.9,25.0,1010.2,False,29.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,97.88,2.0,1.0,31.285,42.269,24.326,SOFT,5.0,2.0,23.8,26.0,1010.4,False,29.0,-2.356,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,98.357,3.0,1.0,31.499,42.474,24.384,SOFT,6.0,2.0,23.8,27.0,1010.4,False,29.0,0.477,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
# One-hot encode 'Compound': append one indicator column per tyre compound,
# then remove the original text column.
ohe = OneHotEncoder(sparse_output=False)
compound_indicators = ohe.fit_transform(new_df[['Compound']])

new_df[ohe.get_feature_names_out()] = compound_indicators
new_df = new_df.drop(labels=["Compound"], axis="columns")

new_df.head(n=3)

Unnamed: 0,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,TyreLife,Position,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,Delta_Lap,Driver_ALB,Driver_ALO,Driver_BEA,Driver_BOT,Driver_COL,Driver_DEV,Driver_DOO,Driver_GAS,Driver_HAM,Driver_HUL,Driver_LAT,Driver_LAW,Driver_LEC,Driver_MAG,Driver_MSC,Driver_NOR,Driver_OCO,Driver_PER,Driver_PIA,Driver_RIC,Driver_RUS,Driver_SAI,Driver_SAR,Driver_STR,Driver_TSU,Driver_VER,Driver_VET,Driver_ZHO,GrandPrix_Abu Dhabi,GrandPrix_Australia,GrandPrix_Austria,GrandPrix_Azerbaijan,GrandPrix_Bahrain,GrandPrix_Belgium,GrandPrix_Brazil,GrandPrix_Canada,GrandPrix_Great Britain,GrandPrix_Hungary,GrandPrix_Italy,GrandPrix_Japan,GrandPrix_Mexico,GrandPrix_Miami,GrandPrix_Monaco,GrandPrix_Netherlands,GrandPrix_Saudi Arabia,GrandPrix_Singapore,GrandPrix_Spain,GrandPrix_USA,Compound_HARD,Compound_INTERMEDIATE,Compound_MEDIUM,Compound_SOFT,Compound_WET
0,100.236,1.0,1.0,28.833,42.325,24.389,4.0,2.0,23.9,25.0,1010.2,False,29.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,97.88,2.0,1.0,31.285,42.269,24.326,5.0,2.0,23.8,26.0,1010.4,False,29.0,-2.356,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,98.357,3.0,1.0,31.499,42.474,24.384,6.0,2.0,23.8,27.0,1010.4,False,29.0,0.477,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [33]:
# Display the frame after the Driver, GrandPrix and Compound one-hot encodings
new_df

Unnamed: 0,LapTime,LapNumber,Stint,Sector1Time,Sector2Time,Sector3Time,TyreLife,Position,AirTemp,Humidity,Pressure,Rainfall,TrackTemp,Delta_Lap,Driver_ALB,Driver_ALO,Driver_BEA,Driver_BOT,Driver_COL,Driver_DEV,Driver_DOO,Driver_GAS,Driver_HAM,Driver_HUL,Driver_LAT,Driver_LAW,Driver_LEC,Driver_MAG,Driver_MSC,Driver_NOR,Driver_OCO,Driver_PER,Driver_PIA,Driver_RIC,Driver_RUS,Driver_SAI,Driver_SAR,Driver_STR,Driver_TSU,Driver_VER,Driver_VET,Driver_ZHO,GrandPrix_Abu Dhabi,GrandPrix_Australia,GrandPrix_Austria,GrandPrix_Azerbaijan,GrandPrix_Bahrain,GrandPrix_Belgium,GrandPrix_Brazil,GrandPrix_Canada,GrandPrix_Great Britain,GrandPrix_Hungary,GrandPrix_Italy,GrandPrix_Japan,GrandPrix_Mexico,GrandPrix_Miami,GrandPrix_Monaco,GrandPrix_Netherlands,GrandPrix_Saudi Arabia,GrandPrix_Singapore,GrandPrix_Spain,GrandPrix_USA,Compound_HARD,Compound_INTERMEDIATE,Compound_MEDIUM,Compound_SOFT,Compound_WET
0,100.236,1.0,1.0,28.833,42.325,24.389,4.0,2.0,23.9,25.0,1010.2,False,29.1,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,97.880,2.0,1.0,31.285,42.269,24.326,5.0,2.0,23.8,26.0,1010.4,False,29.0,-2.356,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,98.357,3.0,1.0,31.499,42.474,24.384,6.0,2.0,23.8,27.0,1010.4,False,29.0,0.477,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,98.566,4.0,1.0,31.342,42.674,24.550,7.0,2.0,23.8,29.0,1010.4,False,28.7,0.209,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,98.877,5.0,1.0,31.498,42.854,24.525,8.0,2.0,23.8,33.0,1010.4,False,28.5,0.311,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67074,87.731,54.0,3.0,17.591,38.078,32.062,22.0,10.0,25.9,60.0,1017.8,False,29.3,-0.437,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
67075,87.781,55.0,3.0,17.639,38.014,32.128,23.0,10.0,25.9,60.0,1017.9,False,29.3,0.050,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
67076,87.816,56.0,3.0,17.603,38.000,32.213,24.0,10.0,25.9,60.0,1018.0,False,29.4,0.035,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
67077,88.554,57.0,3.0,17.650,38.546,32.358,25.0,10.0,25.9,60.0,1018.0,False,29.4,0.738,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
