In [31]:
import pandas as pd
import ast
import numpy as np


In [32]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# Data preparation

In [33]:
def convert_to_seconds(df: pd.DataFrame, columns=None):
    """
    Convert specified columns in the DataFrame from str to total seconds.

    Parameters:
    df (pd.DataFrame): The input DataFrame.
    columns (list): List of column names to convert.

    Returns:
    pd.DataFrame: The modified DataFrame with specified columns converted to seconds.
    """
    if columns is None:
        columns = ['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time']
    for col in columns:
        if not df[col].dtype == 'float64':
            df[col] = pd.to_timedelta(df[col]).dt.total_seconds()
    return df


In [34]:
race = pd.read_csv('../data/mexican_gp_race_datasets_2018-2019-2021-2022-2023/lap_mexico_grand_prix_2018-2019-2021-2022-2023.csv')
race = convert_to_seconds(race)

In [35]:
# for N stints use all avaliable data and then loop through N to get all predictions
# for compound prediction use only weather, driver, team, stint number, previous compound, starting grid, n laps
# for lap prediction use the same data as for compound prediction, n laps should be absolute, not relative to tyre life - less chance to get more than 72 laps in total
# for time use qual data and lap and comp data

In [36]:
Y_feat = pd.DataFrame()
# Leave df base for aggregation in it
Y_feat = race.drop_duplicates(subset=['Year', 'Driver', 'Stint', 'Compound'])[['Year', 'Driver', 'Stint', 'Compound']]
# Add Avg time feature
average_time = race.groupby(['Year', 'Driver', 'Stint'])['LapTime'].mean().reset_index()
Y_feat = pd.merge(left=Y_feat, right=average_time, on=['Year', 'Driver', 'Stint'])
Y_feat.rename(columns={'LapTime': 'Avg lap time'}, inplace=True)
# Add tyre life feature
tyre_life = race.groupby(['Year', 'Driver', 'Stint'])['LapNumber'].max().reset_index()
Y_feat = pd.merge(left=Y_feat, right=tyre_life, on=['Year', 'Driver', 'Stint'])
Y_feat.rename(columns={'LapNumber': 'Pit lap'}, inplace=True)
# Add tyre life feature
Y_feat['Tyre life'] = np.where(
    Y_feat['Stint'] == 1,
    Y_feat['Pit lap'],
    Y_feat['Pit lap'] - Y_feat['Pit lap'].shift(1))
# Add number of stints
n_stints = Y_feat.groupby(['Year', 'Driver'])['Stint'].max().reset_index()
n_stints.rename(columns={'Stint': 'N stints'}, inplace=True)
Y_feat = pd.merge(left=Y_feat, right=n_stints, on=['Year', 'Driver'], how='right')


In [37]:
quals_lap = pd.read_csv('../data/mexican_gp_qualifying_datasets_2018-2019-2021-2022-2023/lap_mexico_grand_prix_2018-2019-2021-2022-2023.csv')
quals_weather = pd.read_csv('../data/mexican_gp_qualifying_datasets_2018-2019-2021-2022-2023/weather_mexico_grand_prix_2018-2019-2021-2022-2023.csv')
quals_results = pd.read_csv('../data/mexican_gp_qualifying_datasets_2018-2019-2021-2022-2023/result_mexico_grand_prix_2018-2019-2021-2022-2023.csv')
quals_lap = convert_to_seconds(quals_lap)

In [38]:
# Leave only necessary columns and rows without box
qual_wo_box = quals_lap[quals_lap['PitOutTime'].isna() & quals_lap['PitInTime'].isna()][['Year', 'Driver', 'LapTime', 'Sector1Time', 'Sector2Time',
       'Sector3Time', 'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST',
       'IsPersonalBest', 'Compound', 'TyreLife', 'FreshTyre', 'Team',
       'TrackStatus', 'Deleted']]

In [39]:
# Compute average weather
avg_weather = quals_weather.groupby(['Year'])[['AirTemp', 'Humidity', 'Pressure', 'TrackTemp']].mean()
    
# Handle Rainfall: if any value is True, the result is True
avg_weather['Rainfall'] = quals_weather.groupby(['Year'])['Rainfall'].any()


In [40]:
X_feat = qual_wo_box.groupby(['Year', 'Driver'])[['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time']].min().reset_index()

max_speed = qual_wo_box.groupby(['Year', 'Driver'])[['SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST']].max().reset_index()

X_feat = pd.merge(X_feat, max_speed, on=['Year', 'Driver'])

X_feat = pd.merge(X_feat, quals_results[['Year', 'Abbreviation', 'Position', 'TeamName']], left_on=['Year', 'Driver'], right_on=['Year', 'Abbreviation'])
X_feat.drop(columns=['Abbreviation'], inplace=True)

X_feat = pd.merge(X_feat, avg_weather, on=['Year'], how='left')

In [41]:
res_X_Y = pd.merge(X_feat, Y_feat, on=['Year', 'Driver'], how='outer')
res_X_Y = res_X_Y[res_X_Y['Year'] > 2018]
res_X_Y = res_X_Y.drop(columns=['Year'])
res_X_Y.reset_index(inplace=True, drop=True)


# Model build

# N stints prediction

In [27]:
n_stint_x_y = res_X_Y[(res_X_Y['Stint'] == res_X_Y['N stints']) & (res_X_Y['N stints'] != 1)].reset_index(drop=True)

In [28]:
n_stint_x_y = n_stint_x_y.drop(columns=['Stint', 'Compound', 'Avg lap time', 'Pit lap', 'Tyre life'])

In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Handling categorical variables using Label Encoding
label_encoders = {}
for col in ['Driver', 'TeamName']:
    le = LabelEncoder()
    n_stint_x_y[col] = le.fit_transform(n_stint_x_y[col])
    label_encoders[col] = le

# Treat Position as integer
n_stint_x_y['Position'] = n_stint_x_y['Position'].astype(int)

# Features and target
X = n_stint_x_y.drop('N stints', axis=1)
y = n_stint_x_y['N stints']

# Standard scaling for numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3)

# Model: RandomForestRegressor
n_stint_model = RandomForestRegressor()
n_stint_model.fit(X_train, y_train)

# Cross-validation scores
cv_scores = cross_val_score(n_stint_model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')

# Evaluate the n_stint_model on test data
y_pred = n_stint_model.predict(X_test)
y_pred = np.round(y_pred).astype(int)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Cross-validation MSE:", -cv_scores.mean())
print("Test MSE:", mse)
print("Test MAE:", mae)


Cross-validation MSE: 0.46868945454545463
Test MSE: 0.3333333333333333
Test MAE: 0.3333333333333333


## Compound & tyre life prediction

In [30]:
comp_x_y = res_X_Y[res_X_Y['Year'] > 2018]

KeyError: 'Year'

In [458]:
comp_x_y = comp_x_y.drop(columns=['Avg lap time', 'N stints', 'LapTime', 'Sector1Time', 'Sector2Time',
       'Sector3Time', 'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST'])

In [459]:
# comp_x_y['prev Compound'] = comp_x_y.groupby(['Year', 'Driver'])['Compound'].shift(1)
# comp_x_y['prev Pit lap'] = comp_x_y.groupby(['Year', 'Driver'])['Pit lap'].shift(1)
# comp_x_y['prev Tyre life'] = comp_x_y.groupby(['Year', 'Driver'])['Tyre life'].shift(1)

In [460]:
comp_x_y.drop(columns=['Tyre life'], inplace=True)

In [461]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

# Handling categorical variables using Label Encoding
label_encoders = {}
for col in ['Driver', 'TeamName', 'Compound']:
    le = LabelEncoder()
    comp_x_y[col] = le.fit_transform(comp_x_y[col])
    label_encoders[col] = le

# Features and targets
X = comp_x_y.drop(['Compound', 'Pit lap'], axis=1)
y_compound = comp_x_y['Compound']
y_pit_lap = comp_x_y['Pit lap']

# Scaling numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [492]:
# Train-test split for both outputs
X_train, X_test, y_comp_train, y_comp_test, y_pit_train, y_pit_test = train_test_split(
    X_scaled, y_compound, y_pit_lap, test_size=0.3)

# Model for Compound (classification)
compound_model = RandomForestClassifier()
compound_model.fit(X_train, y_comp_train)

# Model for Pit lap (regression)
pit_lap_model = RandomForestRegressor()
pit_lap_model.fit(X_train, y_pit_train)

# Predictions
y_comp_pred = compound_model.predict(X_test)
y_pit_pred = pit_lap_model.predict(X_test)
y_pit_pred = np.round(y_pit_pred).astype(int)

# Evaluation
comp_accuracy = accuracy_score(y_comp_test, y_comp_pred)
# pit_mse = mean_squared_error(y_pit_test, y_pit_pred)

print(f"Compound Classification Accuracy: {comp_accuracy}")
print(f"Pit Lap Regression MSE: {pit_mse}")

Compound Classification Accuracy: 0.6724137931034483
Pit Lap Regression MSE: 164.97640344827587


## Lap time prediction

In [503]:
lap_x_y = res_X_Y[res_X_Y['Year'] > 2018]

In [504]:
lap_x_y = lap_x_y.drop(columns=['N stints'])
lap_x_y = lap_x_y[~lap_x_y['Avg lap time'].isna()]

In [554]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error

# Assuming lap_x_y is already loaded

# Handling categorical variables using Label Encoding
label_encoders = {}
for col in ['Driver', 'TeamName', 'Compound']:
    le = LabelEncoder()
    lap_x_y[col] = le.fit_transform(lap_x_y[col])
    label_encoders[col] = le

# Features and target
X = lap_x_y.drop(['Avg lap time'], axis=1)
y = lap_x_y['Avg lap time']

# Standard scaling for numerical features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3)

# Model: RandomForestRegressor
lap_time_model = RandomForestRegressor()
lap_time_model.fit(X_train, y_train)

# Predictions and evaluation
y_pred = lap_time_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)

print(f"Mean Squared Error on Test Set: {mse}")


Mean Squared Error on Test Set: 15.461264465346897


# 3 Model total

['SOFT', 'HARD', 'MEDIUM', 'INTERMEDIATE', 'WET']

['ALB', 'ALO', 'BEA', 'BOT', 'ERI', 'GAS', 'GIO', 'GRO', 'HAM',
       'HAR', 'HUL', 'KUB', 'KVY', 'LAT', 'LEC', 'MAG', 'MAZ', 'MSC',
       'NOR', 'OCO', 'PER', 'PIA', 'RAI', 'RIC', 'RUS', 'SAI', 'SAR',
       'SIR', 'STR', 'TSU', 'VAN', 'VER', 'VET', 'ZHO']

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score

# Assuming res_X_Y is already created from merging X_feat and Y_feat

# --- N stints Prediction ---
n_stint_x_y = res_X_Y[(res_X_Y['Stint'] == res_X_Y['N stints']) & (res_X_Y['N stints'] != 1)].reset_index(drop=True)
n_stint_x_y = n_stint_x_y.drop(columns=['Stint', 'Compound', 'Avg lap time', 'Pit lap', 'Tyre life'])

# Encoding categorical variables
label_encoders_n_stint = {}
for col in ['Driver', 'TeamName']:
    le = LabelEncoder()
    n_stint_x_y[col] = le.fit_transform(n_stint_x_y[col])
    label_encoders_n_stint[col] = le

# Treat Position as integer
n_stint_x_y['Position'] = n_stint_x_y['Position'].astype(int)

# Features and target
X_n_stint = n_stint_x_y.drop('N stints', axis=1)
y_n_stint = n_stint_x_y['N stints']

# Scaling numerical features
scaler_n_stint = StandardScaler()
X_n_stint_scaled = scaler_n_stint.fit_transform(X_n_stint)

# Train-test split
X_train_ns, X_test_ns, y_train_ns, y_test_ns = train_test_split(X_n_stint_scaled, y_n_stint, test_size=0.3)

# Model for N stints
n_stint_model = RandomForestRegressor()
n_stint_model.fit(X_train_ns, y_train_ns)

# Cross-validation and evaluation
cv_scores_ns = cross_val_score(n_stint_model, X_train_ns, y_train_ns, cv=5, scoring='neg_mean_squared_error')
y_pred_ns = np.round(n_stint_model.predict(X_test_ns)).astype(int)
mse_ns = mean_squared_error(y_test_ns, y_pred_ns)
mae_ns = mean_absolute_error(y_test_ns, y_pred_ns)

print("Cross-validation MSE (N stints):", -cv_scores_ns.mean())
print("Test MSE (N stints):", mse_ns)
print("Test MAE (N stints):", mae_ns)

# --- Compound and Pit lap Prediction ---

comp_x_y = res_X_Y.copy()
comp_x_y = comp_x_y.drop(columns=['Avg lap time', 'N stints', 'LapTime', 'Sector1Time', 'Sector2Time',
                                  'Sector3Time', 'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST', 'Tyre life'])

# Encoding categorical variables
label_encoders_comp = {}
for col in ['Driver', 'TeamName', 'Compound']:
    le = LabelEncoder()
    comp_x_y[col] = le.fit_transform(comp_x_y[col])
    label_encoders_comp[col] = le

# Features and targets
X_comp = comp_x_y.drop(['Compound', 'Pit lap'], axis=1)
y_compound = comp_x_y['Compound']
y_pit_lap = comp_x_y['Pit lap']

# Scaling numerical features
scaler_comp = StandardScaler()
X_comp_scaled = scaler_comp.fit_transform(X_comp)

# Train-test split
X_train_comp, X_test_comp, y_comp_train, y_comp_test, y_pit_train, y_pit_test = train_test_split(
    X_comp_scaled, y_compound, y_pit_lap, test_size=0.3)

# Model for Compound (classification)
compound_model = RandomForestClassifier()
compound_model.fit(X_train_comp, y_comp_train)

# Model for Pit lap (regression)
pit_lap_model = RandomForestRegressor()
pit_lap_model.fit(X_train_comp, y_pit_train)

# Predictions and evaluation
y_comp_pred = compound_model.predict(X_test_comp)
y_pit_pred = np.round(pit_lap_model.predict(X_test_comp)).astype(int)

# Evaluation
comp_accuracy = accuracy_score(y_comp_test, y_comp_pred)
pit_mse = mean_squared_error(y_pit_test, y_pit_pred)

print(f"Compound Classification Accuracy: {comp_accuracy}")
print(f"Pit Lap Regression MSE: {pit_mse}")

# --- Lap Time Prediction ---
lap_x_y = res_X_Y.copy()
lap_x_y = lap_x_y.drop(columns=['N stints'])
lap_x_y = lap_x_y[~lap_x_y['Avg lap time'].isna()]

# Encoding categorical variables
label_encoders_lap = {}
for col in ['Driver', 'TeamName', 'Compound']:
    le = LabelEncoder()
    lap_x_y[col] = le.fit_transform(lap_x_y[col])
    label_encoders_lap[col] = le

# Features and target
X_lap = lap_x_y.drop(['Avg lap time'], axis=1)
y_lap = lap_x_y['Avg lap time']

# Scaling numerical features
scaler_lap = StandardScaler()
X_lap_scaled = scaler_lap.fit_transform(X_lap)

# Train-test split
X_train_lap, X_test_lap, y_train_lap, y_test_lap = train_test_split(X_lap_scaled, y_lap, test_size=0.3)

# Model for Lap Time (regression)
lap_time_model = RandomForestRegressor()
lap_time_model.fit(X_train_lap, y_train_lap)

# Predictions and evaluation
y_pred_lap = lap_time_model.predict(X_test_lap)
mse_lap = mean_squared_error(y_test_lap, y_pred_lap)

print(f"Mean Squared Error on Lap Time Prediction: {mse_lap}")


Cross-validation MSE (N stints): 0.5412052727272727
Test MSE (N stints): 0.4166666666666667
Test MAE (N stints): 0.4166666666666667
Compound Classification Accuracy: 0.6896551724137931
Pit Lap Regression MSE: 173.0
Mean Squared Error on Lap Time Prediction: 13.245556876335739


In [42]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, accuracy_score

# Assuming res_X_Y is already created from merging X_feat and Y_feat

# --- N stints Prediction ---
n_stint_x_y = res_X_Y[(res_X_Y['Stint'] == res_X_Y['N stints']) & (res_X_Y['N stints'] != 1)].reset_index(drop=True)
n_stint_x_y = n_stint_x_y.drop(columns=['Stint', 'Compound', 'Avg lap time', 'Pit lap', 'Tyre life'])

# Encoding categorical variables
label_encoders_n_stint = {}
drivers = ['ALB', 'ALO', 'BEA', 'BOT', 'ERI', 'GAS', 'GIO', 'GRO', 'HAM',
       'HAR', 'HUL', 'KUB', 'KVY', 'LAT', 'LEC', 'MAG', 'MAZ', 'MSC',
       'NOR', 'OCO', 'PER', 'PIA', 'RAI', 'RIC', 'RUS', 'SAI', 'SAR',
       'SIR', 'STR', 'TSU', 'VAN', 'VER', 'VET', 'ZHO']
teams = ['Alfa Romeo', 'Alfa Romeo Racing', 'AlphaTauri', 'Alpine', 'Aston Martin', 'Ferrari', 'Haas F1 Team', 'Kick Sauber', 'McLaren', 'Mercedes', 'RB', 'Racing Point', 'Red Bull Racing', 'Renault', 'Sauber', 'Toro Rosso', 'Williams']

for col, values in [('Driver', drivers), ('TeamName', teams)]:
    le = LabelEncoder()
    le.fit(values)
    n_stint_x_y[col] = le.transform(n_stint_x_y[col])
    label_encoders_n_stint[col] = le

# Treat Position as integer
n_stint_x_y['Position'] = n_stint_x_y['Position'].astype(int)

# Features and target
X_n_stint = n_stint_x_y.drop('N stints', axis=1)
y_n_stint = n_stint_x_y['N stints']

# Train-test split
X_train_ns, X_test_ns, y_train_ns, y_test_ns = train_test_split(X_n_stint, y_n_stint, test_size=0.3)

# Model for N stints
n_stint_model = RandomForestRegressor()
n_stint_model.fit(X_train_ns, y_train_ns)

# Cross-validation and evaluation
cv_scores_ns = cross_val_score(n_stint_model, X_train_ns, y_train_ns, cv=5, scoring='neg_mean_squared_error')
y_pred_ns = np.round(n_stint_model.predict(X_test_ns)).astype(int)
mse_ns = mean_squared_error(y_test_ns, y_pred_ns)
mae_ns = mean_absolute_error(y_test_ns, y_pred_ns)

print("Cross-validation MSE (N stints):", -cv_scores_ns.mean())
print("Test MSE (N stints):", mse_ns)
print("Test MAE (N stints):", mae_ns)

# --- Compound and Pit lap Prediction ---
comp_x_y = res_X_Y.copy()
comp_x_y = comp_x_y.drop(columns=['Avg lap time', 'N stints', 'LapTime', 'Sector1Time', 'Sector2Time',
                                  'Sector3Time', 'SpeedI1', 'SpeedI2', 'SpeedFL', 'SpeedST', 'Tyre life'])

# Encoding categorical variables
label_encoders_comp = {}
compounds = ['SOFT', 'HARD', 'MEDIUM', 'INTERMEDIATE', 'WET']

for col, values in [('Driver', drivers), ('TeamName', teams), ('Compound', compounds)]:
    le = LabelEncoder()
    le.fit(values)
    comp_x_y[col] = le.transform(comp_x_y[col])
    label_encoders_comp[col] = le

# Features and targets
X_comp = comp_x_y.drop(['Compound', 'Pit lap'], axis=1)
y_compound = comp_x_y['Compound']
y_pit_lap = comp_x_y['Pit lap']

# Train-test split
X_train_comp, X_test_comp, y_comp_train, y_comp_test, y_pit_train, y_pit_test = train_test_split(
    X_comp, y_compound, y_pit_lap, test_size=0.3)

# Model for Compound (classification)
compound_model = RandomForestClassifier()
compound_model.fit(X_train_comp, y_comp_train)

# Model for Pit lap (regression)
pit_lap_model = RandomForestRegressor()
pit_lap_model.fit(X_train_comp, y_pit_train)

# Predictions and evaluation
y_comp_pred = compound_model.predict(X_test_comp)
y_pit_pred = np.round(pit_lap_model.predict(X_test_comp)).astype(int)

# Evaluation
comp_accuracy = accuracy_score(y_comp_test, y_comp_pred)
pit_mse = mean_squared_error(y_pit_test, y_pit_pred)

print(f"Compound Classification Accuracy: {comp_accuracy}")
print(f"Pit Lap Regression MSE: {pit_mse}")

# --- Lap Time Prediction ---
lap_x_y = res_X_Y.copy()
lap_x_y = lap_x_y.drop(columns=['N stints'])
lap_x_y = lap_x_y[~lap_x_y['Avg lap time'].isna()]

# Encoding categorical variables
label_encoders_lap = {}
for col, values in [('Driver', drivers), ('TeamName', teams), ('Compound', compounds)]:
    le = LabelEncoder()
    le.fit(values)
    lap_x_y[col] = le.transform(lap_x_y[col])
    label_encoders_lap[col] = le

# Features and target
X_lap = lap_x_y.drop(['Avg lap time'], axis=1)
y_lap = lap_x_y['Avg lap time']

# Train-test split
X_train_lap, X_test_lap, y_train_lap, y_test_lap = train_test_split(X_lap, y_lap, test_size=0.3)

# Model for Lap Time (regression)
lap_time_model = RandomForestRegressor()
lap_time_model.fit(X_train_lap, y_train_lap)

# Predictions and evaluation
y_pred_lap = lap_time_model.predict(X_test_lap)
mse_lap = mean_squared_error(y_test_lap, y_pred_lap)

print(f"Mean Squared Error on Lap Time Prediction: {mse_lap}")

Cross-validation MSE (N stints): 0.48371036363636366
Test MSE (N stints): 0.375
Test MAE (N stints): 0.375
Compound Classification Accuracy: 0.7586206896551724
Pit Lap Regression MSE: 151.48275862068965
Mean Squared Error on Lap Time Prediction: 32.12717758848241


In [48]:
Y_feat

Unnamed: 0,Year,Driver,Stint,Compound,Avg lap time,Pit lap,Tyre life,N stints
0,2018,ALO,1.0,ULTRASOFT,87.985333,4.0,4.0,1.0
1,2018,BOT,1.0,ULTRASOFT,84.996727,11.0,11.0,4.0
2,2018,BOT,2.0,SUPERSOFT,83.956838,48.0,37.0,4.0
3,2018,BOT,3.0,ULTRASOFT,86.848143,62.0,14.0,4.0
4,2018,BOT,4.0,HYPERSOFT,83.026125,70.0,8.0,4.0
5,2018,ERI,1.0,HYPERSOFT,87.14425,16.0,16.0,2.0
6,2018,ERI,2.0,SUPERSOFT,86.369566,69.0,53.0,2.0
7,2018,GAS,1.0,HYPERSOFT,92.0146,5.0,5.0,3.0
8,2018,GAS,2.0,SUPERSOFT,85.403381,26.0,21.0,3.0
9,2018,GAS,3.0,SUPERSOFT,86.500884,69.0,43.0,3.0


In [44]:
import os
import pickle

# Create directories if they don't exist
os.makedirs('models', exist_ok=True)
os.makedirs('encoders', exist_ok=True)

# Save the models in the 'models' folder
with open('models/n_stint_model.pkl', 'wb') as f:
    pickle.dump(n_stint_model, f)

with open('models/compound_model.pkl', 'wb') as f:
    pickle.dump(compound_model, f)

with open('models/pit_lap_model.pkl', 'wb') as f:
    pickle.dump(pit_lap_model, f)

with open('models/lap_time_model.pkl', 'wb') as f:
    pickle.dump(lap_time_model, f)

# Save Label Encoders and Scalers in the 'scalers_encoders' folder
with open('encoders/label_encoders_n_stint.pkl', 'wb') as f:
    pickle.dump(label_encoders_n_stint, f)

with open('encoders/label_encoders_comp.pkl', 'wb') as f:
    pickle.dump(label_encoders_comp, f)

with open('encoders/label_encoders_lap.pkl', 'wb') as f:
    pickle.dump(label_encoders_lap, f)

# # Save Scalers
# with open('scalers_encoders/scaler_n_stint.pkl', 'wb') as f:
#     pickle.dump(scaler_n_stint, f)

# with open('scalers_encoders/scaler_comp.pkl', 'wb') as f:
#     pickle.dump(scaler_comp, f)

# with open('scalers_encoders/scaler_lap.pkl', 'wb') as f:
#     pickle.dump(scaler_lap, f)
