# F1 Safety Car Predictor

## Authors: Petar Stamenković, Aleksa Mitrovčan

### Data Set

In [7]:
import pandas as pd
import os

# Define relative path to the CSV file
relative_path = os.path.join("archive", "races.csv")

# Read the CSV file using the relative path
races = pd.read_csv(relative_path)[['year', 'name', 'date']]

races


Unnamed: 0,year,name,date
0,2009,Australian Grand Prix,2009-03-29
1,2009,Malaysian Grand Prix,2009-04-05
2,2009,Chinese Grand Prix,2009-04-19
3,2009,Bahrain Grand Prix,2009-04-26
4,2009,Spanish Grand Prix,2009-05-10
...,...,...,...
1120,2024,Mexico City Grand Prix,2024-10-27
1121,2024,São Paulo Grand Prix,2024-11-03
1122,2024,Las Vegas Grand Prix,2024-11-23
1123,2024,Qatar Grand Prix,2024-12-01


### Data Preprocessing
In this step we saved all the races from 2014 to 2024 into a csv file, and added additional columns for precise data training. Than we added necessary data to the table.

In [17]:
# Data Preprocessing

races_filtered = races[races['year'] >= 2014]
races_filtered['weather'] = ''
races_filtered['num_dnfs'] = ''
races_filtered['circuit_type'] = ''
races_filtered['phy_diff'] = ''
races_filtered['safety_car']=''

# Save the filtered DataFrame to a new CSV file
#races_filtered.to_csv(''/home/alex64a/F1-Lap-Time-Predictor/archive/races_filtered.csv'', index=False)

print("File saved successfully!")

File saved successfully!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  races_filtered['weather'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  races_filtered['num_dnfs'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  races_filtered['circuit_type'] = ''
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer]

## Import new table with weather track data.

In [3]:
safety_car_predictor = pd.read_csv("safety_car_predictor.csv")
safety_car_predictor

Unnamed: 0,year,name,date,weather,num_dnfs,circuit_type,phy_diff,safety_car
0,2014,Australian Grand Prix,3/16/2014,dry,7,street,2,True
1,2014,Malaysian Grand Prix,3/30/2014,dry,6,traditional,4,False
2,2014,Bahrain Grand Prix,4/6/2014,dry,6,traditional,3,True
3,2014,Chinese Grand Prix,4/20/2014,dry,2,traditional,2,False
4,2014,Spanish Grand Prix,5/11/2014,dry,2,traditional,2,False
...,...,...,...,...,...,...,...,...
223,2024,Mexico City Grand Prix,10/27/2024,dry,3,traditional,3,True
224,2024,Sao Paulo Grand Prix,11/3/2024,heavy_rain,5,traditional,2,True
225,2024,Las Vegas Grand Prix,11/23/2024,dry,2,street,3,False
226,2024,Qatar Grand Prix,12/1/2024,dry,5,street,4,True


## Data modification for 

In [40]:
query_name = "Australian Grand Prix"
safety_car_predictor_circuit = safety_car_predictor.loc[safety_car_predictor.name == query_name].copy()

# Replace values in the 'weather' column
safety_car_predictor_circuit['weather'] = safety_car_predictor_circuit['weather'].replace({
    "dry": 0,
    "light_rain": 1,
    "heavy_rain": 2
})

safety_car_predictor_circuit['circuit_type'] = safety_car_predictor_circuit['circuit_type'].replace({
    "traditional" : 0,
    "street" : 1
})

safety_car_predictor_circuit


Unnamed: 0,year,name,date,weather,num_dnfs,circuit_type,phy_diff,safety_car
0,2014,Australian Grand Prix,3/16/2014,0,7,1,2,True
24,2015,Australian Grand Prix,3/15/2015,0,4,1,2,True
38,2016,Australian Grand Prix,3/20/2016,0,5,1,2,True
59,2017,Australian Grand Prix,3/26/2017,0,7,1,2,False
79,2018,Australian Grand Prix,3/25/2018,0,5,1,2,True
100,2019,Australian Grand Prix,3/17/2019,0,3,1,2,False
162,2022,Australian Grand Prix,4/10/2022,0,3,1,2,True
184,2023,Australian Grand Prix,4/2/2023,0,8,1,2,True
206,2024,Australian Grand Prix,3/24/2024,0,3,1,2,True


## Compute the mean of number of dnfs for given GP

In [44]:
average_dnf = safety_car_predictor_circuit.num_dnfs.mean()
print(f"Average number of dnf's for circuit {query_name} is {average_dnf}")


Average number of dnf's for circuit Australian Grand Prix is 5.0


## Random forrest

In [50]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split


features = ['weather', 'circuit_type', 'phy_diff']
X = safety_car_predictor_circuit[features]
y = safety_car_predictor_circuit.safety_car

train_X,val_x, train_y, val_y = train_test_split(X,y,random_state = 0)

forest_model = RandomForestRegressor(random_state=1)
forest_model.fit(train_X, train_y)
melb_preds = forest_model.predict(val_x)
print(f"Prediction : {melb_preds}")
print(val_x.shape)  # How many rows are in the validation set?
print(f"Train size: {train_X.shape}, Validation size: {val_x.shape}")
print(f"val_x is {val_x}]")


Prediction : [0.67333333 0.67333333 0.67333333]
(3, 3)
Train size: (6, 3), Validation size: (3, 3)
val_x is      weather  circuit_type  phy_diff
184        0             1         2
38         0             1         2
24         0             1         2]
