# F1 Safety Car Predictor

## Authors: Petar Stamenković, Aleksa Mitrovčan

## Import new table with weather track data.

In [3]:
import pandas as pd
safety_car_predictor = pd.read_csv("safety_car_predictor.csv")

#safety_car_predictor

## Creating a new DataFrame for mapping circuitID and safety car predictor table

In [8]:
safety_car_predictor.name.unique() # List out all circuits in order to create a new DataFrame for circuit ID
circuit_mapping = {
    'Australian Grand Prix': 1,
    'Malaysian Grand Prix': 2,
    'Chinese Grand Prix': 3,
    'Bahrain Grand Prix': 4,
    'Spanish Grand Prix': 5,
    'Monaco Grand Prix': 6,
    'Canadian Grand Prix': 7,
    'British Grand Prix': 8,
    'German Grand Prix': 9,
    'Hungarian Grand Prix': 10,
    'Belgian Grand Prix': 11,
    'Italian Grand Prix': 12,
    'Singapore Grand Prix': 13,
    'Korean Grand Prix': 14,
    'Japanese Grand Prix': 15,
    'Indian Grand Prix': 16,
    'Abu Dhabi Grand Prix': 17,
    'United States Grand Prix': 18,
    'Brazilian Grand Prix': 19,
    'Austrian Grand Prix': 20,
    'Russian Grand Prix': 21,
    'Mexican Grand Prix': 22,
    'Azerbaijan Grand Prix': 23,
    'French Grand Prix': 24,
    'Tuscan Grand Prix': 25,
    'Eifel Grand Prix': 26,
    'Portuguese Grand Prix': 27,
    'Emilia Romagna Grand Prix': 28,
    'Turkish Grand Prix': 29,
    'Qatar Grand Prix': 30,
    'Dutch Grand Prix': 31,
    'Mexico City Grand Prix': 32,
    'Sao Paulo Grand Prix': 33,
    'Saudi Arabian Grand Prix': 34,
    'Miami Grand Prix': 35,
    'Las Vegas Grand Prix': 36
}

#safety_car_predictor.insert(0, 'circuit_id', safety_car_predictor['name'].map(circuit_mapping))
safety_car_predictor

Unnamed: 0,circuit_id,year,name,date,weather,num_dnfs,circuit_type,phy_diff,safety_car
0,1.0,2009,Australian Grand Prix,03/29/09,dry,7,street,2,True
1,2.0,2009,Malaysian Grand Prix,04/05/09,heavy_rain,5,traditional,4,True
2,3.0,2009,Chinese Grand Prix,04/19/09,heavy_rain,4,traditional,2,True
3,4.0,2009,Bahrain Grand Prix,04/26/09,dry,1,traditional,3,False
4,5.0,2009,Spanish Grand Prix,05/10/09,dry,6,traditional,2,True
...,...,...,...,...,...,...,...,...,...
317,32.0,2024,Mexico City Grand Prix,10/27/2024,dry,3,traditional,3,True
318,33.0,2024,Sao Paulo Grand Prix,11/3/2024,heavy_rain,5,traditional,2,True
319,36.0,2024,Las Vegas Grand Prix,11/23/2024,dry,2,street,3,False
320,30.0,2024,Qatar Grand Prix,12/1/2024,dry,5,street,4,True


## Calculate average number of dnfs for each circuit and create a separate column

In [10]:
safety_car_predictor['avg_dnfs_per_circuit'] = safety_car_predictor.groupby('name')['num_dnfs'].transform('mean')
safety_car_predictor

Unnamed: 0,circuit_id,year,name,date,weather,num_dnfs,circuit_type,phy_diff,safety_car,avg_dnfs_per_circuit
0,1.0,2009,Australian Grand Prix,03/29/09,dry,7,street,2,True,5.857143
1,2.0,2009,Malaysian Grand Prix,04/05/09,heavy_rain,5,traditional,4,True,5.111111
2,3.0,2009,Chinese Grand Prix,04/19/09,heavy_rain,4,traditional,2,True,2.916667
3,4.0,2009,Bahrain Grand Prix,04/26/09,dry,1,traditional,3,False,3.375000
4,5.0,2009,Spanish Grand Prix,05/10/09,dry,6,traditional,2,True,3.000000
...,...,...,...,...,...,...,...,...,...,...
317,32.0,2024,Mexico City Grand Prix,10/27/2024,dry,3,traditional,3,True,2.750000
318,33.0,2024,Sao Paulo Grand Prix,11/3/2024,heavy_rain,5,traditional,2,True,4.000000
319,36.0,2024,Las Vegas Grand Prix,11/23/2024,dry,2,street,3,False,2.500000
320,30.0,2024,Qatar Grand Prix,12/1/2024,dry,5,street,4,True,3.000000


## Data modification for 

In [11]:
# query_name = "Australian Grand Prix"
# safety_car_predictor_circuit = safety_car_predictor.loc[safety_car_predictor.name == query_name].copy()

# Replace string values in the 'weather' column
safety_car_predictor['weather'] = safety_car_predictor['weather'].replace({
    "dry": 0,
    "light_rain": 1,
    "heavy_rain": 2
})

# Replace string values in the 'circuit type' column
safety_car_predictor['circuit_type'] = safety_car_predictor['circuit_type'].replace({
    "traditional" : 0,
    "street" : 1
})

safety_car_predictor


Unnamed: 0,circuit_id,year,name,date,weather,num_dnfs,circuit_type,phy_diff,safety_car,avg_dnfs_per_circuit
0,1.0,2009,Australian Grand Prix,03/29/09,0,7,1,2,True,5.857143
1,2.0,2009,Malaysian Grand Prix,04/05/09,2,5,0,4,True,5.111111
2,3.0,2009,Chinese Grand Prix,04/19/09,2,4,0,2,True,2.916667
3,4.0,2009,Bahrain Grand Prix,04/26/09,0,1,0,3,False,3.375000
4,5.0,2009,Spanish Grand Prix,05/10/09,0,6,0,2,True,3.000000
...,...,...,...,...,...,...,...,...,...,...
317,32.0,2024,Mexico City Grand Prix,10/27/2024,0,3,0,3,True,2.750000
318,33.0,2024,Sao Paulo Grand Prix,11/3/2024,2,5,0,2,True,4.000000
319,36.0,2024,Las Vegas Grand Prix,11/23/2024,0,2,1,3,False,2.500000
320,30.0,2024,Qatar Grand Prix,12/1/2024,0,5,1,4,True,3.000000


## Random forrest

In [12]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

features = ['weather', 'circuit_type', 'phy_diff','circuit_id','avg_dnfs_per_circuit']
X = safety_car_predictor[features]
y = safety_car_predictor.safety_car

train_X, val_x, train_y, val_y = train_test_split(X,y,random_state = 0)

forest_model = RandomForestRegressor(random_state=1)
forest_model.fit(train_X, train_y)

# Prediction sample has to be a 2-dimensional array [x,y,z]!
# x is a weather parameter and it maps to : 0 - dry , 1 - light rain , 2 - heavy rain
# y is a type of circuit parameter and it maps to : 0 - traditional , 1 - street
# z is a physical difficulty of circuit parameter and it has 4 levels

prediction_sample = [[1,1,2,1,5.857]] # For instance this predicts a safety car chance for Australian Grand Prix with light rain!
safety_car_deployment_prediction = forest_model.predict(prediction_sample) * 100


print(f"Prediction : {safety_car_deployment_prediction}%")



Prediction : [84.0270202]%




In [13]:
print(safety_car_predictor.groupby('weather')['safety_car'].mean()) # This shows what is the effect of a weather on a target variable


weather
0    0.611511
1    0.655172
2    0.933333
Name: safety_car, dtype: float64


In [26]:
for column in range(len(features)):
    print(features[column], forest_model.feature_importances_[column]) # This command shows how important each column is for our model


weather 0.1628632820795904
circuit_type 0.05588412566938008
phy_diff 0.10940421307666831
circuit_id 0.3828722579139251
avg_dnfs_per_circuit 0.2889761212604362
