In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer
import joblib
from sklearn.pipeline import Pipeline

In [4]:
df = pd.read_csv('/Users/rahul/Desktop/F1 Project/DF_Merging_and_Model_Fitting/final.csv')

In [5]:
df.columns

Index(['circuit_location', 'circuit_country', 'grid_position',
       'results_position', 'results_points', 'total_laps',
       'driver_nationality', 'driver_home', 'driver_age_atrace',
       'constructor_name', 'constructor_home', 'q_avg', 'status',
       'qual_position', 'q1', 'q2', 'q3', 'lap', 'position_at_given_lap',
       'average_lap_time', 'lap_time_for_current_lap', 'total_pit_stops',
       'weather_conditions'],
      dtype='object')

# Encoding:

In [6]:
df.head()

Unnamed: 0,circuit_location,circuit_country,grid_position,results_position,results_points,total_laps,driver_nationality,driver_home,driver_age_atrace,constructor_name,...,qual_position,q1,q2,q3,lap,position_at_given_lap,average_lap_time,lap_time_for_current_lap,total_pit_stops,weather_conditions
0,Melbourne,Australia,2,1,25.0,57,German,Germany,32,Mercedes,...,2.0,86.934,84.796,84.197,1.0,3.0,113.957281,99.316,2,Overcast
1,Melbourne,Australia,2,1,25.0,57,German,Germany,32,Mercedes,...,2.0,86.934,84.796,84.197,2.0,3.0,113.957281,92.037,2,Overcast
2,Melbourne,Australia,2,1,25.0,57,German,Germany,32,Mercedes,...,2.0,86.934,84.796,84.197,3.0,3.0,113.957281,92.747,2,Overcast
3,Melbourne,Australia,2,1,25.0,57,German,Germany,32,Mercedes,...,2.0,86.934,84.796,84.197,4.0,3.0,113.957281,92.141,2,Overcast
4,Melbourne,Australia,2,1,25.0,57,German,Germany,32,Mercedes,...,2.0,86.934,84.796,84.197,5.0,3.0,113.957281,91.575,2,Overcast


In [7]:
# One-Hot Encoding columns
categorical_features = ['driver_nationality', 'constructor_name', 'circuit_location', 'weather_conditions']

columns_to_drop = ['results_position', 'results_points', 'total_laps', 
                     'driver_home', 'status', 'q1', 'q2', 'q3', 
                     'lap', 'position_at_given_lap', 'lap_time_for_current_lap','circuit_country','qual_position','q_avg','constructor_home']

X = df.drop(columns=columns_to_drop, axis=1)
y = df['results_position']

# Model Fitting:

In [8]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['grid_position', 'driver_age_atrace', 'average_lap_time', 'total_pit_stops']),
        ('cat', OneHotEncoder(drop='first'), categorical_features)
    ])

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('pca', PCA(n_components=4)),  
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

pipeline.fit(X_train, y_train)

joblib.dump(pipeline, 'f1_pipeline.pkl')

y_pred = pipeline.predict(X_test)

print("Random Forest Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Random Forest Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00      1866
           2       1.00      1.00      1.00      1847
           3       1.00      1.00      1.00      1853
           4       1.00      1.00      1.00      1863
           5       1.00      1.00      1.00      1845
           6       1.00      1.00      1.00      1821
           7       1.00      1.00      1.00      1820
           8       1.00      1.00      1.00      1844
           9       1.00      1.00      1.00      1817
          10       1.00      1.00      1.00      1814
          11       1.00      1.00      1.00      1810
          12       1.00      1.00      1.00      1847
          13       1.00      1.00      1.00      1865
          14       1.00      1.00      1.00      1810
          15       1.00      1.00      1.00      1785
          16       1.00      1.00      1.00      1652
          17       1.00      