In [156]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from io import StringIO

In [None]:
# Load the training rows and the rows we want predictions for.
train_df = pd.read_csv(filepath_or_buffer="TrainData.csv")
pred_df = pd.read_csv(filepath_or_buffer="PredData.csv")

# Preview the first rows of the training table.
train_df.head()

Unnamed: 0,Driver,Constructor,RaceName,DriverPointsSoFar,ConstructorPointsSoFar,NextRacePosition,DriverWinRateThisSeason,DriverPodiumRate,AverageQualifyingPosition,DriverExperienceYears,DNFCountThisSeason,TeammateComparison
0,VER,Red Bull Racing,Bahrain Grand Prix,0.0,0.0,1,0.0,0.0,,9,0,0.0
1,VER,Red Bull Racing,Emilia Romagna Grand Prix,77.0,141.0,1,0.75,0.75,1.0,9,1,13.0
2,VER,Red Bull Racing,Japanese Grand Prix,51.0,97.0,1,0.666667,0.666667,1.0,9,1,5.0
3,VER,Red Bull Racing,Australian Grand Prix,51.0,87.0,19,1.0,1.0,1.0,9,0,15.0
4,VER,Red Bull Racing,Monaco Grand Prix,102.0,170.0,6,0.8,0.8,1.0,9,1,34.0


In [167]:
# ---------- PREPARE FEATURES ----------
# Categorical inputs; these are one-hot encoded before training.
categorical_cols = ["Driver", "Constructor", "RaceName"]

# Numeric columns are everything except the categorical columns, the target,
# and pandas' auto-named "Unnamed: N" columns. An "Unnamed: 0" column shows up
# in the train_df preview and appears to be the row index that was written
# into the CSV; a row counter carries no race information, so it must not be
# fed to the model as a feature.
_non_feature_cols = set(categorical_cols) | {"NextRacePosition"}
numeric_cols = [
    c
    for c in train_df.columns
    if c not in _non_feature_cols and not str(c).startswith("Unnamed:")
]


In [168]:
# One-hot encode the categorical columns. handle_unknown="ignore" lets the
# encoder accept categories at transform time that it did not see here.
ohe = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
X_cat = ohe.fit(train_df[categorical_cols]).transform(train_df[categorical_cols])

# Numeric features as a float array, placed to the left of the one-hot block.
X_num = train_df[numeric_cols].astype(float).to_numpy()
X_train = np.concatenate((X_num, X_cat), axis=1)

# Regression target: the finishing position in the next race.
y_train = train_df["NextRacePosition"].astype(float).to_numpy()

In [169]:
# ---------- TRAIN MODEL ----------
# Random forest with 200 trees and a fixed random_state.
model = RandomForestRegressor(random_state=42, n_estimators=200)
# fit() is the cell's last expression, so the fitted estimator is displayed.
model.fit(X=X_train, y=y_train)


0,1,2
,n_estimators,200
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [None]:
# Preview the first five rows of the prediction table.
# NOTE(review): the saved output of this cell lists a PredictedNextRacePosition
# column, which this notebook only assigns in a later cell -- the preview was
# presumably captured after that cell ran. Confirm by re-running top to bottom.
pred_df.head(n=5)

Unnamed: 0,Driver,Constructor,PredictedNextRacePosition
0,LEC,Ferrari,1
1,VER,Red Bull Racing,2
2,RUS,Mercedes,3
3,PIA,McLaren,4
4,NOR,McLaren,5


: 

In [170]:
# ---------- PREPARE PREDICTION MATRIX ----------
# Reuse the encoder fitted on the training data; categories it has not seen
# are encoded as all zeros for that feature.
Xp_cat = ohe.transform(pred_df[categorical_cols])
Xp_num = pred_df[numeric_cols].astype(float).to_numpy()

# Same column layout as the training matrix: numeric block first, then one-hot.
X_pred = np.concatenate((Xp_num, Xp_cat), axis=1)


In [171]:
# ---------- PREDICT CONTINUOUS POSITIONS, THEN CONVERT TO UNIQUE RANKS 1..N ----------
# The regressor returns one real-valued position estimate per row of X_pred.
pred_continuous = model.predict(X=X_pred)


In [172]:
# Lower predicted number -> better finish (position 1 is best). Sort ascending.
# kind="stable" makes ties deterministic: drivers with identical predicted
# values keep their original row order instead of depending on the default
# (unstable) sort implementation.
order = np.argsort(pred_continuous, kind="stable")  # indices ordered best -> worst
# Invert the permutation: the row at order[k] receives rank k + 1.
ranks = np.empty_like(order, dtype=int)
ranks[order] = np.arange(1, len(pred_continuous) + 1)  # unique integers 1..N


In [173]:
# Attach the integer ranks to the prediction table (adds/overwrites the column).
pred_df["PredictedNextRacePosition"] = ranks

# Build the final grid: identifying columns plus the rank, ordered 1..N,
# with a fresh 0-based index.
final = (
    pred_df.loc[:, ["Driver", "Constructor", "PredictedNextRacePosition"]]
    .sort_values(by="PredictedNextRacePosition")
    .reset_index(drop=True)
)

# Plain-text dump of the whole grid, without the index column.
print(final.to_string(index=False))

Driver     Constructor  PredictedNextRacePosition
   LEC         Ferrari                          1
   VER Red Bull Racing                          2
   RUS        Mercedes                          3
   PIA         McLaren                          4
   NOR         McLaren                          5
   HAM         Ferrari                          6
   ANT        Mercedes                          7
   HAD    Racing Bulls                          8
   ALO    Aston Martin                          9
   TSU Red Bull Racing                         10
   COL          Alpine                         11
   HUL     Kick Sauber                         12
   DOO          Alpine                         13
   LAW    Racing Bulls                         14
   BOR     Kick Sauber                         15
   GAS          Alpine                         16
   BEA    Haas F1 Team                         17
   OCO    Haas F1 Team                         18
   STR    Aston Martin                         19


In [174]:
import pickle


In [175]:
# Persist the fitted model together with the encoder and the column lists as
# one 4-tuple: (model, ohe, numeric_cols, categorical_cols).
# Pickle files can execute code when loaded; only unpickle this file from a
# trusted source.
bundle = (model, ohe, numeric_cols, categorical_cols)
with open("race_position_model.pkl", mode="wb") as fh:
    pickle.dump(bundle, fh)

In [176]:
# Write the ordered grid to disk without the DataFrame index column.
final.to_csv("final.csv", index=False)