In [183]:
import pandas as pd
import numpy as np
import fastf1 as ff1
from datetime import datetime as dt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', None)

In [303]:
def get_dataframe_schedule(year):
    df = ff1.get_event_schedule(year)
    return (
        df
        .drop(columns=["Location", "OfficialEventName", "Session1Date", "Session1DateUtc", "Session2Date", "Session2DateUtc", "Session3Date", "Session3DateUtc",
                       "Session4Date", "Session4DateUtc", "Session5Date", "Session5DateUtc", "F1ApiSupport"])
        .loc[~df['EventName'].str.contains('Pre-Season', na=False)]
        .reset_index(drop=True)
    )

def get_race(year, gp):
    race = ff1.get_session(year, gp, "R")
    race.load(laps=False, telemetry=False, weather=False, messages=False, livedata=False)
    race_results_df = race.results
    race_results_df = race_results_df.drop(columns=["BroadcastName", "Abbreviation", "DriverId", "TeamColor", "TeamId", "FirstName", "LastName", "HeadshotUrl", "CountryCode", "Q1", "Q2", "Q3", "Time"])
    df_event = get_dataframe_schedule(year)
    event_name = df_event.at[gp-1, "EventName"]
    race_results_df["EventName"] = event_name
    return race_results_df

def get_laps_data(year, gp):
    race = ff1.get_session(year, gp, "R")
    race.load(telemetry=False, weather=False, messages=False, livedata=False)
    df_laps = race.laps
    df = df_laps.drop(columns=["Time", "PitOutTime", "PitInTime", "Sector1SessionTime", "Sector2SessionTime", "Sector3SessionTime", "SpeedFL", "SpeedST",
                           "IsPersonalBest", "LapStartTime", "LapStartDate", "TrackStatus", "DeletedReason", "FastF1Generated", "IsAccurate"])

In [304]:
def run_prediction():
    df = pd.read_csv("df_test.csv")
    
    #df = df.drop(columns=["Unnamed: 0", "Session5DateUtc", "FullName", "Points", "LapNumber", "ClassifiedPosition", "Position_y"])

    season = dt.now().year
    events = ff1.get_event_schedule(season)
    today = pd.Timestamp.now()
    past_events = events[events["EventDate"] <= today]
    last_event = past_events.iloc[-1]
    race = last_event.RoundNumber

    if race in df["EventName"].values:
        return df
    else:
        df_race = get_race(2024, race)
        laps = get_laps_data(2024, race)

        if pd.isna(df_race["Position"].iloc[0]):
            max_lap_number = laps['LapNumber'].max()
            last_lap = laps[laps["LapNumber"] == max_lap_number]
            last_positions = last_lap.set_index('DriverNumber')['Position']
            df_race["Position"] = df_race["DriverNumber"].map(last_positions).fillna(0).astype(int)
            df_race['IsZero'] = df_race['Position'] == 0
            df_race = df_race.sort_values(by=['IsZero', 'Position']).reset_index(drop=True)
            df_race = df_race.drop(columns=['IsZero'])

        df_last_race = pd.merge(df_race, laps, on="DriverNumber")
        df_last_race["Sector1Time"] = pd.to_timedelta(df_last_race["Sector1Time"])
        df_last_race["Sector2Time"] = pd.to_timedelta(df_last_race["Sector2Time"])
        df_last_race["Sector3Time"] = pd.to_timedelta(df_last_race["Sector3Time"])
        df_last_race["LapTime"] = pd.to_timedelta(df_last_race["LapTime"])

        def fill_na_mean(df, cols):
            for col in cols:
                mean_values = df.groupby(["EventName", "FullName"])[col].transform(lambda x: x.dropna().mean())
                df[col].fillna(mean_values, inplace=True)
            return df

        columns_to_fill = ['SpeedI1', 'SpeedI2', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'LapTime']   
        df_last_race = fill_na_mean(df_last_race, columns_to_fill)
        df_last_race['Sector1Time'] = df_last_race['Sector1Time'].dt.total_seconds()
        df_last_race['Sector2Time'] = df_last_race['Sector2Time'].dt.total_seconds()
        df_last_race['Sector3Time'] = df_last_race['Sector3Time'].dt.total_seconds()
        df_last_race['LapTime'] = df_last_race['LapTime'].dt.total_seconds()

        df_last_race['Status'] = np.where(df_last_race['Status'] == 'Finished', 1, 0)

        values_to_replace = ["R", "D", "E", "W", "F", "N"]
        df_last_race["ClassifiedPosition"] = np.where(df_last_race["ClassifiedPosition"].isin(values_to_replace), 0, df_last_race["ClassifiedPosition"])

        df_last_race["FreshTyre"] = df_last_race["FreshTyre"].astype(int)

        label_encoder = LabelEncoder()

        df_last_race['EventName'] = race
        df_last_race['Compound'] = label_encoder.fit_transform(df_last_race['Compound'])
        df_last_race["NameEncoder"] = label_encoder.fit_transform(df_last_race["FullName"])

        df_last_race = df_last_race.drop(columns=["DriverNumber", "TeamName", "FullName",
                                    "ClassifiedPosition", "Points",
                                    "Driver", "LapNumber", "Team",
                                    "Position_y", "Deleted"])

        df_last_race = df_last_race.dropna()
        event_name = df_last_race.pop('EventName')
        df_last_race.insert(0, 'EventName', event_name)
        df_concat_to_model = pd.concat([df, df_last_race], ignore_index=True)
        return df_concat_to_model
    
df = run_prediction()

In [404]:
def mean_features(df):

    mean_df = df.groupby('NameEncoder').agg({
        "GridPosition": 'mean',
        "Status": 'mean',
        "LapTime": 'mean',
        "Stint": 'mean',
        "Sector1Time": 'mean',
        "Sector2Time": 'mean',
        "Sector3Time": 'mean',
        "SpeedI1": 'mean',
        "SpeedI2": 'mean',
        "Compound": 'mean',
        "TyreLife": 'mean',
        "FreshTyre": 'mean',
    }).reset_index()
    mean_df[['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'SpeedI1', 'SpeedI2', 'TyreLife']] = scaler.fit_transform(
        mean_df[['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'SpeedI1', 'SpeedI2', 'TyreLife']]
)
    
    name_encoder = mean_df.pop('NameEncoder')
    mean_df['NameEncoder'] = name_encoder
    
    return mean_df

In [305]:
scaler = StandardScaler()

df_scaler = df.copy()

df_scaler[['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'SpeedI1', 'SpeedI2', 'TyreLife']] = scaler.fit_transform(
    df_scaler[['LapTime', 'Sector1Time', 'Sector2Time', 'Sector3Time', 'SpeedI1', 'SpeedI2', 'TyreLife']]
)

In [308]:
encoder_combination = pd.read_csv("df_model.csv")
encoder_combination = encoder_combination[["FullName", "NameEncoder"]].drop_duplicates()
encoder_combination

Unnamed: 0,FullName,NameEncoder
0,Max Verstappen,13
747,Sergio Perez,18
1451,Carlos Sainz,1
2185,Charles Leclerc,2
2956,George Russell,6
3736,Lando Norris,10
4530,Lewis Hamilton,11
5288,Oscar Piastri,16
6089,Fernando Alonso,5
6885,Lance Stroll,9


In [309]:
df_scaler["Position_x"] = np.where(df_scaler["Position_x"] == 1.0, 1, 0)

In [310]:
df_scaler

Unnamed: 0,EventName,Position_x,GridPosition,Status,LapTime,Stint,Sector1Time,Sector2Time,Sector3Time,SpeedI1,SpeedI2,Compound,TyreLife,FreshTyre,NameEncoder
0,2,1,1.0,1,0.069533,1.0,-0.065233,1.176261,-0.534298,-0.442171,0.032634,3,-0.899308,0,13
1,2,1,1.0,1,0.057122,1.0,0.411457,1.234199,-0.521686,-0.496297,-0.016132,3,-0.815752,0,13
2,2,1,1.0,1,0.062863,1.0,0.424069,1.278936,-0.513236,-0.523360,-0.138047,3,-0.732196,0,13
3,2,1,1.0,1,0.061531,1.0,0.413736,1.268082,-0.508828,-0.469234,0.105784,3,-0.648640,0,13
4,2,1,1.0,1,0.068139,1.0,0.462968,1.292137,-0.504175,-0.523360,-0.089281,3,-0.565084,0,13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191872,14,0,6.0,0,0.199507,2.0,0.422549,2.098725,0.135653,1.804053,-0.991454,0,1.273145,1,6
191873,14,0,6.0,0,0.201052,2.0,0.412977,2.134075,0.128918,2.023483,-0.991454,0,1.356700,1,6
191874,14,0,6.0,0,0.197422,2.0,0.426348,2.094325,0.115937,1.993494,-0.967071,0,1.440256,1,6
191875,14,0,6.0,0,0.195361,2.0,0.422701,2.087577,0.104427,1.695802,-0.967071,0,1.523812,1,6


In [321]:
X = df_scaler.drop(columns=["Position_x", "EventName"])
y = df_scaler["Position_x"]

In [398]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [386]:
model = Sequential()
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X, y, epochs=10, batch_size=64, validation_data=(X_test, y_test))

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2999/2999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 821us/step - accuracy: 0.8195 - loss: 0.4603 - val_accuracy: 0.9477 - val_loss: 0.1346
Epoch 2/10
[1m2999/2999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 786us/step - accuracy: 0.9475 - loss: 0.1389 - val_accuracy: 0.9500 - val_loss: 0.1191
Epoch 3/10
[1m2999/2999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 788us/step - accuracy: 0.9505 - loss: 0.1232 - val_accuracy: 0.9549 - val_loss: 0.1146
Epoch 4/10
[1m2999/2999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 789us/step - accuracy: 0.9525 - loss: 0.1174 - val_accuracy: 0.9570 - val_loss: 0.1102
Epoch 5/10
[1m2999/2999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 783us/step - accuracy: 0.9538 - loss: 0.1140 - val_accuracy: 0.9579 - val_loss: 0.1055
Epoch 6/10
[1m2999/2999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 779us/step - accuracy: 0.9559 - loss: 0.1091 - val_accuracy: 0.9584 - val_loss: 0.1019
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x1fe55dcc750>

In [430]:
from tensorflow.keras.layers import BatchNormalization, LeakyReLU
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.regularizers import l2

model = Sequential()

model.add(Dense(124, input_dim=X_train.shape[1], kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))
model.add(Dropout(0.5))

model.add(Dense(64, kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))
model.add(Dropout(0.3))

model.add(Dense(32, kernel_regularizer=l2(0.001)))
model.add(BatchNormalization())
model.add(LeakyReLU(alpha=0.1))

model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-6)
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test), callbacks=[reduce_lr, early_stopping])


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2399/2399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - accuracy: 0.9299 - loss: 0.2836 - val_accuracy: 0.9593 - val_loss: 0.1149 - learning_rate: 0.0010
Epoch 2/10
[1m2399/2399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9549 - loss: 0.1264 - val_accuracy: 0.9592 - val_loss: 0.1029 - learning_rate: 0.0010
Epoch 3/10
[1m2399/2399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9570 - loss: 0.1172 - val_accuracy: 0.9567 - val_loss: 0.1074 - learning_rate: 0.0010
Epoch 4/10
[1m2399/2399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9561 - loss: 0.1150 - val_accuracy: 0.9618 - val_loss: 0.1050 - learning_rate: 0.0010
Epoch 5/10
[1m2399/2399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9586 - loss: 0.1122 - val_accuracy: 0.9619 - val_loss: 0.0955 - learning_rate: 0.0010
Epoch 6/10
[1m2399/2399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

<keras.src.callbacks.history.History at 0x1fec3af02d0>

In [431]:
df_to_predict = df.copy()
df_to_predict = mean_features(df_to_predict)
probabilities = model.predict(df_to_predict)
np.set_printoptions(suppress=True)
prob_check = np.round(probabilities * 100, 2)
encoder_combination = encoder_combination.sort_values(by='NameEncoder')
encoder_combination["Probability"] = prob_check
encoder_combination

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step


Unnamed: 0,FullName,NameEncoder,Probability
10583,Alexander Albon,0,0.0
1451,Carlos Sainz,1,0.25
2185,Charles Leclerc,2,0.2
9102,Daniel Ricciardo,3,0.0
12016,Esteban Ocon,4,0.0
6089,Fernando Alonso,5,0.05
2956,George Russell,6,0.44
7636,Guanyu Zhou,7,0.0
8384,Kevin Magnussen,8,0.01
6885,Lance Stroll,9,0.1
