### Fully Connected Neural Network - Classification

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
import pandas as pd
import requests
import collections
import numpy as np

full_data = pd.read_csv('f1_data.csv')

In [18]:
def create_classification_model(num_features, units=(128, 64), activation='relu', dropout_rate=0.0):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(units[0], activation=activation, input_shape=(num_features,)),
        tf.keras.layers.Dense(units[1], activation=activation),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(1, activation='sigmoid')  # Adjusted to 1 unit for binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy')
    return model


Accuracy: 0.774122113255299


In [42]:
# Using formuladata api

def get_location_details(location):
    url = 'https://formuladataapi.pythonanywhere.com/api/f1/circuit_data'
    filters = {}
    filters['location'] = location
    response = requests.get(url, params=filters)
    data = response.json()
    try:
      latitude = data[0]['latitude']
      longitude = data[0]['longitude']
      circuit_length = float(data[0]['circuit_length'][0:3])
    except:
      return None
    return [latitude, longitude, circuit_length]


def get_fp_details(driver, season, round):
    url = 'https://formuladataapi.pythonanywhere.com/api/f1'
    filters = {}
    filters['driver_name'] = driver
    filters['round'] = round
    filters['season'] = season
    response = requests.get(url, params=filters)
    data = response.json()
    try:
      fp1 = int(data[0]['fp1_position'])
    except:
      fp1 = None
    try:
      fp2 = int(data[0]['fp2_position'])
    except:
      fp2 = None
    try:
      fp3 = int(data[0]['fp3_position'])
    except:
      fp3 = None
    return [fp1, fp2, fp3]

In [43]:
def get_race_results_with_fp(map, season, round, location, weather, XX, model, fps):
    race_results = {}
    weather_dict = {'dry':0, "cloudy":1, "wet":2}
    latitude = get_location_details(location)[0]
    longitude = get_location_details(location)[1]
    circuit_length = get_location_details(location)[2]

    for driver, team in map.items():
        datapoint = [0]*XX.shape[1]
        datapoint[0] = season
        datapoint[1] = round
        datapoint[2] = weather_dict[weather]
        try:
            datapoint[3] = int(fps[driver][0])
        except:
            datapoint[3] = 20
        try:
            datapoint[4] = int(fps[driver][1])
        except:
            datapoint[4] = 20
        try:
            datapoint[5] = int(fps[driver][2])
        except:
            datapoint[5] = 20
        datapoint[6] = circuit_length
        datapoint[7] = latitude
        datapoint[8] = longitude

        loc = location.lower().replace(' ', '_')
        location_index = XX.columns.get_loc(f'location_{loc}')
        datapoint[location_index] = 1
    
        driver_index = XX.columns.get_loc(f'driver_name_{driver}')
        team_index = XX.columns.get_loc(f'constructor_name_{team}')
        datapoint[driver_index] = 1
        datapoint[team_index] = 1

        df = pd.DataFrame([datapoint], columns=XX.columns)
        test_prediction = model.predict(df ,verbose=0)
        race_results[driver] = test_prediction[0][0]
    sorted_results = sorted(race_results.items(), key=lambda x: x[1], reverse = True)
    sorted_results = collections.OrderedDict(sorted_results)
    return sorted_results

In [44]:
driver_team_mapping = {
    'max_verstappen': 'red_bull_racing',
    'fernando_alonso': 'aston_martin',
    'lewis_hamilton': 'mercedes',
    'charles_leclerc': 'ferrari',
    'carlos_sainz': 'ferrari',
    'sergio_perez': 'red_bull_racing',
    'alexander_albon': 'williams',
    'esteban_ocon': 'aston_martin',
    'lance_stroll': 'aston_martin',
    'valtteri_bottas': 'alfa_romeo',
    'oscar_piastri': 'mclaren',
    'pierre_gasly': 'renault',
    'lando_norris': 'mclaren',
    'yuki_tsunoda': 'toro_rosso',
    'nico_hulkenberg': 'haas',
    'zhou_guanyu': 'alfa_romeo',
    'kevin_magnussen': 'haas',
    'nyck_de_vries': 'toro_rosso',
    'george_russell': 'mercedes',
    'logan_sargeant': 'williams'
}

In [45]:
fps = {}
for driver, _ in driver_team_mapping.items():
    drivers = driver.split('_')
    for d in range(len(drivers)):
        drivers[d] = drivers[d][0].upper() + drivers[d][1:]
    driver_parsed = ' '.join(drivers)
    fps[driver] = get_fp_details(driver_parsed, 2023, 10)

print(fps)

{'max_verstappen': [1, 1, 8], 'fernando_alonso': [4, 10, 3], 'lewis_hamilton': [12, 15, 5], 'charles_leclerc': [5, None, 1], 'carlos_sainz': [7, 2, 6], 'sergio_perez': [2, 4, 14], 'alexander_albon': [3, 3, 2], 'esteban_ocon': [6, 13, 16], 'lance_stroll': [9, 6, 13], 'valtteri_bottas': [15, 16, 18], 'oscar_piastri': [10, 9, 17], 'pierre_gasly': [13, 8, 4], 'lando_norris': [8, 14, 12], 'yuki_tsunoda': [16, 18, 10], 'nico_hulkenberg': [20, 7, 19], 'zhou_guanyu': [18, 11, 20], 'kevin_magnussen': [19, 17, 15], 'nyck_de_vries': [11, 19, 11], 'george_russell': [14, 12, 9], 'logan_sargeant': [17, 5, 7]}


##### Race Prediction

In [None]:
race_data = full_data.iloc[:]
race_data["in_top_5"] = race_data['race_finishing_position'].apply(lambda x: 1 if x<=5 else 0)
race_data = race_data.drop(["grid_position", "has_fastest_lap","race_laps_completed","points", "fastest_lap_position", "race_finishing_position"], axis = 1)

# 2021: 14744

X = race_data.drop('in_top_5', axis=1)  
y = race_data['in_top_5']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=25)
num_features = X_train.shape[1]
print("Training data shape:", X_train.shape, y_train.shape)
print("Testing data shape:", X_test.shape, y_test.shape)

In [None]:
race_model = create_classification_model(num_features, (64, 32), 'relu', 0.1)
race_model.fit(X_train, y_train, epochs=100, batch_size=32)

accuracy = race_model.evaluate(X_test, y_test)
predictions = race_model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)
accuracy = accuracy_score(y_test, predicted_labels)
print('Accuracy:', accuracy)

In [46]:
results_race = get_race_results_with_fp(driver_team_mapping, 2023, 10, 'Great Britain', 'cloudy', X_test, race_model, fps)
arr_race = []

for key, val in results_race.items():
    arr_race.append((key,"{:.2f}".format(float(val))))


for item in arr_race:
    print(item[0], item[1])

max_verstappen 0.66
alexander_albon 0.58
carlos_sainz 0.48
sergio_perez 0.46
lewis_hamilton 0.43
fernando_alonso 0.35
charles_leclerc 0.31
george_russell 0.24
pierre_gasly 0.15
logan_sargeant 0.15
lando_norris 0.09
lance_stroll 0.02
oscar_piastri 0.02
esteban_ocon 0.01
valtteri_bottas 0.00
nyck_de_vries 0.00
nico_hulkenberg 0.00
yuki_tsunoda 0.00
kevin_magnussen 0.00
zhou_guanyu 0.00


##### Qualifying Prediction

In [49]:
quali_data = full_data.iloc[:]
quali_data["in_top_5"] = quali_data['grid_position'].apply(lambda x: 1 if x<=5 else 0)
quali_data = quali_data.drop(["grid_position", "has_fastest_lap","race_laps_completed","points", "fastest_lap_position", "race_finishing_position"], axis = 1)

# 2021: 14744

X = quali_data.drop('in_top_5', axis=1)  
y = quali_data['in_top_5']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=25)
num_features = X_train.shape[1]
print("Training data shape:", X_train.shape, y_train.shape)
print("Testing data shape:", X_test.shape, y_test.shape)

Training data shape: (12643, 416) (12643,)
Testing data shape: (3161, 416) (3161,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  quali_data["in_top_5"] = quali_data['grid_position'].apply(lambda x: 1 if x<=5 else 0)


In [50]:
quali_model = create_classification_model(num_features, (64, 32), 'relu', 0.1)
quali_model.fit(X_train, y_train, epochs=100, batch_size=32)

accuracy = quali_model.evaluate(X_test, y_test)
predictions = quali_model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)
accuracy = accuracy_score(y_test, predicted_labels)
print('Accuracy:', accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [51]:
results_quali = get_race_results_with_fp(driver_team_mapping, 2023, 10, 'Great Britain', 'cloudy', X_test, quali_model, fps)
arr_quali = []

for key, val in results_quali.items():
    arr_quali.append((key,"{:.2f}".format(float(val))))


for item in arr_quali:
    print(item[0], item[1])

max_verstappen 0.80
alexander_albon 0.73
sergio_perez 0.64
carlos_sainz 0.64
lewis_hamilton 0.58
charles_leclerc 0.54
fernando_alonso 0.49
logan_sargeant 0.47
pierre_gasly 0.43
george_russell 0.27
lando_norris 0.16
lance_stroll 0.07
oscar_piastri 0.03
nico_hulkenberg 0.01
esteban_ocon 0.01
valtteri_bottas 0.01
kevin_magnussen 0.01
nyck_de_vries 0.00
yuki_tsunoda 0.00
zhou_guanyu 0.00


##### Fastest Lap Prediction

In [52]:
fl_data = full_data.iloc[:]
fl_data["in_top_5"] = fl_data['fastest_lap_position'].apply(lambda x: 1 if x<=5 else 0)
fl_data = fl_data.drop(["grid_position", "has_fastest_lap","race_laps_completed","points", "fastest_lap_position", "race_finishing_position"], axis = 1)

# 2021: 14744

X = fl_data.drop('in_top_5', axis=1)  
y = fl_data['in_top_5']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=25)
num_features = X_train.shape[1]
print("Training data shape:", X_train.shape, y_train.shape)
print("Testing data shape:", X_test.shape, y_test.shape)

Training data shape: (12643, 416) (12643,)
Testing data shape: (3161, 416) (3161,)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fl_data["in_top_5"] = fl_data['fastest_lap_position'].apply(lambda x: 1 if x<=5 else 0)


In [53]:
fl_model = create_classification_model(num_features, (64, 32), 'relu', 0.1)
fl_model.fit(X_train, y_train, epochs=100, batch_size=32)

accuracy = fl_model.evaluate(X_test, y_test)
predictions = fl_model.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)
accuracy = accuracy_score(y_test, predicted_labels)
print('Accuracy:', accuracy)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [54]:
results_fl = get_race_results_with_fp(driver_team_mapping, 2023, 10, 'Great Britain', 'cloudy', X_test, fl_model, fps)
arr_fl = []

for key, val in results_fl.items():
    arr_fl.append((key,"{:.2f}".format(float(val))))


for item in arr_fl:
    print(item[0], item[1])

max_verstappen 0.70
alexander_albon 0.65
sergio_perez 0.58
carlos_sainz 0.29
logan_sargeant 0.25
lewis_hamilton 0.23
fernando_alonso 0.14
george_russell 0.13
charles_leclerc 0.11
pierre_gasly 0.08
lando_norris 0.06
lance_stroll 0.03
oscar_piastri 0.02
yuki_tsunoda 0.01
zhou_guanyu 0.01
valtteri_bottas 0.01
nico_hulkenberg 0.01
nyck_de_vries 0.01
kevin_magnussen 0.00
esteban_ocon 0.00


In [58]:
print("RACE ", arr_race)
print("........................")
print("QUALI", arr_quali)
print("........................")
print("F_LAP", arr_fl)

RACE  [('max_verstappen', '0.66'), ('alexander_albon', '0.58'), ('carlos_sainz', '0.48'), ('sergio_perez', '0.46'), ('lewis_hamilton', '0.43'), ('fernando_alonso', '0.35'), ('charles_leclerc', '0.31'), ('george_russell', '0.24'), ('pierre_gasly', '0.15'), ('logan_sargeant', '0.15'), ('lando_norris', '0.09'), ('lance_stroll', '0.02'), ('oscar_piastri', '0.02'), ('esteban_ocon', '0.01'), ('valtteri_bottas', '0.00'), ('nyck_de_vries', '0.00'), ('nico_hulkenberg', '0.00'), ('yuki_tsunoda', '0.00'), ('kevin_magnussen', '0.00'), ('zhou_guanyu', '0.00')]
........................
QUALI [('max_verstappen', '0.80'), ('alexander_albon', '0.73'), ('sergio_perez', '0.64'), ('carlos_sainz', '0.64'), ('lewis_hamilton', '0.58'), ('charles_leclerc', '0.54'), ('fernando_alonso', '0.49'), ('logan_sargeant', '0.47'), ('pierre_gasly', '0.43'), ('george_russell', '0.27'), ('lando_norris', '0.16'), ('lance_stroll', '0.07'), ('oscar_piastri', '0.03'), ('nico_hulkenberg', '0.01'), ('esteban_ocon', '0.01'), ('v