### Fully Connected Neural Network - Multivariate Regression

In [289]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
import pandas as pd
import requests
import collections

full_data = pd.read_csv('f1_data.csv')

##### Common Helper Functions

In [290]:
# Using formuladata api

def get_location_details(location):
    url = 'https://formuladataapi.pythonanywhere.com/api/f1/circuit_data'
    filters = {}
    filters['location'] = location
    response = requests.get(url, params=filters)
    data = response.json()
    try:
      latitude = data[0]['latitude']
      longitude = data[0]['longitude']
      circuit_length = float(data[0]['circuit_length'][0:3])
    except:
      return None
    return [latitude, longitude, circuit_length]


def get_fp_details(driver, season, round):
    url = 'https://formuladataapi.pythonanywhere.com/api/f1'
    filters = {}
    filters['driver_name'] = driver
    filters['round'] = round
    filters['season'] = season
    response = requests.get(url, params=filters)
    data = response.json()
    try:
      fp1 = int(data[0]['fp1_position'])
    except:
      fp1 = None
    try:
      fp2 = int(data[0]['fp2_position'])
    except:
      fp2 = None
    try:
      fp3 = int(data[0]['fp3_position'])
    except:
      fp3 = None
    return [fp1, fp2, fp3]
       

In [291]:
def get_race_results_with_fp(map, season, round, location, weather, XX, model, fps):
    race_results = {}
    weather_dict = {'dry':0, "cloudy":1, "wet":2}
    latitude = get_location_details(location)[0]
    longitude = get_location_details(location)[1]
    circuit_length = get_location_details(location)[2]

    for driver, team in map.items():
        datapoint = [0]*XX.shape[1]
        datapoint[0] = season
        datapoint[1] = round
        datapoint[2] = weather_dict[weather]
        try:
            datapoint[3] = int(fps[driver][0])
        except:
            datapoint[3] = 20
        try:
            datapoint[4] = int(fps[driver][1])
        except:
            datapoint[4] = 20
        try:
            datapoint[5] = int(fps[driver][2])
        except:
            datapoint[5] = 20
        datapoint[6] = circuit_length
        datapoint[7] = latitude
        datapoint[8] = longitude

        loc = location.lower().replace(' ', '_')
        location_index = XX.columns.get_loc(f'location_{loc}')
        datapoint[location_index] = 1
    
        driver_index = XX.columns.get_loc(f'driver_name_{driver}')
        team_index = XX.columns.get_loc(f'constructor_name_{team}')
        datapoint[driver_index] = 1
        datapoint[team_index] = 1

        df = pd.DataFrame([datapoint], columns=XX.columns)
        test_prediction = model.predict(df ,verbose=0)
        race_results[driver] = test_prediction
    sorted_results = sorted(race_results.items(), key=lambda x: x[1])
    sorted_results = collections.OrderedDict(sorted_results)
    return sorted_results

In [292]:
def get_race_results(map, season, round, location, weather, XX, model):
    race_results = {}
    weather_dict = {'dry':0, "cloudy":1, "wet":2}
    latitude = get_location_details(location)[0]
    longitude = get_location_details(location)[1]
    circuit_length = get_location_details(location)[2]

    for driver, team in map.items():
        datapoint = [0]*X_test.shape[1]
        datapoint[0] = season
        datapoint[1] = round
        datapoint[2] = weather_dict[weather]

        datapoint[3] = circuit_length
        datapoint[4] = latitude
        datapoint[5] = longitude

        loc = location.lower().replace(' ', '_')
        location_index = X.columns.get_loc(f'location_{loc}')
        datapoint[location_index] = 1
    
        driver_index = X.columns.get_loc(f'driver_name_{driver}')
        team_index = X.columns.get_loc(f'constructor_name_{team}')
        datapoint[driver_index] = 1
        datapoint[team_index] = 1

        df = pd.DataFrame([datapoint], columns=XX.columns)
        test_prediction = model.predict(df ,verbose=0)
        race_results[driver] = test_prediction
    sorted_results = sorted(race_results.items(), key=lambda x: x[1])
    sorted_results = collections.OrderedDict(sorted_results)
    return sorted_results

In [293]:
driver_team_mapping = {
    'max_verstappen': 'red_bull_racing',
    'fernando_alonso': 'aston_martin',
    'lewis_hamilton': 'mercedes',
    'charles_leclerc': 'ferrari',
    'carlos_sainz': 'ferrari',
    'sergio_perez': 'red_bull_racing',
    'alexander_albon': 'williams',
    'esteban_ocon': 'aston_martin',
    'lance_stroll': 'aston_martin',
    'valtteri_bottas': 'alfa_romeo',
    'oscar_piastri': 'mclaren',
    'pierre_gasly': 'renault',
    'lando_norris': 'mclaren',
    'yuki_tsunoda': 'toro_rosso',
    'nico_hulkenberg': 'haas',
    'zhou_guanyu': 'alfa_romeo',
    'kevin_magnussen': 'haas',
    'nyck_de_vries': 'toro_rosso',
    'george_russell': 'mercedes',
    'logan_sargeant': 'williams'
}


In [294]:
def create_model(num_features, units=(128, 64), activation='relu', dropout_rate=0.0):
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(units[0], activation=activation, input_shape=(num_features,)),
        tf.keras.layers.Dense(units[1], activation=activation),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(1)
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model


##### Predicting Race Positions

In [295]:
race_data = full_data.drop(["has_fastest_lap","race_laps_completed","points","grid_position", "fastest_lap_position"], axis = 1)
race_data = race_data.iloc[:-20]
# 2021: 14744

X = race_data.drop('race_finishing_position', axis=1)  
y = race_data['race_finishing_position']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=25)
num_features = X_train.shape[1]
print("Training data shape:", X_train.shape, y_train.shape)
print("Testing data shape:", X_test.shape, y_test.shape)

Training data shape: (12643, 416) (12643,)
Testing data shape: (3161, 416) (3161,)


In [296]:
race_model = create_model(num_features,(64, 32), 'relu', 0.1)

In [297]:
# param_grid = {
#     'units': [(128, 64, 32, 16, 8), (128, 64, 32, 16, 8)],
#     'activation': ['relu', 'sigmoid'],
#     'dropout_rate': [0.0, 0.2, 0.4]
# }

# grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)
# grid_search.fit(X_train, y_train)

# print("Best Hyperparameters:", grid_search.best_params_)

# best_model = grid_search.best_estimator_
# best_model.fit(X_train, y_train)
# predictions = model.predict(X_test)

# mse = best_model.score(X_test, y_test)
# print("Mean Squared Error (MSE):", mse)

In [298]:
race_model.fit(X_train, y_train, epochs=100, batch_size=32)
loss = race_model.evaluate(X_test, y_test)
predictions = race_model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print('Mean Squared Error:', mse)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [299]:
print(predictions[0:10])
print(y_test[0:10])

[[10.515816 ]
 [ 7.9304175]
 [ 7.013461 ]
 [11.664524 ]
 [13.518106 ]
 [14.0820465]
 [17.639269 ]
 [12.090427 ]
 [17.275293 ]
 [ 6.4439726]]
7841      2
13845     2
12462     1
15157    14
13993    10
11498    20
4363     20
718       6
11441    13
12971     6
Name: race_finishing_position, dtype: int64


In [300]:
fps = {}
for driver, _ in driver_team_mapping.items():
    drivers = driver.split('_')
    for d in range(len(drivers)):
        drivers[d] = drivers[d][0].upper() + drivers[d][1:]
    driver_parsed = ' '.join(drivers)
    fps[driver] = get_fp_details(driver_parsed, 2023, 10)

print(fps)

{'max_verstappen': [1, 1, 8], 'fernando_alonso': [4, 10, 3], 'lewis_hamilton': [12, 15, 5], 'charles_leclerc': [5, None, 1], 'carlos_sainz': [7, 2, 6], 'sergio_perez': [2, 4, 14], 'alexander_albon': [3, 3, 2], 'esteban_ocon': [6, 13, 16], 'lance_stroll': [9, 6, 13], 'valtteri_bottas': [15, 16, 18], 'oscar_piastri': [10, 9, 17], 'pierre_gasly': [13, 8, 4], 'lando_norris': [8, 14, 12], 'yuki_tsunoda': [16, 18, 10], 'nico_hulkenberg': [20, 7, 19], 'zhou_guanyu': [18, 11, 20], 'kevin_magnussen': [19, 17, 15], 'nyck_de_vries': [11, 19, 11], 'george_russell': [14, 12, 9], 'logan_sargeant': [17, 5, 7]}


In [301]:
results_race = get_race_results_with_fp(driver_team_mapping, 2023, 10, 'Great Britain', 'cloudy', X_test, race_model, fps)

In [302]:
arr_race = []

for key, val in results_race.items():
    arr_race.append((key,"{:.2f}".format(float(val))))


for item in arr_race:
    print(item[0], item[1])


max_verstappen 7.08
alexander_albon 7.71
sergio_perez 8.11
lewis_hamilton 8.35
carlos_sainz 8.75
fernando_alonso 8.98
charles_leclerc 10.91
pierre_gasly 10.97
george_russell 11.33
lando_norris 12.06
lance_stroll 12.43
logan_sargeant 12.90
esteban_ocon 13.20
oscar_piastri 14.61
nyck_de_vries 15.12
valtteri_bottas 15.14
yuki_tsunoda 15.26
nico_hulkenberg 15.79
kevin_magnussen 16.11
zhou_guanyu 16.31


##### Predicting Qualifying Positions


In [303]:

quali_data = full_data.drop(["race_finishing_position", "has_fastest_lap","race_laps_completed","points", "fastest_lap_position"], axis = 1)
quali_data = quali_data.iloc[:-20]
# 2021: 14744

X = quali_data.drop('grid_position', axis=1)  
y = quali_data['grid_position']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=25)
num_features = X_train.shape[1]
print("Training data shape:", X_train.shape, y_train.shape)
print("Testing data shape:", X_test.shape, y_test.shape)

Training data shape: (12643, 416) (12643,)
Testing data shape: (3161, 416) (3161,)


In [304]:
quali_model = create_model(num_features,(64, 32), 'relu', 0.1)

In [305]:
quali_model.fit(X_train, y_train, epochs=100, batch_size=32)
loss = quali_model.evaluate(X_test, y_test)
predictions = quali_model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print('Mean Squared Error:', mse)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [306]:
print(predictions[0:10])
print(y_test[0:10])

[[ 7.550992 ]
 [ 4.6876473]
 [ 4.6562276]
 [ 8.630849 ]
 [ 9.398516 ]
 [12.159292 ]
 [15.974306 ]
 [ 7.3542695]
 [18.034359 ]
 [ 4.565941 ]]
7841      7
13845     2
12462     2
15157     8
13993    15
11498    16
4363     14
718       1
11441    18
12971     4
Name: grid_position, dtype: int64


In [307]:
results_quali = get_race_results_with_fp(driver_team_mapping, 2023, 10, 'Great Britain', 'cloudy', X_test, quali_model, fps)

In [308]:
arr_quali = []
for key, val in results_quali.items():
    arr_quali.append((key,"{:.2f}".format(float(val))))
for item in arr_quali:
    print(item[0], item[1])


alexander_albon 4.47
max_verstappen 4.53
carlos_sainz 5.70
sergio_perez 6.60
fernando_alonso 7.44
lewis_hamilton 7.52
charles_leclerc 7.62
pierre_gasly 7.97
logan_sargeant 8.67
george_russell 9.19
lando_norris 9.40
lance_stroll 10.82
oscar_piastri 11.92
esteban_ocon 12.62
nico_hulkenberg 13.18
nyck_de_vries 13.53
yuki_tsunoda 14.20
valtteri_bottas 14.67
zhou_guanyu 15.19
kevin_magnussen 15.28


##### Predict Fastest Lap

In [309]:
fl_data = full_data.drop(["race_finishing_position", "has_fastest_lap","race_laps_completed","points", "grid_position"], axis = 1)
fl_data = fl_data.iloc[:-20]
# 2021: 14744

X = fl_data.drop('fastest_lap_position', axis=1)  
y = fl_data['fastest_lap_position']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=25)
num_features = X_train.shape[1]
print("Training data shape:", X_train.shape, y_train.shape)
print("Testing data shape:", X_test.shape, y_test.shape)

Training data shape: (12643, 416) (12643,)
Testing data shape: (3161, 416) (3161,)


In [310]:
fl_model = create_model(num_features,(64, 32), 'relu', 0.1)
fl_model.fit(X_train, y_train, epochs=100, batch_size=32)
loss = fl_model.evaluate(X_test, y_test)
predictions = fl_model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print('Mean Squared Error:', mse)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [315]:
print(predictions[0:10])
print(y_test[0:10])

[[ 6.9322605]
 [ 4.2308826]
 [ 4.41772  ]
 [ 8.716682 ]
 [ 9.081712 ]
 [10.661795 ]
 [13.729399 ]
 [ 6.6319985]
 [16.657661 ]
 [ 4.271223 ]]
7841      3
13845     5
12462     1
15157     7
13993     6
11498    11
4363     12
718       6
11441    15
12971     5
Name: fastest_lap_position, dtype: int64


In [316]:
results_fl = get_race_results_with_fp(driver_team_mapping, 2023, 10, 'Great Britain', 'cloudy', X_test, fl_model, fps)

In [317]:
arr_fl = []
for key, val in results_fl.items():
    arr_fl.append((key,"{:.2f}".format(float(val))))
for item in arr_fl:
    print(item[0], item[1])

max_verstappen 3.91
alexander_albon 4.50
sergio_perez 4.68
carlos_sainz 5.20
fernando_alonso 6.65
lewis_hamilton 7.20
charles_leclerc 8.21
logan_sargeant 8.22
pierre_gasly 8.46
lance_stroll 8.69
lando_norris 8.92
george_russell 8.93
oscar_piastri 12.09
esteban_ocon 12.15
yuki_tsunoda 12.28
nico_hulkenberg 12.37
zhou_guanyu 12.85
nyck_de_vries 13.16
valtteri_bottas 13.40
kevin_magnussen 13.88


In [318]:
print(arr_race)
print("........................")
print(arr_quali)
print("........................")
print(arr_fl)

[('max_verstappen', '7.08'), ('alexander_albon', '7.71'), ('sergio_perez', '8.11'), ('lewis_hamilton', '8.35'), ('carlos_sainz', '8.75'), ('fernando_alonso', '8.98'), ('charles_leclerc', '10.91'), ('pierre_gasly', '10.97'), ('george_russell', '11.33'), ('lando_norris', '12.06'), ('lance_stroll', '12.43'), ('logan_sargeant', '12.90'), ('esteban_ocon', '13.20'), ('oscar_piastri', '14.61'), ('nyck_de_vries', '15.12'), ('valtteri_bottas', '15.14'), ('yuki_tsunoda', '15.26'), ('nico_hulkenberg', '15.79'), ('kevin_magnussen', '16.11'), ('zhou_guanyu', '16.31')]
........................
[('alexander_albon', '4.47'), ('max_verstappen', '4.53'), ('carlos_sainz', '5.70'), ('sergio_perez', '6.60'), ('fernando_alonso', '7.44'), ('lewis_hamilton', '7.52'), ('charles_leclerc', '7.62'), ('pierre_gasly', '7.97'), ('logan_sargeant', '8.67'), ('george_russell', '9.19'), ('lando_norris', '9.40'), ('lance_stroll', '10.82'), ('oscar_piastri', '11.92'), ('esteban_ocon', '12.62'), ('nico_hulkenberg', '13.18'