# TFG - Fórmula 1 - Test ranking NN

Autor: Manuel Ventura

Test de la red neuronal obtenida para predecir rankings, utilizando la métrica *Kendall's Tau*.

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
import joblib
from sklearn.preprocessing import MinMaxScaler
import scipy

## Test model over full dataset (regression model)

In [None]:
df_old = pd.read_csv('data_ready/LEARNING_DF_NORMAL.csv')
grid = pd.read_csv('data_ready/2023_grid_data.csv')
date_parse_list = ['date', 'fp1_date', 'fp2_date', 'fp3_date', 'quali_date', 'sprint_date']
races = pd.read_csv('f1db_csv/races.csv', na_values=["\\N"], parse_dates=date_parse_list)
drivers = pd.read_csv('f1db_csv/drivers.csv')
model = tf.keras.models.load_model('models/nn_model_final.h5')
scaler = joblib.load('models/minmaxscaler_nn_final.pkl')

In [145]:
races = races.loc[races['year'] >=2000].copy()
scores = []

for index, race in races.iterrows():
    if race.date>=pd.Timestamp.now():
        continue
    # Get race
    df = df_old.loc[(df_old['year']==race['year']) & (df_old['round']==race['round'])].copy()
    
    # Transform data
    df['grid'] = df['grid'].clip(upper=20)
    df['position'] = df['position'].clip(upper=20)
    columns_to_scale = ['year', 'age', 'experience', 'driversPointsBeforeRace', 'constPointsBeforeRace']
    df[columns_to_scale] = scaler.transform(df[columns_to_scale])
    columns_to_replace = ['weather_warm', 'weather_cold', 'weather_dry', 'weather_wet', 'weather_cloudy']
    df[columns_to_replace] = df[columns_to_replace].replace(1,20)
    y = df['position']
    drivers = df['driverId']
    constructors = df['constructorId']
    circuits = df['circuitId']
    X = df.drop(['driverId', 'constructorId', 'circuitId', 'position'], axis=1)
    
    # Predict and evaluate
    preds = model.predict([drivers, constructors, circuits, X], verbose = 0)
    preds_df = pd.DataFrame()
    preds_df['driverId'] = df['driverId']
    preds_df['predictions'] = preds
    preds_df = preds_df.merge(drivers, how='inner', on='driverId')
    preds_df = preds_df.sort_values(by=['predictions'])
    df = df.sort_values(by=['position'])
    scores.append(scipy.stats.kendalltau(preds_df['driverId'], df['driverId']).correlation)
    
avg_kendall = sum(scores)/len(scores)

In [146]:
avg_kendall

0.7825871262551288

## Test model over full dataset (categorical model)

In [None]:
df_old = pd.read_csv('data_ready/LEARNING_DF_NORMAL.csv')
grid = pd.read_csv('data_ready/2023_grid_data.csv')
date_parse_list = ['date', 'fp1_date', 'fp2_date', 'fp3_date', 'quali_date', 'sprint_date']
races = pd.read_csv('f1db_csv/races.csv', na_values=["\\N"], parse_dates=date_parse_list)
drivers = pd.read_csv('f1db_csv/drivers.csv')
model = tf.keras.models.load_model('models/nn_model_categorical_nolookup.h5')
scaler = joblib.load('models/minmaxscaler_categorical_nolookup.pkl')

In [151]:
races = races.loc[races['year'] >=2000].copy()
scores = []

for index, race in races.iterrows():
    if race.date>=pd.Timestamp.now():
        continue
    # Get race
    df = df_old.loc[(df_old['year']==race['year']) & (df_old['round']==race['round'])].copy()
    
    # Transform data
    df['grid'] = df['grid'].clip(upper=20)
    df['position'] = df['position'].clip(upper=20)
    columns_to_scale = ['grid', 'year', 'round', 'age', 'experience', 'driversPointsBeforeRace', 'constPointsBeforeRace']
    df[columns_to_scale] = scaler.transform(df[columns_to_scale])
    y = df['position']
    drivers = df['driverId']
    constructors = df['constructorId']
    circuits = df['circuitId']
    X = df.drop(['driverId', 'constructorId', 'circuitId', 'position'], axis=1)
    
    # Predict and evaluate
    preds = model.predict([drivers, constructors, circuits, X], verbose = 0)
    df_preds = pd.DataFrame()
    df_preds['driverId'] = df['driverId']
    df_preds['prediction'] = np.argmax(preds, axis=1)+1
    df_preds = df_preds.merge(drivers, how='inner', on='driverId')
    df_preds = df_preds.sort_values(by=['prediction'])
    df = df.sort_values(by=['position'])
    scores.append(scipy.stats.kendalltau(df_preds['driverId'], df['driverId']).correlation)
    
avg_kendall = sum(scores)/len(scores)

In [152]:
avg_kendall

0.9825513935541836

## Test model over full dataset (categorical model with lookup)

In [None]:
df_old = pd.read_csv('data_ready/LEARNING_DF_NORMAL.csv')
grid = pd.read_csv('data_ready/2023_grid_data.csv')
date_parse_list = ['date', 'fp1_date', 'fp2_date', 'fp3_date', 'quali_date', 'sprint_date']
races = pd.read_csv('f1db_csv/races.csv', na_values=["\\N"], parse_dates=date_parse_list)
drivers = pd.read_csv('f1db_csv/drivers.csv')
constructors = pd.read_csv('f1db_csv/constructors.csv')
circuits = pd.read_csv('f1db_csv/circuits.csv')
model = tf.keras.models.load_model('models/nn_model_categorical')
scaler = joblib.load('models/minmaxscaler_categorical.pkl')


df_old = df_old.merge(drivers[['driverId','driverRef']], how='inner', on='driverId')
df_old = df_old.merge(constructors[['constructorId','constructorRef']], how='inner', on='constructorId')
df_old = df_old.merge(circuits[['circuitId','circuitRef']], how='inner', on='circuitId')

In [None]:
races = races.loc[races['year'] >=2000].copy()
scores = []

for index, race in races.iterrows():
    if race.date>=pd.Timestamp.now():
        continue
    # Get race
    df = df_old.loc[(df_old['year']==race['year']) & (df_old['round']==race['round'])].copy()
    
    # Transform data
    df['grid'] = df['grid'].clip(upper=20)
    df['position'] = df['position'].clip(upper=20)
    columns_to_scale = ['grid', 'year', 'round', 'age', 'experience', 'driversPointsBeforeRace', 'constPointsBeforeRace']
    df[columns_to_scale] = scaler.transform(df[columns_to_scale])
    df = df.drop(['driverId', 'constructorId', 'circuitId'], axis=1)
    drivers = df['driverRef']
    constructors = df['constructorRef']
    circuits = df['circuitRef']
    X = df.drop(['driverRef', 'constructorRef', 'circuitRef', 'position'], axis=1)
    
    # Predict and evaluate
    preds = model.predict([drivers, constructors, circuits, X], verbose=0)
    df_preds = pd.DataFrame()
    df_preds['driverRef'] = df['driverRef']
    df_preds['prediction'] = np.argmax(preds, axis=1)+1
    df_preds = df_preds.merge(drivers, how='inner', on='driverRef')
    df_preds = df_preds.sort_values(by=['prediction'])
    df = df.sort_values(by=['position'])
    scores.append(scipy.stats.kendalltau(df_preds['driverRef'], df['driverRef']).correlation)

avg_kendall = sum(scores)/len(scores)

In [161]:
avg_kendall

0.9796374382586873