# 1. Ideia inicial

Será feita um regressão de probabilidade de vitoria do piloto. Assim, vamos dar uma probabilidade de vitoria para os 10 primeiros colocados de 100% a 0%, para o primeiro ao ultimo colocado, respectivamente.
- Os dados serão agregados por driver_standings.
- Serão criados 3 modelos um para antes da corrida, outro apos as qualificações e outro durante a corrida.
    - Antes: só vai levar em considerações os dados da pista e do piloto.
    - Qualificação: vai levar em consideração os dados da pista, do piloto e da qualificação(tempos de volta).
    - Corrida: vai levar em consideração os dados da pista, do piloto, da qualificação e da corrida(melhor volta, voltas lideradas, pit stops, etc).

In [133]:
import pickle

import numpy as np
import pandas as pd
import numpy
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import cross_validate


class EvaluatedClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, classifier, cv=10, graphic=False, compact=False, is_regression=False):
        self.classifier = classifier
        self.cv = cv
        self.graphic = graphic
        self.compact = compact
        self.is_regression = is_regression

    def fit(self, X, y, *args, **kwargs):
        self.classifier.fit(X, y, *args, **kwargs)
        return self

    def predict(self, X):
        return self.classifier.predict(X)

    def save(self, path):
        pickle.dump(self, open(path, 'wb'))

    def fit_predict_cv(self, X, y, show_result=True):
        res = self._cross_validate(X, y, show_result=show_result)
        self.fit(X, y)
        return res

    def _cross_validate(self, X, y, show_result=True):
        classification = ['accuracy', 'precision', 'recall', 'f1', 'roc_auc']
        regression = ['r2', 'neg_mean_absolute_error', 'neg_mean_squared_error', 'explained_variance',
                      'neg_median_absolute_error']
        scoring = classification if not self.is_regression else regression

        result = cross_validate(self.classifier, X, y,
                                scoring=scoring, n_jobs=-1)
        if show_result:
            print(f"> Validação Cruzada (cv={self.cv}):")
            if self.is_regression:
                print(
                    f"R2: {result['test_r2'].mean():.3f} (+/- {result['test_r2'].std() * 2:.3f})\n" + \
                    f"MAE: {-result['test_neg_mean_absolute_error'].mean():.3f} (+/- {result['test_neg_mean_absolute_error'].std() * 2:.3f})\n" + \
                    f"MSE: {-result['test_neg_mean_squared_error'].mean():.3f} (+/- {result['test_neg_mean_squared_error'].std() * 2:.3f})\n" + \
                    f"Explained Variance: {result['test_explained_variance'].mean():.3f} (+/- {result['test_explained_variance'].std() * 2:.3f})\n" + \
                    f"Median Absolute Error: {-result['test_neg_median_absolute_error'].mean():.3f} (+/- {result['test_neg_median_absolute_error'].std() * 2:.3f})"
                )
            else:
                print(
                    f"Accuracy: {result['test_accuracy'].mean():.3f} (+/- {result['test_accuracy'].std() * 2:.3f})\n" + \
                    f"Precision: {result['test_precision'].mean():.3f} (+/- {result['test_precision'].std() * 2:.3f})\n" + \
                    f"Recall: {result['test_recall'].mean():.3f} (+/- {result['test_recall'].std() * 2:.3f})\n" + \
                    f"F1: {result['test_f1'].mean():.3f} (+/- {result['test_f1'].std() * 2:.3f})\n" + \
                    f"ROC AUC: {result['test_roc_auc'].mean():.3f} (+/- {result['test_roc_auc'].std() * 2:.3f})"
                )
        return result


In [134]:
pd.set_option('display.max_columns', None)

path = '../data/ergast/'
circuits = pd.read_csv(path + 'circuits.csv')
constructor_results = pd.read_csv(path + 'constructor_results.csv')
constructor_standings = pd.read_csv(path + 'constructor_standings.csv')
constructors = pd.read_csv(path + 'constructors.csv')
driver_standings = pd.read_csv(path + 'driver_standings.csv')
drivers = pd.read_csv(path + 'drivers.csv')
lap_times = pd.read_csv(path + 'lap_times.csv')
pit_stops = pd.read_csv(path + 'pit_stops.csv')
qualifying = pd.read_csv(path + 'qualifying.csv')
races = pd.read_csv(path + 'races.csv')
results = pd.read_csv(path + 'results.csv')
seasons = pd.read_csv(path + 'seasons.csv')
sprint_results = pd.read_csv(path + 'sprint_results.csv')
status = pd.read_csv(path + 'status.csv')

In [135]:
df = pd.merge(results, drivers, on='driverId', suffixes=("", "_drivers"))
df = pd.merge(df, races, on='raceId', suffixes=("", "_races"))
df = pd.merge(df, circuits, on='circuitId', suffixes=("", "_circuits"))
df = pd.merge(df, constructors, on='constructorId', suffixes=("", "_constructors"))
df = pd.merge(df, status, on='statusId')
df = pd.merge(df, driver_standings, on=['raceId', 'driverId'], suffixes=("", "_acc"))

In [136]:
df.to_csv('../data/partial/all.csv', index=False)

In [137]:
df_laps = pd.merge(lap_times, races, on="raceId", suffixes=("", "_race"))
df_laps = pd.merge(df_laps, circuits, on="circuitId", suffixes=("", "_circuits"))
df_laps['date'] = pd.to_datetime(df_laps['date'])

In [138]:
from concurrent.futures import ThreadPoolExecutor

df['dob'] = pd.to_datetime(df['dob'])
df['date'] = pd.to_datetime(df['date'])
# Calculate age compare to date
df['age'] = (df['date'] - df['dob']).dt.days / 365
df['age'] = df['age'].astype(int)

# set nulll where \N
df = df.replace('\\N', numpy.nan)

weather = pd.read_csv('../data/weather.csv')
df = df.merge(weather, on=['raceId'])

# round to 2 decimal humidity and temperature
df['humidity'] = df['humidity'].round(2)
df['temperature'] = df['temperature'].round(2)


# based on table lap_times find the best milliseconds, that a reace made on a circuit, before a specificy race
def best_lap_time(raceId, circuitId, race_date):
    return df_laps[
        (df_laps['raceId'] < raceId) & (df_laps['circuitId'] == circuitId) & (df_laps['date'] < race_date)][
        'milliseconds'].min()


def get_best_lap_time(row):
    return best_lap_time(row['raceId'], row['circuitId'], row['date'])


with ThreadPoolExecutor() as executor:
    df['faster_lap_circuit_ever'] = list(executor.map(get_best_lap_time, df.to_dict('records')))

# Convert 'grid' and 'position' column to numeric
df['grid'] = pd.to_numeric(df['grid'], errors='coerce')
df['position'] = pd.to_numeric(df['position'], errors='coerce')

# Ensure 'date' is in datetime format
df['date'] = pd.to_datetime(df['date'])

# Create a new dataframe with raceId, driverId, grid and position
df_grid_position = df[['raceId', 'driverId', 'grid', 'position', 'wins']].sort_values(['driverId', 'raceId'])

# Calculate the expanding mean of 'grid' and 'position' grouped by 'driverId'
df_grid_position['AvgGrid'] = df_grid_position.groupby('driverId')['grid'].expanding().mean().reset_index(level=0,
                                                                                                          drop=True)
df_grid_position['AvgFn'] = df_grid_position.groupby('driverId')['position'].expanding().mean().reset_index(level=0,
                                                                                                            drop=True)

df_grid_position['wins_cum'] = df_grid_position.groupby('driverId')['wins'].expanding().sum().reset_index(level=0,
                                                                                                          drop=True)

# Merge the df_grid_position dataframe back into the main dataframe (df)
df = pd.merge(df, df_grid_position[['raceId', 'driverId', 'AvgGrid', 'AvgFn', 'wins_cum']], on=['raceId', 'driverId'])

# Ensure that AvgGrid and AvgFn are rounded to two decimal places
df['AvgGrid'] = df['AvgGrid'].round(2)
df['AvgFn'] = df['AvgFn'].round(2)
df['wins_cum'] = df['wins_cum'].round(2)


In [197]:
clean = [
    'resultId',
    'position',
    'positionText',
    'points',
    'fastestLap',
    'time',  # time to finish the race
    'milliseconds',  # time in milisecond to finish the race
    'fastestLapSpeed',
    'fastestLapTime',
    'rank',  # rank of fast lap in a race
    'statusId',
    'status',
    'grid',

    'raceId',
    'year',
    # 'date',
    'time_races',
    'name',  #Nome do grand pix
    'url_races',

    # 'driverId',
    'driverRef',
    'number_drivers',
    'dob',
    'code',
    'url',
    'forename',
    'surname',

    'driverStandingsId',
    'number',  # car number
    'positionText_acc',  # position acumulated unit a race

    'fp1_time',
    'fp1_date',
    'fp2_time',
    'fp2_date',
    'fp3_time',
    'fp3_date',

    'quali_time',
    'quali_date',
    'sprint_date',
    'sprint_time',

    'constructorRef',
    'name_constructors',
    'url_constructors',

    'circuitRef',
    'name_circuits',
    'location',
    'url_circuits',

    'lat',
    'lng',

    'wmo_code'
]

for col in clean:
    if col not in df.columns:
        print(col)

df_clean = df.copy()

df_clean.drop(columns=clean, inplace=True)

df_clean = df_clean.rename(
    columns={'positionOrder': 'position', 'points_acc': 'points_season', 'position_acc': 'position_season',
             'wins': 'wins_season', 'alt': 'height', 'country': 'country_circuit'
             })

# df_clean drop null at position
df_clean = df_clean.dropna(subset=['position'])


In [198]:
# from ydata_profiling import ProfileReport
#
# profile = ProfileReport(df, title='Pandas Profiling Report', )
# profile.to_file("profile/final.html")

In [199]:
position_prob = [i for i in range(10, 110, 10)]
# inver position_prob
position_prob.reverse()

df_clean['position'] = df_clean.position.astype(int)
df_clean['position'] = df_clean['position'].apply(lambda x: position_prob[x - 1] if 1 <= x <= 10 else 0)

In [200]:
df_clean

Unnamed: 0,driverId,constructorId,position,laps,nationality,round,circuitId,date,country_circuit,height,nationality_constructors,points_season,position_season,wins_season,age,weather_condition,humidity,temperature,faster_lap_circuit_ever,AvgGrid,AvgFn,wins_cum
0,1,1,100,58,British,1,1,2008-03-16,Australia,10.0,British,10.0,1,1,23,dry,18.00,36.90,,8.18,7.20,13.0
1,5,1,60,58,Finnish,1,1,2008-03-16,Australia,10.0,British,4.0,5,0,26,dry,18.00,36.90,,10.47,7.92,0.0
2,2,2,90,58,German,1,1,2008-03-16,Australia,10.0,German,8.0,2,0,30,dry,18.00,36.90,,11.61,9.00,0.0
3,3,3,80,58,German,1,1,2008-03-16,Australia,10.0,British,6.0,3,0,22,dry,18.00,36.90,,8.00,7.47,0.0
4,4,4,70,58,Spanish,1,1,2008-03-16,Australia,10.0,French,5.0,4,0,26,dry,18.00,36.90,,9.00,7.87,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25560,520,157,0,128,American,3,19,1950-05-30,USA,223.0,American,0.0,34,0,24,dry,92.67,18.43,,17.11,13.43,0.0
25561,799,113,0,52,American,3,19,1950-05-30,USA,223.0,American,0.0,63,0,39,dry,92.67,18.43,,20.00,15.00,0.0
25562,731,105,0,112,American,3,19,1950-05-30,USA,223.0,Italian,0.0,67,0,36,dry,92.67,18.43,,16.67,12.50,0.0
25563,659,113,0,125,American,3,19,1950-05-30,USA,223.0,American,0.0,39,0,21,dry,92.67,18.43,,17.00,14.50,0.0


In [201]:
df_clean.to_csv('../data/partial/clean.csv', index=False)

In [202]:
nationality_to_country = {
    'British': 'UK',
    'Italian': 'Italy',
    'French': 'France',
    'German': 'Germany',
    'Brazilian': 'Brazil',
    'American': 'USA',
    'Finnish': 'Finland',  # Not in the country list, added manually
    'Spanish': 'Spain',
    'Australian': 'Australia',
    'Austrian': 'Austria',
    'Japanese': 'Japan',
    'Belgian': 'Belgium',
    'Swedish': 'Sweden',
    'Swiss': 'Switzerland',
    'Dutch': 'Netherlands',
    'Canadian': 'Canada',
    'Mexican': 'Mexico',
    'New Zealander': 'New Zealand',  # Not in the country list, added manually
    'Argentine': 'Argentina',
    'Russian': 'Russia',
    'South African': 'South Africa',
    'Danish': 'Denmark',  # Not in the country list, added manually
    'Monegasque': 'Monaco',
    'Colombian': 'Colombia',  # Not in the country list, added manually
    'Venezuelan': 'Venezuela',  # Not in the country list, added manually
    'Polish': 'Poland',  # Not in the country list, added manually
    'Irish': 'Ireland',  # Not in the country list, added manually
    'Portuguese': 'Portugal',
    'Thai': 'Thailand',  # Not in the country list, added manually
    'Indian': 'India',
    'Chilean': 'Chile',  # Not in the country list, added manually
    'Chinese': 'China',
    'Hungarian': 'Hungary',
    'Rhodesian': 'Zimbabwe',  # Rhodesia is the former name of Zimbabwe
    'Malaysian': 'Malaysia',
    'Liechtensteiner': 'Liechtenstein',  # Not in the country list, added manually
    'Indonesian': 'Indonesia',  # Not in the country list, added manually
    'Uruguayan': 'Uruguay',  # Not in the country list, added manually
    'East German': 'Germany',
    'Czech': 'Czech Republic',  # Not in the country list, added manually
    'American-Italian': 'USA',
    'Argentine-Italian': 'Argentina',
    'Hong Kong': 'Hong Kong',  # Not in the country list, added manually
    'Bahrain': 'Bahrain',
    'Turkey': 'Turkey',
    'Singapore': 'Singapore',
    'UAE': 'UAE',
    'Korea': 'Korea',
    'Azerbaijan': 'Azerbaijan',
    'Morocco': 'Morocco',
    'Qatar': 'Qatar',
    'Saudi Arabia': 'Saudi Arabia',
}

# based on the map above create a encoder for each country
country_encoder = [i for i in nationality_to_country.values()]

In [203]:
df_clean['nationality'] = df_clean['nationality'].apply(lambda x: nationality_to_country[x])
df_clean['nationality_constructors'] = df_clean['nationality_constructors'].apply(lambda x: nationality_to_country[x])

# parse nationality, nationality_constructors and country_circuit to category

df_clean['nationality'] = df_clean['nationality'].apply(lambda x: country_encoder.index(x))
df_clean['nationality_constructors'] = df_clean['nationality_constructors'].apply(lambda x: country_encoder.index(x))
df_clean['country_circuit'] = df_clean['country_circuit'].apply(lambda x: country_encoder.index(x))

In [204]:
from sklearn.preprocessing import LabelEncoder

# label encoder weather_conditions
le = LabelEncoder()

df_clean['weather_condition'] = le.fit_transform(df_clean['weather_condition'])
df_clean['weather_condition'] = df_clean['weather_condition'].astype('int')

In [205]:
df_clean['height'] = df_clean['height'].astype('float')
# columns types
df_clean.dtypes

driverId                             int64
constructorId                        int64
position                             int64
laps                                 int64
nationality                          int64
round                                int64
circuitId                            int64
date                        datetime64[ns]
country_circuit                      int64
height                             float64
nationality_constructors             int64
points_season                      float64
position_season                      int64
wins_season                          int64
age                                  int64
weather_condition                    int64
humidity                           float64
temperature                        float64
faster_lap_circuit_ever            float64
AvgGrid                            float64
AvgFn                              float64
wins_cum                           float64
dtype: object

In [206]:
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25565 entries, 0 to 25564
Data columns (total 22 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   driverId                  25565 non-null  int64         
 1   constructorId             25565 non-null  int64         
 2   position                  25565 non-null  int64         
 3   laps                      25565 non-null  int64         
 4   nationality               25565 non-null  int64         
 5   round                     25565 non-null  int64         
 6   circuitId                 25565 non-null  int64         
 7   date                      25565 non-null  datetime64[ns]
 8   country_circuit           25565 non-null  int64         
 9   height                    25565 non-null  float64       
 10  nationality_constructors  25565 non-null  int64         
 11  points_season             25565 non-null  float64       
 12  position_season   

In [180]:
# Remove faster_lap_circuit_ever column
df_clean = df_clean.drop(columns=['faster_lap_circuit_ever'])

df_clean = df_clean.dropna(subset=['AvgFn'])

In [181]:
# detect null values
df_clean.isnull().sum()

constructorId               0
position                    0
laps                        0
nationality                 0
round                       0
circuitId                   0
country_circuit             0
height                      0
nationality_constructors    0
points_season               0
position_season             0
wins_season                 0
age                         0
weather_condition           0
humidity                    0
temperature                 0
AvgGrid                     0
AvgFn                       0
wins_cum                    0
dtype: int64

In [182]:
X, y = df_clean.drop(columns=['position']), df_clean['position']

In [183]:
# Randon Forest
from sklearn.ensemble import RandomForestRegressor

_ = EvaluatedClassifier(RandomForestRegressor(), is_regression=True).fit_predict_cv(X, y)

> Validação Cruzada (cv=10):
R2: 0.652 (+/- 0.060)
MAE: 12.769 (+/- 1.908)
MSE: 388.699 (+/- 77.734)
Explained Variance: 0.654 (+/- 0.059)
Median Absolute Error: 7.320 (+/- 2.002)


In [184]:
_ = EvaluatedClassifier(RandomForestRegressor(n_jobs=-1), is_regression=True).fit_predict_cv(X, y)

> Validação Cruzada (cv=10):
R2: 0.653 (+/- 0.063)
MAE: 12.782 (+/- 1.978)
MSE: 387.964 (+/- 81.558)
Explained Variance: 0.654 (+/- 0.062)
Median Absolute Error: 7.370 (+/- 2.148)


In [185]:
# XGBoost
from xgboost import XGBRegressor

model_eval = EvaluatedClassifier(XGBRegressor(
    # n_estimators=1000,
    n_jobs=-1,
    learning_rate=0.1,
), is_regression=True)

_ = model_eval.fit_predict_cv(X, y)

> Validação Cruzada (cv=10):
R2: 0.667 (+/- 0.086)
MAE: 12.703 (+/- 2.236)
MSE: 371.811 (+/- 109.670)
Explained Variance: 0.668 (+/- 0.086)
Median Absolute Error: 7.200 (+/- 2.295)


In [169]:
# MLP
from sklearn.neural_network import MLPRegressor

_ = EvaluatedClassifier(MLPRegressor(
    # hidden_layer_sizes=(100, 100, 100, 100, 100, 100, 100, 100, 100, 100),
    max_iter=1000,
    # activation='relu',
    # solver='adam',
    # learning_rate='adaptive',
    learning_rate_init=0.1,
    # alpha=0.01,
    # batch_size=100,
    # verbose=True,
    # random_state=42,
    # tol=0.0001,
    # early_stopping=True,
    # validation_fraction=0.1,
    # n_iter_no_change=10,
    # shuffle=True,
    # warm_start=False,
    # momentum=0.9,
    # nesterovs_momentum=True,
    # power_t=0.5,
    # beta_1=0.9,
    # beta_2=0.999,
    # epsilon=1e-08,
    # max_fun=15000
), is_regression=True).fit_predict_cv(X, y)

> Validação Cruzada (cv=10):
R2: 0.424 (+/- 0.090)
MAE: 19.271 (+/- 1.750)
MSE: 642.159 (+/- 106.867)
Explained Variance: 0.431 (+/- 0.088)
Median Absolute Error: 13.574 (+/- 3.842)


In [None]:
# Export the model
model_eval.save('model/model.pkl')

In [227]:
from datetime import datetime

races['date'] = pd.to_datetime(races.date)
race_next = races.loc[races.date > datetime.now()].sort_values(by='date')
race_next = pd.merge(race_next, circuits, on='circuitId')
# get last row
race_next = race_next.iloc[0]
drivers_next = [842, 815, 4, 844, 840, 858, 825, 852, 848, 855, 807, 839, 830, 846, 1, 856, 832, 847, 822, 857]

# get the drivers
info_new = df_clean.loc[df_clean['driverId'].isin(drivers_next)].sort_values(by='date')

info_new

Unnamed: 0,driverId,constructorId,position,laps,nationality,round,circuitId,date,country_circuit,height,nationality_constructors,points_season,position_season,wins_season,age,weather_condition,humidity,temperature,faster_lap_circuit_ever,AvgGrid,AvgFn,wins_cum
245,4,18,0,56,7,1,1,2001-03-04,8,10.0,1,0.0,12,0,19,0,87.33,18.37,,6.15,4.22,218.0
564,4,18,0,52,7,2,2,2001-03-18,34,18.0,1,0.0,16,0,19,0,83.67,26.23,,6.26,4.31,218.0
7999,4,18,0,25,7,3,18,2001-04-01,4,785.0,1,0.0,19,0,19,0,74.00,24.27,,6.37,4.31,218.0
11389,4,18,0,5,7,4,21,2001-04-15,1,37.0,1,0.0,20,0,19,0,43.00,13.53,,6.46,4.31,218.0
1148,4,18,0,63,7,5,4,2001-04-29,7,109.0,1,0.0,21,0,19,0,64.67,17.47,,6.55,4.39,218.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9619,822,51,0,57,6,22,24,2022-11-20,46,3.0,13,49.0,10,0,33,0,59.33,28.00,86103.0,6.54,6.30,109.0
9620,855,51,0,57,31,22,24,2022-11-20,46,3.0,13,6.0,18,0,23,0,59.33,28.00,86103.0,13.36,12.88,0.0
9621,842,213,0,57,2,22,24,2022-11-20,46,3.0,1,23.0,14,0,26,0,59.33,28.00,86103.0,10.26,9.76,10.0
9606,846,1,50,58,0,22,24,2022-11-20,46,3.0,0,122.0,7,0,23,0,59.33,28.00,86103.0,8.21,7.84,0.0


In [277]:
import numpy as np

rank = {}
for i in drivers_next:
    try:
        temp = info_new.loc[info_new['driverId'] == i].iloc[0]
        temp.circuitId = race_next.circuitId
        temp.country_circuit = country_encoder.index(race_next.country)
        temp.height = race_next.alt
        # rain 67.66666666666667 23.066666666666666
        temp.weather_condition = le.transform(['rain'])[0]
        temp.humidity = 67.67
        temp.temperature = 23.07
        stay = set(temp.index) - {'driverId', 'faster_lap_circuit_ever', 'date', 'position'}
        temp.round = race_next.round
        temp = temp[list(stay)]
        if np.isnan(temp.AvgFn):
            temp.AvgFn = 10.0

        if i == 830:
            temp.wins_cum = 34
            temp.wins_season = 14
            temp.AvgFn = 1.0
            temp.points_season = 100
            print(temp)

        if i == 825:
            print(temp)

        df_temp = pd.DataFrame([temp], columns=temp.index)
        # order columns to['constructorId', 'laps', 'nationality', 'round', 'circuitId', 'country_circuit', 'height', 'nationality_constructors', 'points_season', 'position_season', 'wins_season', 'age', 'weather_condition', 'humidity', 'temperature', 'AvgGrid', 'AvgFn', 'wins_cum']

        df_temp = df_temp[['constructorId', 'laps', 'nationality', 'round', 'circuitId', 'country_circuit', 'height',
                           'nationality_constructors', 'points_season', 'position_season', 'wins_season', 'age',
                           'weather_condition', 'humidity', 'temperature', 'AvgGrid', 'AvgFn', 'wins_cum']]

        rank[i] = float(model_eval.predict(df_temp)[0])
    except:
        print("Failed to predict driver: ", i)
        continue



Failed to predict driver:  858
AvgGrid                       4.0
AvgFn                         2.0
points_season                18.0
wins_season                     0
weather_condition               1
round                           1
laps                           57
humidity                    67.67
temperature                 23.07
wins_cum                      0.0
constructorId                   1
country_circuit                 9
circuitId                      70
age                            21
nationality_constructors        0
height                      678.0
position_season                 2
nationality                    21
Name: 116, dtype: object
AvgGrid                      11.0
AvgFn                         1.0
points_season                 100
wins_season                    14
weather_condition               1
round                           1
laps                           32
humidity                    67.67
temperature                 23.07
wins_cum                  

In [278]:
# sort the rank by value
rank = dict(sorted(rank.items(), key=lambda item: item[1]))

for i in rank:
    print(drivers.loc[drivers.driverId == i].iloc[0].forename, drivers.loc[drivers.driverId == i].iloc[0].surname,
          rank[i])
    rank[i] = {
        "name": drivers.loc[drivers.driverId == i].iloc[0].forename + " " + drivers.loc[drivers.driverId == i].iloc[
            0].surname,
        "rank": rank[i]
    }

# save the rank
import json

with open('../result/rank.json', 'w') as fp:
    json.dump(rank, fp)

Nico Hülkenberg -2.5616207122802734
George Russell -1.6782430410385132
Sergio Pérez -0.8332539796829224
Lance Stroll -0.4925088882446289
Alexander Albon 0.42882275581359863
Esteban Ocon 0.6679508090019226
Valtteri Bottas 0.7021191716194153
Pierre Gasly 0.7182775735855103
Fernando Alonso 0.9628542065620422
Lando Norris 1.5257099866867065
Max Verstappen 3.301701068878174
Charles Leclerc 3.5545973777770996
Guanyu Zhou 10.34870719909668
Nyck de Vries 19.149330139160156
Yuki Tsunoda 20.355762481689453
Carlos Sainz 24.133258819580078
Lewis Hamilton 77.54512786865234
Kevin Magnussen 83.65229797363281


In [None]:
import requests

# forecast
url = "https://api.open-meteo.com/v1/forecast"
params = {
    "latitude": race_next["lat"],
    "longitude": race_next["lng"],
    "start_date": race_next["date"].strftime("%Y-%m-%d"),
    "end_date": race_next["date"].strftime("%Y-%m-%d"),
    "hourly": "relativehumidity_2m,weathercode,temperature_2m"
}

response = requests.get(url, params=params)
print(response.url)
data = response.json()

race_next['time']

In [238]:
df.sort_values(by='date', ascending=False)

Unnamed: 0,resultId,raceId,driverId,constructorId,number,grid,position,positionText,positionOrder,points,laps,time,milliseconds,fastestLap,rank,fastestLapTime,fastestLapSpeed,statusId,driverRef,number_drivers,code,forename,surname,dob,nationality,url,year,round,circuitId,name,date,time_races,url_races,fp1_date,fp1_time,fp2_date,fp2_time,fp3_date,fp3_time,quali_date,quali_time,sprint_date,sprint_time,circuitRef,name_circuits,location,country,lat,lng,alt,url_circuits,constructorRef,name_constructors,nationality_constructors,url_constructors,status,driverStandingsId,points_acc,position_acc,positionText_acc,wins,age,weather_condition,humidity,wmo_code,temperature,faster_lap_circuit_ever,AvgGrid,AvgFn,wins_cum
9613,25835,1096,20,117,5,9,10.0,10,10,1.0,58,+1:23.898,5349812,35,15,1:30.312,210.510,1,vettel,5,VET,Sebastian,Vettel,1987-07-03,German,http://en.wikipedia.org/wiki/Sebastian_Vettel,2022,22,24,Abu Dhabi Grand Prix,2022-11-20,13:00:00,http://en.wikipedia.org/wiki/2022_Abu_Dhabi_Gr...,2022-11-18,10:00:00,2022-11-18,13:00:00,2022-11-19,11:00:00,2022-11-19,14:00:00,,,yas_marina,Yas Marina Circuit,Abu Dhabi,UAE,24.4672,54.60310,3.0,http://en.wikipedia.org/wiki/Yas_Marina_Circuit,aston_martin,Aston Martin,British,http://en.wikipedia.org/wiki/Aston_Martin_in_F...,Finished,71824,37.0,12,12,0,35,dry,59.33,0.0,28.0,86103.0,6.15,5.28,517.0
9614,25833,1096,840,117,18,14,8.0,8,8,4.0,58,+1:16.931,5342845,42,9,1:29.620,212.135,1,stroll,18,STR,Lance,Stroll,1998-10-29,Canadian,http://en.wikipedia.org/wiki/Lance_Stroll,2022,22,24,Abu Dhabi Grand Prix,2022-11-20,13:00:00,http://en.wikipedia.org/wiki/2022_Abu_Dhabi_Gr...,2022-11-18,10:00:00,2022-11-18,13:00:00,2022-11-19,11:00:00,2022-11-19,14:00:00,,,yas_marina,Yas Marina Circuit,Abu Dhabi,UAE,24.4672,54.60310,3.0,http://en.wikipedia.org/wiki/Yas_Marina_Circuit,aston_martin,Aston Martin,British,http://en.wikipedia.org/wiki/Aston_Martin_in_F...,Finished,71815,18.0,15,15,0,24,dry,59.33,0.0,28.0,86103.0,13.80,11.37,0.0
9624,25845,1096,4,214,14,10,,R,20,0.0,27,,,24,17,1:30.579,209.889,47,alonso,14,ALO,Fernando,Alonso,1981-07-29,Spanish,http://en.wikipedia.org/wiki/Fernando_Alonso,2022,22,24,Abu Dhabi Grand Prix,2022-11-20,13:00:00,http://en.wikipedia.org/wiki/2022_Abu_Dhabi_Gr...,2022-11-18,10:00:00,2022-11-18,13:00:00,2022-11-19,11:00:00,2022-11-19,14:00:00,,,yas_marina,Yas Marina Circuit,Abu Dhabi,UAE,24.4672,54.60310,3.0,http://en.wikipedia.org/wiki/Yas_Marina_Circuit,alpine,Alpine F1 Team,French,http://en.wikipedia.org/wiki/Alpine_F1_Team,Water leak,71812,81.0,9,9,0,41,dry,59.33,0.0,28.0,86103.0,8.68,6.26,346.0
9623,25844,1096,849,3,6,20,19.0,19,19,0.0,55,,,45,14,1:30.309,210.517,130,latifi,6,LAT,Nicholas,Latifi,1995-06-29,Canadian,http://en.wikipedia.org/wiki/Nicholas_Latifi,2022,22,24,Abu Dhabi Grand Prix,2022-11-20,13:00:00,http://en.wikipedia.org/wiki/2022_Abu_Dhabi_Gr...,2022-11-18,10:00:00,2022-11-18,13:00:00,2022-11-19,11:00:00,2022-11-19,14:00:00,,,yas_marina,Yas Marina Circuit,Abu Dhabi,UAE,24.4672,54.60310,3.0,http://en.wikipedia.org/wiki/Yas_Marina_Circuit,williams,Williams,British,http://en.wikipedia.org/wiki/Williams_Grand_Pr...,Collision damage,71819,2.0,20,20,0,27,dry,59.33,0.0,28.0,86103.0,16.67,15.43,0.0
9622,25843,1096,1,131,44,5,18.0,18,18,0.0,55,,,42,11,1:29.788,211.738,9,hamilton,44,HAM,Lewis,Hamilton,1985-01-07,British,http://en.wikipedia.org/wiki/Lewis_Hamilton,2022,22,24,Abu Dhabi Grand Prix,2022-11-20,13:00:00,http://en.wikipedia.org/wiki/2022_Abu_Dhabi_Gr...,2022-11-18,10:00:00,2022-11-18,13:00:00,2022-11-19,11:00:00,2022-11-19,14:00:00,,,yas_marina,Yas Marina Circuit,Abu Dhabi,UAE,24.4672,54.60310,3.0,http://en.wikipedia.org/wiki/Yas_Marina_Circuit,mercedes,Mercedes,German,http://en.wikipedia.org/wiki/Mercedes-Benz_in_...,Hydraulics,71806,240.0,6,6,0,37,dry,59.33,0.0,28.0,86103.0,3.84,3.38,1008.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23318,20036,833,579,51,1,3,,R,12,0.0,62,,,,,,,44,fangio,,,Juan,Fangio,1911-06-24,Argentine,http://en.wikipedia.org/wiki/Juan_Manuel_Fangio,1950,1,9,British Grand Prix,1950-05-13,,http://en.wikipedia.org/wiki/1950_British_Gran...,,,,,,,,,,,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153.0,http://en.wikipedia.org/wiki/Silverstone_Circuit,alfa,Alfa Romeo,Swiss,http://en.wikipedia.org/wiki/Alfa_Romeo_in_For...,Oil leak,43212,0.0,13,13,0,38,dry,62.67,0.0,16.4,,2.52,2.34,98.0
23319,20032,833,741,154,16,14,8.0,8,8,0.0,65,,,,,,,15,etancelin,,,Philippe,Étancelin,1896-12-28,French,http://en.wikipedia.org/wiki/Philippe_%C3%89ta...,1950,1,9,British Grand Prix,1950-05-13,,http://en.wikipedia.org/wiki/1950_British_Gran...,,,,,,,,,,,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153.0,http://en.wikipedia.org/wiki/Silverstone_Circuit,lago,Talbot-Lago,French,http://en.wikipedia.org/wiki/Talbot-Lago,+5 Laps,43208,0.0,8,8,0,53,dry,62.67,0.0,16.4,,14.00,8.50,0.0
23320,20043,833,789,154,17,7,,R,19,0.0,8,,,,,,,51,martin,,,Eugène,Martin,1915-03-24,French,http://en.wikipedia.org/wiki/Eug%C3%A8ne_Martin,1950,1,9,British Grand Prix,1950-05-13,,http://en.wikipedia.org/wiki/1950_British_Gran...,,,,,,,,,,,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153.0,http://en.wikipedia.org/wiki/Silverstone_Circuit,lago,Talbot-Lago,French,http://en.wikipedia.org/wiki/Talbot-Lago,Oil pressure,43219,0.0,20,20,0,35,dry,62.67,0.0,16.4,,7.00,,0.0
23321,20037,833,776,126,23,19,,N,13,0.0,57,,,,,,,62,kelly,,,Joe,Kelly,1913-03-13,Irish,http://en.wikipedia.org/wiki/Joe_Kelly_(Formul...,1950,1,9,British Grand Prix,1950-05-13,,http://en.wikipedia.org/wiki/1950_British_Gran...,,,,,,,,,,,silverstone,Silverstone Circuit,Silverstone,UK,52.0786,-1.01694,153.0,http://en.wikipedia.org/wiki/Silverstone_Circuit,alta,Alta,British,http://en.wikipedia.org/wiki/Alta_auto_racing_...,Not classified,43213,0.0,14,14,0,37,dry,62.67,0.0,16.4,,18.50,,0.0
