In [2]:
import fastf1
import pandas as pd
from fastf1.ergast import Ergast
import math


# Point FastF1's cache at a fixed directory.
# NOTE(review): this is an absolute, user-specific path; change it (or make it
# configurable) before running on another machine. Presumably the directory
# has to exist already -- confirm against the FastF1 cache documentation.
fastf1.Cache.enable_cache('/Users/alexforrester/Documents/F1')

# ergast includes grid position so quali can be done easily
# Shared Ergast client: results are requested as pandas objects with
# auto-casting enabled and no explicit result limit (limit=None).
ergast = Ergast(result_type='pandas', auto_cast=True, limit=None)

# Seasons to process. The season loop below iterates
# range(LOWER_YEAR_LIMIT, UPPER_YEAR_LIMIT), so the upper limit is exclusive:
# with these values the 2020-2023 seasons are rated and 2024 is not.
LOWER_YEAR_LIMIT = 2020
UPPER_YEAR_LIMIT = 2024

def _first_rating(rating):
    """Return *rating* as a float, unwrapping the first element of a pandas Series."""
    if hasattr(rating, "iloc"):
        return float(rating.iloc[0])
    return float(rating)


def elo_algorithm(win_loss, driver_current_elo, team_mate_current_elo, k_factor=32):
    """Return the driver's rating after one head-to-head result against a team mate.

    Applies the standard Elo update
    ``new = old + k_factor * (win_loss - expected_score)``, where
    ``expected_score`` is the logistic expectation derived from the rating
    difference on a 400-point scale.

    Args:
        win_loss: Actual score for the driver: 1 if the driver beat the team
            mate, 0 otherwise.
        driver_current_elo: The driver's rating before the race. Either a
            pandas Series whose first element is the rating, or a plain number.
        team_mate_current_elo: The team mate's rating before the race, in the
            same forms as ``driver_current_elo``.
        k_factor: Maximum rating change from a single result. Defaults to 32,
            the value that was previously hard-coded.
            # potential to tweak k factor with age, races entered etc
            # difference in position factored?

    Returns:
        ``driver_current_elo`` shifted by the rating change, so a Series input
        yields a Series and a numeric input yields a number.
    """
    driver_rating = _first_rating(driver_current_elo)
    team_mate_rating = _first_rating(team_mate_current_elo)

    # Equivalent to q_driver / (q_driver + q_teammate) with q = 10 ** (elo / 400),
    # but works on the rating difference so no huge intermediate powers are formed.
    expected_score = 1.0 / (1.0 + math.pow(10, (team_mate_rating - driver_rating) / 400))

    return driver_current_elo + k_factor * (win_loss - expected_score)


# Running Elo table: one row per driver seen so far. A new driver starts with a
# rating of 1000 and a ``highest_elo`` of 0, so ``highest_elo`` only records
# ratings reached after at least one rated race.
# NOTE(review): a driver whose rating only ever falls therefore ends with a
# ``highest_elo`` below the 1000 starting value -- confirm this is intended.
driver_table = pd.DataFrame(columns=["driver_id", "current_elo", "highest_elo"])

# ``positionText`` codes that stand in for a finishing place when the driver
# did not get one (presumably retired, disqualified, withdrawn, excluded,
# failed to qualify, not classified -- confirm against the Ergast docs).
NON_FINISH_CODES = frozenset({"R", "D", "W", "E", "F", "N"})


# NOTE(review): range() excludes UPPER_YEAR_LIMIT itself -- confirm the last
# season is meant to be left out.
for year in range(LOWER_YEAR_LIMIT, UPPER_YEAR_LIMIT):
    num_of_rounds = len(ergast.get_race_schedule(year))
    for i in range(1, num_of_rounds + 1):
        results = ergast.get_race_results(season=year, round=i)
        if len(results.content) == 0:
            # No classification came back for this round; nothing to rate.
            continue
        race = results.content[0]

        # Register drivers appearing for the first time, in classification
        # order. Ratings are stored as floats so later updates keep the dtype.
        known_ids = set(driver_table["driver_id"])
        new_ids = [d for d in dict.fromkeys(race["driverId"]) if d not in known_ids]
        if new_ids:
            new_rows = pd.DataFrame(
                {"driver_id": new_ids, "current_elo": 1000.0, "highest_elo": 0.0}
            )
            if driver_table.empty:
                driver_table = new_rows
            else:
                driver_table = pd.concat([driver_table, new_rows], ignore_index=True)

        # Snapshot of the pre-race ratings: both team mates must be rated
        # against each other's rating from before this race, not against a
        # value already updated earlier in the same loop.
        driver_table_copy = driver_table.copy()
        for _, result in race.iterrows():
            driver_id = result["driverId"]

            same_team = race[race["constructorName"] == result["constructorName"]]
            team_mates = same_team[same_team["driverId"] != driver_id]
            # sort out team sizes over 2
            # Only a one-against-one comparison is rated; a driver with no
            # team mate, or with several, is skipped for this race.
            if len(team_mates) != 1:
                continue
            team_mate = team_mates.iloc[0]

            driver_position = result["position"]
            team_mate_position_text = team_mate["positionText"]
            if not (team_mate_position_text and driver_position):
                continue
            # check teammate finished too
            if (result["positionText"] in NON_FINISH_CODES
                    or team_mate_position_text in NON_FINISH_CODES):
                continue

            try:
                win_loss = 1 if driver_position < team_mate["position"] else 0
            except TypeError:
                # Positions that cannot be compared (e.g. missing values).
                continue

            driver_current_elo = driver_table_copy.loc[
                driver_table_copy["driver_id"] == driver_id, "current_elo"
            ]
            team_mate_current_elo = driver_table_copy.loc[
                driver_table_copy["driver_id"] == team_mate["driverId"], "current_elo"
            ]
            if driver_current_elo.empty or team_mate_current_elo.empty:
                # Rating missing for one of the two drivers; skip the pairing.
                continue
            driver_highest_elo = driver_table_copy.loc[
                driver_table_copy["driver_id"] == driver_id, "highest_elo"
            ].iloc[0]

            new_elo = elo_algorithm(
                win_loss=win_loss,
                driver_current_elo=driver_current_elo,
                team_mate_current_elo=team_mate_current_elo,
            ).item()

            # save the highest hit elo for each driver
            is_driver = driver_table["driver_id"] == driver_id
            if new_elo > driver_highest_elo:
                driver_table.loc[is_driver, "highest_elo"] = new_elo
            driver_table.loc[is_driver, "current_elo"] = new_elo


# Show the 20 drivers with the highest peak rating (ascending, best last).
df_sorted = driver_table.sort_values(by='highest_elo')
display(df_sorted.tail(20))

        

    


Unnamed: 0,driver_id,current_elo,highest_elo
5,perez,888.601588,1028.627043
32,lawson,1014.338584,1032.350825
0,bottas,969.870314,1034.345723
23,tsunoda,1005.352353,1035.564722
8,giovinazzi,1005.696906,1035.825163
7,ocon,1042.842487,1042.842487
4,sainz,980.396633,1058.781918
17,stroll,882.270844,1058.812623
18,ricciardo,929.761408,1065.582804
16,kevin_magnussen,987.922848,1076.667662
