In [2]:
import pandas as pd
import numpy as np
import pyglicko2 as pg
# Short aliases for the pyglicko2 match-outcome constants.
win = pg.Outcome.WIN
loss = pg.Outcome.LOSS
draw = pg.Outcome.DRAW

# Calculate Glicko2 Ratings for F1 Drivers
This notebook calculates Glicko-2 (Elo-like) ratings for F1 drivers using the `pyglicko2` library, updating them race by race. For each race, only drivers who finished (i.e. have a numeric value in the `position` column) are considered. Within a race, every pair of finishers is treated as a head-to-head match won by the driver who finished ahead, and `pg.update` is called once per pair. Each driver's peak and minimum rating are tracked along the way.

In [4]:
class Player1(pg.Player):
    """A ``pg.Player`` that also remembers the extremes of its own rating.

    Attributes:
        peak: Largest value of ``self.r`` recorded so far.
        minimum: Smallest value of ``self.r`` recorded so far.
    """

    def __init__(self, *args, **kwargs):
        """Build the underlying player and seed both extremes with its rating.

        All positional and keyword arguments are forwarded unchanged to
        ``pg.Player.__init__``.
        """
        super().__init__(*args, **kwargs)
        self.peak = self.minimum = self.r

    def update_peak_min(self):
        """Widen the recorded [minimum, peak] range to include the current rating."""
        current = self.r
        if current > self.peak:
            self.peak = current
        if current < self.minimum:
            self.minimum = current

    def update(self, players, s):
        """Delegate to ``pg.Player.update`` and then refresh the rating extremes.

        ``players`` and ``s`` are passed through to the parent implementation
        as-is; nothing is returned.
        """
        super().update(players, s)
        self.update_peak_min()

In [5]:
# Load race results
df = pd.read_csv('f1_res.csv')

# Only keep rows where 'position' is a number (driver finished the race).
# .copy() yields an independent frame, so the column assignment below does not
# write into a slice of the loaded data (avoids SettingWithCopyWarning).
finished_mask = pd.to_numeric(df['position'], errors='coerce').notna()
df = df[finished_mask].copy()
df['position'] = df['position'].astype(int)

# Get all unique driver IDs
driver_ids = df['driverId'].unique()

# Initialize Glicko2 players for each driver
players = {driver_id: Player1() for driver_id in driver_ids}

# Pairwise updates that raised a numerical error are recorded here instead of
# being dropped silently: (raceId, driver ahead, driver behind, error name).
failed_updates = []

# Group by race; groupby iterates in sorted raceId order.
# NOTE(review): this assumes raceId order is chronological - confirm for f1_res.csv.
for race_id, race_df in df.groupby('raceId'):
    # Finishing order, best position first
    driver_list = race_df.sort_values('position')['driverId'].tolist()

    # Every pair of finishers is rated exactly once: the driver ahead is the
    # winner against each driver behind.
    # NOTE(review): rows sharing the same position are still scored as a win
    # for whichever row sorts first - consider `draw` if ties exist in the data.
    for i, driver in enumerate(driver_list):
        for opp in driver_list[i + 1:]:
            if opp == driver:
                # The same driver appears twice in this race; rating a player
                # against themselves is meaningless, so skip the pair.
                continue
            try:
                pg.update(players[driver], players[opp], win)
            except (OverflowError, ZeroDivisionError) as exc:
                # Best effort: keep going, but remember what was skipped.
                failed_updates.append((race_id, driver, opp, type(exc).__name__))

if failed_updates:
    print(f"{len(failed_updates)} pairwise updates skipped due to numerical errors")

In [6]:
# One summary row per driver: current rating plus the extremes tracked by Player1.
# Rating deviation / volatility could be added as extra columns
# (presumably `player.rd` / `player.vol` - confirm against pyglicko2).
players_df = pd.DataFrame([
    {
        'driverId': driver_id,
        'glicko2_rating': player.r,
        # Player1.__init__ always sets peak/minimum, so plain attribute access
        # is enough; a getattr default of player.peak would be evaluated
        # eagerly and could never act as a fallback.
        'peak_rating': player.peak,
        'minimum_rating': player.minimum,
    }
    for driver_id, player in players.items()
])
players_df

Unnamed: 0,driverId,glicko2_rating,peak_rating,minimum_rating
0,nino-farina,1840.439014,2100.940583,1500.000000
1,luigi-fagioli,1656.711963,1987.549741,1337.689107
2,reg-parnell,1606.771788,1701.661670,1243.299673
3,yves-giraud-cabantous,1114.087550,1550.859400,1098.362694
4,louis-rosier,1184.183591,1668.042516,1088.348889
...,...,...,...,...
673,franco-colapinto,1126.356046,1500.000000,1065.594182
674,jack-doohan,1044.281787,1500.000000,975.119755
675,andrea-kimi-antonelli,1358.880368,1780.139804,1358.880368
676,isack-hadjar,1532.853856,1532.853856,1055.134287


In [7]:
# Top 25 drivers ranked by the highest rating they ever reached.
# To export the full ranking instead, chain
# .to_csv('f1_glicko2.csv', index=False) onto the sort_values(...) call.
(
    players_df
    .sort_values('peak_rating', ascending=False)
    .head(25)
)

Unnamed: 0,driverId,glicko2_rating,peak_rating,minimum_rating
609,lewis-hamilton,1707.817745,2420.555131,1167.910136
613,sebastian-vettel,1341.440082,2374.472023,1030.945505
644,max-verstappen,1741.924804,2369.842929,1244.856869
603,nico-rosberg,2268.820915,2268.820915,1193.411422
525,michael-schumacher,1304.53698,2255.592645,1141.399161
574,jenson-button,1236.016327,2248.017726,926.732577
12,juan-manuel-fangio,1809.824893,2227.046249,1500.0
578,fernando-alonso,1323.251402,2200.922946,751.649203
17,johnnie-parsons,1111.38116,2184.597194,678.972342
634,valtteri-bottas,996.751684,2173.057665,874.875961
