In [53]:
import pandas 

In [54]:
dataframeResults = pandas.read_csv("subdataset/results.csv")
dataframeDrivers = pandas.read_csv("subdataset/drivers.csv")
dataframeConstructors = pandas.read_csv("subdataset/constructors.csv")
dataframeStatus = pandas.read_csv("subdataset/status.csv")
dataframeRaces = pandas.read_csv("subdataset/races.csv")

In [55]:
dataframeResults = dataframeResults.merge(dataframeDrivers, on = "driverId", how = "left")
dataframeResults = dataframeResults.merge(dataframeConstructors, on = "constructorId", how = "left")
dataframeResults = dataframeResults.merge(dataframeStatus, on = "statusId", how = "left")
dataframeResults = dataframeResults.merge(dataframeRaces, on = "raceId", how="left")
dataframeResults = dataframeResults.drop(columns=["driverId","constructorId",'url_x','nationality_x','url_y','nationality_y','statusId'])
dataframeResults = dataframeResults.drop(columns=["dob","positionText","positionOrder","number_x","number_y"])
dataframeResults = dataframeResults.drop(columns=["fp1_date","fp1_time","fp2_date","fp2_time","fp3_date","fp3_time"])
dataframeResults = dataframeResults.drop(columns=["quali_date","quali_time","sprint_date","sprint_time","url","time_y","date"])
dataframeResults = dataframeResults.drop(columns=["resultId","code","circuitId","forename","surname"])
dataframeResults = dataframeResults.drop(columns=["rank","fastestLap","round","time_x","raceId","name_x"])
dataframeResults = dataframeResults.loc[:,["year","name_y","driverRef","constructorRef","grid","position","points","status","fastestLapTime","fastestLapSpeed","laps","milliseconds"]]
dataframeResults = dataframeResults.replace("\\N","NaN")

In [56]:
dataframeResults

Unnamed: 0,year,name_y,driverRef,constructorRef,grid,position,points,status,fastestLapTime,fastestLapSpeed,laps,milliseconds
0,2008,Australian Grand Prix,hamilton,mclaren,1,1,10.0,Finished,01:27.5,218.3,58,5690616
1,2008,Australian Grand Prix,heidfeld,bmw_sauber,5,2,8.0,Finished,01:27.7,217.586,58,5696094
2,2008,Australian Grand Prix,rosberg,williams,7,3,6.0,Finished,01:28.1,216.719,58,5698779
3,2008,Australian Grand Prix,alonso,renault,11,4,5.0,Finished,01:28.6,215.464,58,5707797
4,2008,Australian Grand Prix,kovalainen,mclaren,3,5,4.0,Finished,01:27.4,218.385,58,5708630
...,...,...,...,...,...,...,...,...,...,...,...,...
26754,2024,Abu Dhabi Grand Prix,kevin_magnussen,haas,14,16,0.0,+1 Lap,01:25.6,222.002,57,
26755,2024,Abu Dhabi Grand Prix,lawson,rb,12,17,0.0,Engine,01:28.8,214.212,55,
26756,2024,Abu Dhabi Grand Prix,bottas,sauber,9,,0.0,Collision damage,01:29.5,212.462,30,
26757,2024,Abu Dhabi Grand Prix,colapinto,williams,20,,0.0,Engine,01:29.4,212.631,26,


In [None]:
import pandas as pd

# Initialize Elo ratings for all drivers (default 1500)
elo_ratings = {}

K = 30  # K-factor (controls rating changes)

# Function to calculate expected win probability
def expected_score(rating_A, rating_B):
    return 1 / (1 + 10 ** ((rating_B - rating_A) / 400))

# Elo update function
def update_elo(winner_rating, loser_ratings, K=30):
    avg_loser_rating = sum(loser_ratings) / len(loser_ratings) if loser_ratings else 1500
    expected_win = expected_score(winner_rating, avg_loser_rating)

    # Winner gains Elo
    new_winner_rating = winner_rating + K * (1 - expected_win)

    # Losers lose Elo
    new_loser_ratings = [
        loser_rating + K * (0 - expected_score(loser_rating, winner_rating))
        for loser_rating in loser_ratings
    ]

    return new_winner_rating, new_loser_ratings

# Process races in chronological order
dataframeResults = dataframeResults.sort_values(by=["year", "name_y"])

for race_name, race_df in dataframeResults.groupby("name_y"):
    winner_row = race_df[race_df["position"] == 1]
    if winner_row.empty:
        continue  # Skip if no winner

    winner_driver = winner_row.iloc[0]["driverRef"]
    
    # Ensure driver has an Elo rating
    if winner_driver not in elo_ratings:
        elo_ratings[winner_driver] = 1500

    winner_rating = elo_ratings[winner_driver]

    # Get all other drivers (losers)
    loser_rows = race_df[race_df["position"] > 1]
    loser_drivers = loser_rows["driverRef"].tolist()

    # Ensure all losing drivers have an Elo rating
    for driver in loser_drivers:
        if driver not in elo_ratings:
            elo_ratings[driver] = 1500

    loser_ratings = [elo_ratings[driver] for driver in loser_drivers]

    # Update Elo ratings
    new_winner_rating, new_loser_ratings = update_elo(winner_rating, loser_ratings, K)

    # Store updated ratings
    elo_ratings[winner_driver] = new_winner_rating
    for driver, new_rating in zip(loser_drivers, new_loser_ratings):
        elo_ratings[driver] = new_rating

# Convert Elo ratings to DataFrame
elo_df = pd.DataFrame(list(elo_ratings.items()), columns=["driverRef", "eloRating"])

# Ensure `driverRef` is consistent before merging
elo_df["driverRef"] = elo_df["driverRef"].astype(str)
dataframeResults["driverRef"] = dataframeResults["driverRef"].astype(str)

# Merge updated Elo ratings into main dataset
dataframeResults = dataframeResults.merge(elo_df, on="driverRef", how="left")

# Debug: Check if Elo values have changed
print(dataframeResults[["driverRef", "eloRating"]].drop_duplicates().head())


In [58]:
dataframeResults

Unnamed: 0,year,name_y,driverRef,constructorRef,grid,position,points,status,fastestLapTime,fastestLapSpeed,laps,milliseconds,eloRating
0,1950,Belgian Grand Prix,fangio,alfa,2,1,8.0,Finished,,,35,10046000,1500
1,1950,Belgian Grand Prix,fagioli,alfa,3,2,6.0,Finished,,,35,10060000,1500
2,1950,Belgian Grand Prix,rosier,lago,8,3,4.0,Finished,,,35,10185000,1500
3,1950,Belgian Grand Prix,farina,alfa,1,4,4.0,Finished,,,35,10291000,1500
4,1950,Belgian Grand Prix,ascari,ferrari,7,5,2.0,+1 Lap,,,34,,1500
...,...,...,...,...,...,...,...,...,...,...,...,...,...
26754,2024,United States Grand Prix,albon,williams,14,16,0.0,+1 Lap,01:39.1,200.302,55,,1500
26755,2024,United States Grand Prix,bottas,sauber,16,17,0.0,+1 Lap,01:40.6,197.333,55,,1500
26756,2024,United States Grand Prix,ocon,alpine,12,18,0.0,+1 Lap,01:37.3,203.912,55,,1500
26757,2024,United States Grand Prix,zhou,sauber,18,19,0.0,+1 Lap,01:39.3,199.923,55,,1500
