In [1]:
import pandas as pd
import numpy as np

In [61]:
games = pd.read_csv("games_2011_to_2023.csv", parse_dates=["date"])
wins = games.loc[games.result == "W"]

In [117]:
class ELO:
    def __init__(
        self,
        winners,
        losers,
        ids=None,
        timestamps=None,
        k=20,
        elo_init=1500,
        elo_diff=400,
    ):
        self.k = k
        self.elo_init = 1500
        self.elo_diff = 400

        ELO.check_valid_games(winners, losers, ids, timestamps)
        ELO.check_valid_params(k, elo_init, elo_diff)

        # Assemble temporary empty dataframe to store ELOs
        self.competitors = sorted(
            list(set(self.elo_df.winner.unique()) | set(self.elo_df.loser.unique()))
        )
        init_elos = pd.DataFrame(columns=self.competitors, index=self.elo_df.index)
        init_elos.iloc[0, :] = self.elo_init

        # Initialize dataframe to store all competitions and ELOs
        self.elo_df = pd.DataFrame(
            {"id": ids, 'timestamp': timestamps, "winner": winners, "loser": losers}
        ).set_index("id")
        self.elo_df = pd.concat([self.elo_df, init_elos], axis=1)

        # Sort the dataframe by timestamp then index
        if timestamps is not None:
            self.elo_df = self.elo_df.sort_values(["timestamp"])

    # Compute the ELO of every competitor after each match
    def fit(self):
        for i in range(len(self.elo_df)):
            row = self.elo_df.iloc[i]
            winner_id = row["winner"]
            loser_id = row["loser"]

            winner_init_elo = (
                self.elo_df.iloc[i - 1][winner_id] if i > 0 else self.elo_init
            )
            loser_init_elo = (
                self.elo_df.iloc[i - 1][loser_id] if i > 0 else self.elo_init
            )

            winner_new_elo, loser_new_elo = self.compute_pairwise_elo(
                winner_init_elo, loser_init_elo
            )
            self.elo_df.loc[row.name, winner_id] = winner_new_elo
            self.elo_df.loc[row.name, loser_id] = loser_new_elo

    def compute_pairwise_elo(self, winner_elo, loser_elo):
        expected_outcome_prob = self.compute_expected_outcome_prob(
            winner_elo, loser_elo
        )
        return winner_elo + self.k * (1 - expected_outcome_prob), loser_elo - self.k * (
            1 - expected_outcome_prob
        )

    def compute_expected_outcome_prob(self, elo1, elo2):
        return 1 / (1 + 10 ** ((elo2 - elo1) / self.elo_diff))

    def check_valid_params(k, elo_init, elo_diff):
        assert isinstance(k, int) and k > 0
        assert isinstance(elo_init, int) and elo_init > 0
        assert isinstance(elo_diff, int) and elo_diff > 0

    def check_valid_games(winners, losers, ids, timestamps):
        # assert proper data types
        assert pd.api.types.is_list_like(winners)
        assert pd.api.types.is_list_like(losers)
        assert ids is None or pd.api.types.is_list_like(ids)
        assert timestamps is None or pd.api.types.is_list_like(timestamps)

        # check that winners, losers, ids, and timestamps have the same length
        assert len(winners) == len(losers)
        assert ids is None or len(ids) == len(winners)
        assert timestamps is None or len(timestamps) == len(winners)

        # check that all ids are unique
        assert ids is None or len(set(ids)) == len(ids)

        # check that no teams play against themselves
        for i in range(len(winners)):
            assert winners[i] != losers[i]

        # check that the timestamps are valid
        if timestamps is not None:
            for timestamp in timestamps:
                try:
                    pd.to_datetime(timestamp)
                except ValueError:
                    raise ValueError("Invalid timestamp: {}".format(timestamp))

    def show_elos(self):
        return self.elo_df

In [64]:
elo_df = (
    wins[["date", "contest_id", "school_id", "opponent_school_id"]]
    .sort_values(by="date")
    .set_index("date")
)
elo_df.columns = ["id", "winner", "loser"]

In [85]:
d = pd.DataFrame(index=range(10))

In [123]:
elo_df

Unnamed: 0_level_0,id,winner,loser
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-02-05,869377.0,180,184
2011-02-10,869378.0,518,184
2011-02-12,869391.0,52,184
2011-02-12,869379.0,193,639
2011-02-12,869386.0,180,1320
...,...,...,...
2023-05-21,2433933.0,513,322
2023-05-21,2433934.0,539,725
2023-05-27,2433917.0,513,746
2023-05-27,2433918.0,193,539


In [119]:
e = ELO(elo_df.winner, elo_df.loser, ids=elo_df.id)

  assert winners[i] != losers[i]


ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [120]:
e.fit()

AttributeError: 'ELO' object has no attribute 'fit'

In [104]:
e.show_elos()

Unnamed: 0,id,winner,loser,14,52,62,68,80,81,83,...,738,739,741,746,748,813,1320,11504,19651,30136
0,869377.0,180.0,184.0,,,,,,,,...,,,,,,,,,,
1,869378.0,518.0,184.0,,,,,,,,...,,,,,,,,,,
2,869391.0,52.0,184.0,,,,,,,,...,,,,,,,,,,
3,869379.0,193.0,639.0,,,,,,,,...,,,,,,,,,,
4,869386.0,180.0,1320.0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12519,,,,,,,,,,,...,,,,,,,,,,
12520,,,,,,,,,,,...,,,,,,,,,,
12521,,,,,,,,,,,...,,,,,,,,,,
12522,,,,,,,,,,,...,,,,,,,,,,
