In [3]:
import polars as pl
import trueskill

## Create player dataframe

In [4]:
matches = pl.read_csv('data/matches.csv')

In [5]:
players = (
    matches
    .select(pl.col('winner').alias('tag'), pl.col('winner_url').alias('url'), 'tournament_date')
    .vstack(
        matches
        .select(pl.col('loser').alias('tag'), pl.col('loser_url').alias('url'), 'tournament_date')
    )
    .unique()
    .sort('tournament_date', descending=False)
    .group_by('url')
    .agg(pl.col('tag').mode().first())
    .select('tag', 'url')
)
pl.Config(tbl_rows=2)
players

tag,url
str,str
"""CP""","""/league/nemelee/player/5247BCE…"
…,…
"""Bmo""","""/league/nemelee/player/BCE94E8…"


# Calculate trueskill

In [6]:
pl.Config(tbl_rows=2)
matches

encounter_id,winner_url,winner,loser_url,loser,winner_score,loser_score,tournament_url,tournament_name,tournament_date,tournament_country,tournament_region,tournament_number_of_players
i64,str,str,str,str,i64,i64,str,str,str,str,str,str
26,"""/league/nemelee/player/EAE5E0C…","""Gobble Guts Aching Belly""","""/league/nemelee/player/3675716…","""PSI""",2,0,"""/tournament/14368246-4622-42D5…","""New Game Plus Revival 5.6""","""2023-12-12""",,,
…,…,…,…,…,…,…,…,…,…,…,…,…
52,"""/league/nemelee/player/73AF50A…","""Pizza""","""/league/nemelee/player/8782C1A…","""Pooky""",2,1,"""/tournament/669EFC77-6380-459E…","""New Game Plus Revival 3.11""","""2022-12-13""",,,


In [7]:
players_dict = {}

def rate_players(row: dict) -> dict:
    # Create TrueSkill rating objects for the players
    if players_dict.get(row['winner_url']) is None:
        players_dict[row['winner_url']] = trueskill.Rating()
    if players_dict.get(row['loser_url']) is None:
        players_dict[row['loser_url']] = trueskill.Rating()

    result = {
        'winner_rating': players_dict[row['winner_url']],
        'loser_rating': players_dict[row['loser_url']],
    }

    # Update the ratings based on the match outcome
    new_winner_rating, new_loser_rating = trueskill.rate_1vs1(
        players_dict[row['winner_url']], players_dict[row['loser_url']])
    players_dict[row['winner_url']] = new_winner_rating
    players_dict[row['loser_url']] = new_loser_rating

    result.update({
        'new_winner_rating': new_winner_rating,
        'new_loser_rating': new_loser_rating,
    })
    return result

In [8]:
players_dict = {}

matches_with_ratings = matches.with_columns(
    pl.struct(['winner_url', 'loser_url'])
        .map_elements(rate_players, return_dtype=pl.Struct)
        .alias('new_ratings')
).unnest('new_ratings')

## 95 CI
In TrueSkill, rating is a Gaussian distribution which starts from $\mathcal{ N }( 25, \frac{ 25 }{ 3 }^2 )$. $\mu$ is an average skill of player, and $\sigma$ is a confidence of the guessed rating. A real skill of player is between $\mu \pm 2\sigma$ with 95% confidence.

In [9]:
matches_with_ratings.write_csv('data/matches-with-ratings.csv')

## Export ranking

In [10]:
player_ratings = pl.DataFrame({
    'url': [player_url for player_url in players_dict.keys()],
    'rating': [rating for rating in players_dict.values()],
})

In [12]:
pl.Config(tbl_rows=50)
player_ratings.join(players, on='url').sort('rating', descending=True)

url,rating,tag
str,f64,str
"""/league/nemelee/player/FDFA0C9…",48.661081,"""Nouns | Aklo"""
"""/league/nemelee/player/566A026…",47.277305,"""2saint"""
"""/league/nemelee/player/891E861…",46.804071,"""RedBull GG IFM | aMSa"""
"""/league/nemelee/player/726A52B…",45.391225,"""YAMI"""
"""/league/nemelee/player/37B3203…",45.220107,"""Spark"""
"""/league/nemelee/player/30D4C0A…",44.493222,"""PF | Mekk"""
"""/league/nemelee/player/C206957…",44.187066,"""MATE | Kalvar"""
"""/league/nemelee/player/EB30F2B…",44.108582,"""Inky"""
"""/league/nemelee/player/6B2BEB6…",44.054739,"""DrLobster"""
"""/league/nemelee/player/F7441D6…",43.926348,"""De_party | Epoodle"""
