In [222]:
import pandas as pd
import math
import os
from datetime import datetime

In [469]:
# Constants
initial_rating = 1500
initial_rd = 350
tau = 0.5
q = math.log(10) / 400
c = 1.5

# Glicko Functions
def g(rd):
    return 1 / (1 + 3 * (q**2) * (rd**2) / (math.pi**2))**0.5

def E(r, ri, rdi):
    return 1 / (1 + 10**(g(rdi) * (ri - r) / 400))

def d2(r, ri, rdi):
    inner = (g(rdi)**2) * E(r, ri, rdi) * (1 - E(r, ri, rdi))
    return 1 / ((q**2) * inner)

def new_rd(rd, d2):
    return 1 / ((1 / rd**2) + (1 / d2))**0.5

def new_rating(r, rd, ri, rdi, s, d2):
    return r + (q / ((1 / rd**2) + (1 / d2))) * g(rdi) * (s - E(r, ri, rdi))

# Function to increase RD over time due to inactivity
def increase_rd_over_time(player_ratings, current_time):
    for player, (rating, rd, last_active) in player_ratings.items():
        # Calculate time elapsed in days
        days_inactive = (current_time - last_active).days
        if days_inactive > 0:
            # Increase RD based on time elapsed
            new_rd = min(math.sqrt(rd**2 + c**2 * days_inactive), initial_rd)
            player_ratings[player] = (rating, new_rd, last_active)

def update_ratings(player_ratings, match_results, current_time):
    increase_rd_over_time(player_ratings, current_time)
    for match in match_results:
        player1, player2, result = match
        r1, rd1, last_active1 = player_ratings.get(player1, (initial_rating, initial_rd, current_time))
        r2, rd2, last_active2 = player_ratings.get(player2, (initial_rating, initial_rd, current_time))
        
        d2_1 = d2(r1, r2, rd2)
        d2_2 = d2(r2, r1, rd1)
        
        new_rd1 = new_rd(rd1, d2_1)
        new_rd2 = new_rd(rd2, d2_2)
        
        new_r1 = new_rating(r1, rd1, r2, rd2, result, d2_1)
        new_r2 = new_rating(r2, rd2, r1, rd1, 1 - result, d2_2)
        
        player_ratings[player1] = (new_r1, new_rd1, current_time)
        player_ratings[player2] = (new_r2, new_rd2, current_time)

players = {}
matches = [("player1", "player2", 1), ("player2", "player1", 0)]
update_ratings(players, matches, datetime.now())
print(players)


{'player1': (1720.1602564648724, 260.27316726259585, datetime.datetime(2024, 6, 3, 0, 49, 46, 115197)), 'player2': (1279.8397435351276, 260.27316726259585, datetime.datetime(2024, 6, 3, 0, 49, 46, 115197))}


In [470]:
tf = pd.read_csv('data/tournaments_wtt.tsv', sep='\t', parse_dates=['StartDateTime']).sort_values(['StartDateTime'])
tf

Unnamed: 0,EventName,EventType,EventTypeId,Country,City,ContinentCode,Subcontinent,StartDateTime,EndDateTime,EventId
3,WTT Contender Doha 2021,WTT Contender,34,Qatar,Doha,asia,west asia,2021-02-28 11:30:00,2021-03-06T23:59:59,2410
4,WTT Star Contender Doha 2021,WTT Star Contender,35,Qatar,Doha,asia,west asia,2021-03-05 19:30:00,2021-03-13T23:59:00,2411
1,Tokyo 2020 Olympic Games,Olympic Games,38,Japan,Tokyo,asia,east asia,2021-07-23 00:00:00,2021-08-08T00:00:00,2345
10,WTT Contender Budapest 2021,WTT Contender,34,Hungary,Budapest,europe,eastern europe,2021-08-15 00:00:00,2021-08-20T00:00:00,2487
7,2021 ITTF Czech International Open,ITTF International Open,80,Czechia,Olomouc,europe,eastern europe,2021-08-21 00:00:00,2021-08-25T00:00:00,2480
...,...,...,...,...,...,...,...,...,...,...
135,WTT Feeder Caracas 2024,WTT Feeder,81,Venezuela,Caracas,americas,south america,2024-10-30 00:00:00,2024-11-03T00:00:00,2912
115,WTT Champions Frankfurt 2024,WTT Champions,65,Germany,Frankfurt,europe,western europe,2024-11-03 00:00:00,2024-11-10T00:00:00,2877
127,WTT Feeder Düsseldorf II 2024,WTT Feeder,81,Germany,Düsseldorf,europe,western europe,2024-11-18 00:00:00,2024-11-22T00:00:00,2890
145,WTT Finals Fukuoka 2024,WTT Finals,75,Japan,Kitakyushu,asia,east asia,2024-11-20 00:00:00,2024-11-24T00:00:00,2947


In [471]:
directory = os.fsencode('data/wtt_cleaned/matches')

players = {}

for event in tf.itertuples():
    if not os.path.isfile(f'data/wtt_cleaned/matches/{event.EventId}.tsv'):
        continue

    df = pd.read_csv(f'data/wtt_cleaned/matches/{event.EventId}.tsv', sep='\t', parse_dates=['start'])
    # event_id	doc	fmt	gender	stage	stage_id	duration	start	a_id	b_id	x_id	y_id	res_a	res_x
    # 2345	TTEMTEAM--------------FNL-00010001--------	T	M	FNL	00010001	1560	2021-08-06 10:34:29	110267	105649	102832	101222	3	0
    matches = []
    for row in df[df.fmt != 'D'].itertuples():
        matches.append((row.a_id, row.x_id, int(row.res_a > row.res_x)))        
    update_ratings(players, matches, event.StartDateTime)

resf = pd.DataFrame.from_dict(players, orient='index')
resf

Unnamed: 0,0,1,2
107028,1734.528003,59.276294,2024-05-01
121582,1810.760865,60.586880,2024-05-01
107445,1498.380497,60.434843,2024-05-20
109002,1307.859843,67.653029,2024-05-01
117821,1890.138706,69.814188,2024-05-21
...,...,...,...
143785,1260.859980,257.083973,2024-05-13
200087,1201.391744,283.256792,2024-05-13
200319,1271.790034,256.178889,2024-05-13
146091,1417.356256,276.663988,2024-05-20


In [472]:
pf = pd.read_csv('data/wtt_cleaned/players.tsv', sep='\t').set_index('id')
pf

Unnamed: 0_level_0,org,name,gender
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
100001,IND,ANTHONY Amalraj,M
100032,EGY,ABDEL-AZIZ Farah,W
100079,NGR,ABIODUN Bode,M
100089,RSA,ABRAHAMS Luke,M
100154,TOG,AGBETOGLO Mawussi,M
...,...,...,...
208989,LBN,BADIH Ibrahim,M
208990,LBN,MOUKHTAR Hilal,M
209021,JPN,OKANO Shunsuke,M
209074,IND,MALIK Rahul,X


In [473]:
rf = resf.join(pf)
srf = rf.sort_values(0, ascending=False)

In [474]:
import json

with open('05302024_ranking_m.json', 'r') as f:
    data = json.load(f)
    mrank = {
        p: i + 1
        for i, p in enumerate(data)
    }

with open('05302024_ranking_w.json', 'r') as f:
    data = json.load(f)
    wrank = {
        p: i + 1
        for i, p in enumerate(data)
    }

In [475]:
w100 = srf[(srf.gender == 'W') & (srf[1] <= 100)].head(100)
w100[0] = w100[0].round().astype('int')
w100[1] = w100[1].round().astype('int')
w100.reset_index(inplace=True)
w100.drop(columns=['index', 'gender', 2], inplace=True)
w100.reset_index(inplace=True)
w100.rename(columns={'index': 'rank', 0: 'rating', 1: 'conf'}, inplace=True)
w100['rank'] += 1
w100['wttrank'] = pd.Series(wrank.get(name) or wrank.get(' '.join(reversed(name.split(' ', 1))), 9999) for name in w100.name).astype('int')
w100['wttdiff'] = w100.wttrank - w100['rank']
w100.to_csv('top100F.tsv', sep='\t', index=False)

In [476]:
with open('top100f.txt', 'w') as f:
    f.write('|rank|rating|±dev|org|name|wtt|diff|\n')
    f.write('|:-|:-|:-|:-|:-|:-|:-|\n')
    for row in w100.set_index('rank').itertuples():
        f.write('|'+ '|'.join((str(r) for r in row)) + '|\n')

In [477]:
w100 = srf[
    (srf.gender == 'M')
    & (srf[1] <= 100)
].head(100)
w100[0] = w100[0].round().astype('int')
w100[1] = w100[1].round().astype('int')
w100.reset_index(inplace=True)
w100.drop(columns=['index', 'gender', 2], inplace=True)
w100.reset_index(inplace=True)
w100.rename(columns={'index': 'rank', 0: 'rating', 1: 'conf'}, inplace=True)
w100['rank'] += 1
w100['wttrank'] = pd.Series(mrank.get(name) or mrank.get(' '.join(reversed(name.split(' ', 1))), 9999) for name in w100.name).astype('int')
w100['wttdiff'] = w100.wttrank - w100['rank']
w100.to_csv('top100M.tsv', sep='\t', index=False)

In [478]:
with open('top100m.txt', 'w') as f:
    f.write('|rank|rating|±dev|org|name|wtt|diff|\n')
    f.write('|:-|:-|:-|:-|:-|:-|:-|\n')
    for row in w100.set_index('rank').itertuples():
        f.write('|'+ '|'.join((str(r) for r in row)) + '|\n')

In [479]:
rf[rf.name == 'OJIO Haruna']

Unnamed: 0,0,1,2,org,name,gender
135179,1841.997079,134.140283,2023-08-22,JPN,OJIO Haruna,W


In [480]:
# manual fixes
# Jia Nan YUAN -> YUAN Jia Nan
# SUH Hyo Won -> SUH Hyowon
# YANG Ha Eun -> YANG Haeun
# LEE Sang Su -> LEE Sangsu

In [481]:
directory = os.fsencode('data/wtt_cleaned/matches')

players = {}

events = []
for event in tf.itertuples():
    if not os.path.isfile(f'data/wtt_cleaned/matches/{event.EventId}.tsv'):
        continue

    events.append(
        pd.read_csv(f'data/wtt_cleaned/matches/{event.EventId}.tsv', sep='\t', parse_dates=['start'])
    )

matchf = pd.concat(events)
matchf

Unnamed: 0,event_id,doc,fmt,gender,stage,stage_id,duration,start,a_id,b_id,x_id,y_id,res_a,res_x
0,2410,TTEMSINGLES-----------FNL-000100--,S,M,FNL,100,2884,2021-03-06 11:40:00,107028,,121582,,4,1
1,2410,TTEMSINGLES-----------RND2001100----------,S,M,RND2,1100,1225,2021-03-01 08:20:00,107445,,109002,,3,0
2,2410,TTEWDOUBLES-----------FNL-000100--,D,W,FNL,100,1481,2021-03-06 07:00:00,117820,110752.0,114706,114105.0,3,0
3,2410,TTEWSINGLES-----------FNL-000100--,S,W,FNL,100,3464,2021-03-06 11:00:00,117821,,123672,,4,2
4,2410,TTEXDOUBLES-----------FNL-000100--,D,X,FNL,100,3201,2021-03-06 08:20:00,121582,110797.0,105136,137894.0,3,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,2865,TTEWSINGLES-----------RND1000400----------,S,W,RND1,400,1512,2024-05-22 05:45:00,136403,,124363,,3,0
102,2865,TTEWSINGLES-----------RND1000200----------,S,W,RND1,200,2834,2024-05-21 05:20:00,122716,,137465,,2,3
103,2865,TTEMSINGLES-----------RND1001100----------,S,M,RND1,1100,2561,2024-05-21 03:35:00,145550,,133893,,3,2
104,2865,TTEMSINGLES-----------RND2000300----------,S,M,RND2,300,1204,2024-05-22 06:20:00,117345,,132312,,3,0


In [482]:
xyb = matchf[(matchf.fmt != 'D') & ((matchf.a_id == 131148) | (matchf.x_id == 131148))]

In [483]:
wnames = xyb.merge(pf[['name']].rename(columns={'name': 'name_a'}), left_on='a_id', right_index=True).merge(pf[['name']].rename(columns={'name': 'name_x'}), left_on='x_id', right_index=True)

In [432]:
wnames[['res_a', 'res_x', 'name_a', 'name_x']].to_csv('tmp.tsv', sep='\t', index=False)