In [41]:
import pandas as pd


In [44]:
df = pd.read_csv('atp_matches_2024.csv')
df = df[['winner_id', 'winner_name', 'loser_id', 'loser_name']]

player_names = df[['winner_id', 'winner_name']]
player_names = player_names.rename(columns={'winner_id':'player_id', 'winner_name':'player_name'})
player_names = player_names.drop_duplicates()


In [45]:
import numpy as np
from scipy.stats import truncnorm, norm


def conditional_outcome(s1, s2, result, sigma_t):
    mean_diff = s1 - s2
    std_diff = sigma_t

    if result == 1:
        a, b = 0, np.inf
    else:
        a, b = -np.inf, 0

    a_std = (a - mean_diff) / std_diff
    b_std = (b - mean_diff) / std_diff

    return a_std, b_std, mean_diff, std_diff

def gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result):
    s1_samples = np.zeros(n_iter)
    s2_samples = np.zeros(n_iter)
    t_samples = np.zeros(n_iter)
    y_samples = np.zeros(n_iter)

    s1_current = mu_s1
    s2_current = mu_s2

    mu_prior = np.array([mu_s1, mu_s2]).reshape((2, 1))
    sigma_prior = np.array([[sigma_s1, 0], [0, sigma_s2]])
    sigma_prior_inv = np.linalg.inv(sigma_prior)

    A = np.array([1, -1]).reshape((1, 2))
    A_T = A.T

    for i in range(n_iter):
        a_std, b_std, mean_diff, std_diff = conditional_outcome(s1_current, s2_current, result, sigma_t)
        t_current = truncnorm.rvs(a=a_std, b=b_std, loc=mean_diff, scale=std_diff)
        sigma_post = np.linalg.inv(sigma_prior_inv + (A_T @ A) / sigma_t**2)
        mu_post = sigma_post @ (sigma_prior_inv @ mu_prior + (A_T * t_current) / sigma_t**2)
        s1_current, s2_current = np.random.multivariate_normal(mu_post.flatten(), sigma_post)
        s1_samples[i] = s1_current
        s2_samples[i] = s2_current
        t_samples[i] = t_current
        y_samples[i] = result

    mean_s1 = np.mean(s1_samples)
    var_s1 = np.var(s1_samples, ddof=1)
    mean_s2 = np.mean(s2_samples)
    var_s2 = np.var(s2_samples, ddof=1)

    return mean_s1, var_s1, mean_s2, var_s2

def win_probability_player1(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t):
    mu_diff = mu_s1 - mu_s2
    sigma_diff = np.sqrt(sigma_s1 + sigma_s2 + sigma_t**2)
    p_y1 = 1 - norm.cdf(0, loc=mu_diff, scale=sigma_diff)
    return p_y1




In [46]:
skills = {}
for index, row in df.iterrows():
    if row['winner_id'] not in skills:
        skills[row['winner_id']] = (25, 8.333)
    if row['loser_id'] not in skills:
        skills[row['loser_id']] = (25, 8.333)

In [47]:
n_iter = 100
sigma_t = 25/6

for index, row in df.iterrows():
    team1 = row['winner_id']
    team2 = row['loser_id']
    result = 1
    mu_s1, sigma_s1 = skills[team1]
    mu_s2, sigma_s2 = skills[team2]
    
    mean_s1, var_s1, mean_s2, var_s2 = gibbs_sampler_result(mu_s1, mu_s2, sigma_s1, sigma_s2, sigma_t, n_iter, result)
    
    skills[team1] = (mean_s1, var_s1)
    skills[team2] = (mean_s2, var_s2)
    

In [69]:
sorted_skills = sorted(skills.items(), key=lambda x: x[1][0], reverse=True)

for player in sorted_skills:
    player_id = player[0]
    player_name = player_names[player_names['player_id'] == player_id]['player_name'].values[0]
    player_skill = player[1][0]
    print(f'{player_name}: {player_skill:.2f}')

Jannik Sinner: 30.79
Carlos Alcaraz: 29.92
Alexander Zverev: 28.72
Daniil Medvedev: 28.70
Grigor Dimitrov: 28.65
Tommy Paul: 28.49
Karen Khachanov: 28.23
Taylor Fritz: 28.21
Alex De Minaur: 28.16
Hubert Hurkacz: 28.14
Andrey Rublev: 28.07
Casper Ruud: 27.93
Stefanos Tsitsipas: 27.77
Giovanni Mpetshi Perricard: 27.72
Rafael Nadal: 27.70
Novak Djokovic: 27.69
Holger Rune: 27.67
Jiri Lehecka: 27.60
Timofey Skatov: 27.51
Ugo Humbert: 27.49
Thiago Monteiro: 27.31
Yi Zhou: 27.31
Ricardas Berankis: 27.30
Alejandro Tabilo: 27.29
Facundo Bagnis: 27.15
Nam Hoang Ly: 27.14
Cristian Garin: 27.08
Fitriadi M Rifqi: 27.05
Tomas Machac: 27.02
Blaise Bicknell: 26.85
Felix Auger Aliassime: 26.83
Alexander Bublik: 26.82
Otto Virtanen: 26.82
Martin Damm: 26.78
Tallon Griekspoor: 26.75
Luciano Darderi: 26.74
Dominik Koepfer: 26.71
Alejandro Davidovich Fokina: 26.66
Pedro Martinez: 26.65
Moez Echargui: 26.64
Ramkumar Ramanathan: 26.64
Murkel Alejandro Dellien Velasco: 26.60
Jan Lennard Struff: 26.58
Andres 

IndexError: index 0 is out of bounds for axis 0 with size 0