## Elo Rating method

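This notebook implements the Elo rating method. The implementation is adapted from an example in "Who's #1" by Langville and Meyer, which uses the entire 2009 NFL season (including all playoff games), and the results for that example match the book. Here the same method is applied to ATP tennis match data (`./data/atp_all_matches.csv`), so the entities being rated are players rather than teams.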

Reference: https://www.gautamnarula.com/rating/

Adapted by Eric Wang

In [2]:
import sys
!{sys.executable} -m pip install numpy pandas

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import math 
from pprint import pprint
import pandas as pd
  
# Function to calculate the Probability 
def calc_probability(rating1, rating2):
    """Return the Elo expected score of the player rated ``rating2``.

    Evaluates ``1 / (1 + 10 ** ((rating1 - rating2) / 400))``, i.e. the
    expected score of the *second* argument when playing the first.
    Equal ratings give 0.5.
    """
    # Rating gap expressed in units of 400 points (the Elo scale constant).
    rating_gap = 1.0 * (rating1 - rating2) / 400
    odds_against = math.pow(10, rating_gap)
    return 1.0 / (1 + odds_against)
  
# Function to calculate Elo rating 
# K is a constant. 
# Player A wins over Player B.  
# tie = true if tie, false otherwise
def calc_elo_rating(Ra, Rb, K, tie):
    """Return the updated ``(Ra, Rb)`` ratings after one game.

    Player A is scored as the winner unless ``tie`` is truthy, in which
    case both players are scored half a point. ``K`` scales the size of
    the rating change.
    """
    # Expected scores before the game. calc_probability returns the
    # expected score of its *second* argument, hence the swapped order.
    expected_a = calc_probability(Rb, Ra)
    expected_b = calc_probability(Ra, Rb)

    # Actual scores: a shared half point on a tie, otherwise A takes it all.
    actual_a, actual_b = (1/2, 1/2) if tie else (1, 0)

    return Ra + K * (actual_a - expected_a), Rb + K * (actual_b - expected_b)

def get_games_won(scores):
    """Total the games won by each side across all sets of a match.

    ``scores`` is a whitespace-separated score string written winner-first,
    e.g. ``"6-4 7-6(5)"`` or ``"6-4 3-6 [10-8]"``. Three token shapes are
    counted:

    * ``"6-4"``     -- plain set score.
    * ``"7-6(5)"``  -- set score with a parenthesised tiebreak suffix; the
      suffix is ignored.
    * ``"[10-8]"``  -- bracketed tiebreak; both numbers are added as-is.

    Anything else (tokens without exactly one ``-``, or with non-numeric
    parts) is skipped rather than raising. A missing score -- ``None`` or
    the float NaN that pandas produces for an empty cell -- yields
    ``(0, 0)``.

    Returns:
        tuple[int, int]: ``(winner_total, loser_total)``.
    """
    # Non-string input has no sets to count; avoid AttributeError on .split().
    if not isinstance(scores, str):
        return 0, 0

    winner_score = 0
    loser_score = 0

    for set_score in scores.split():
        set_scores = set_score.split('-')

        # Markers such as "RET" or "W/O" do not split into two parts.
        if len(set_scores) != 2:
            continue

        try:
            if '[' in set_scores[0]:
                # Bracketed tiebreak: drop the surrounding "[" and "]".
                tiebreak_score = set_score[1:-1]
                winner_score_to_add, loser_score_to_add = map(int, tiebreak_score.split('-'))
            elif '(' in set_scores[1]:
                # Drop the "(n)" tiebreak suffix from the loser's games.
                winner_score_to_add = int(set_scores[0])
                loser_score_to_add = int(set_scores[1].split('(')[0])
            else:
                winner_score_to_add = int(set_scores[0])
                loser_score_to_add = int(set_scores[1])
        except ValueError:
            # Hyphenated but non-numeric token: not a set score, skip it.
            continue

        winner_score += winner_score_to_add
        loser_score += loser_score_to_add

    return winner_score, loser_score

In [4]:
# Path of the match CSV that the loading cell passes to pd.read_csv.
filename = './data/atp_all_matches.csv'

# Number of top-ranked players shown in the printed ranking table.
# This is not the Elo K-factor; that is passed separately to calc_elo_rating.
k = 10

### Load the games

In [5]:
# Parse the file in a single pass so pandas infers one dtype per column.
# NOTE(review): the warning this cell used to emit appears to be the
# mixed-types DtypeWarning from chunked parsing -- confirm on re-run.
games = pd.read_csv(filename, low_memory=False)

# Number of matches; used as the denominator of the predictability score.
games_count = len(games)

  exec(code_obj, self.user_global_ns, self.user_ns)


### Create a list of players

In [6]:
# Lookup from player id to player name, covering every id that appears
# as either a winner or a loser.
players = {}

id_name_columns = zip(
    games['winner_id'], games['winner_name'],
    games['loser_id'], games['loser_name'],
)
for winner_id, winner_name, loser_id, loser_name in id_name_columns:
    players[winner_id] = winner_name
    players[loser_id] = loser_name

players_count = len(players)

### Create Elo ratings and calculate predictability

In [7]:
import numpy as np

# Elo K-factor: upper bound on the rating points one match can move.
K_FACTOR = 32

# Ratings live in an array indexed directly by player id, so every id must
# be a valid index. Fail early with a clear message instead of an
# IndexError mid-loop (or a silent wrap-around for a negative id).
for id_column in ('winner_id', 'loser_id'):
    if not games[id_column].between(0, players_count - 1).all():
        raise ValueError(
            f"{id_column} values must lie in 0..{players_count - 1} "
            "to be used as rating indices"
        )

elo_ratings = np.zeros(players_count)

correct_predictions_count = 0

# Iterate the three needed columns directly; this avoids building a Series
# per row as iterrows does.
for winner_id, loser_id, score in zip(games['winner_id'], games['loser_id'], games['score']):
    winner_score, loser_score = get_games_won(score)
    winner_before = elo_ratings[winner_id]
    loser_before = elo_ratings[loser_id]

    # A prediction is correct when the higher-rated player won, or when the
    # ratings were level and the players also took the same number of games.
    if winner_before > loser_before:
        correct_predictions_count += 1
    elif winner_score == loser_score and winner_before == loser_before:
        correct_predictions_count += 1

    # Update ratings: a win is scored only when the winner took more games;
    # every other case is scored as a tie.
    # NOTE(review): this also treats matches where the winner took FEWER
    # games than the loser as ties -- confirm that is intended.
    is_tie = not (winner_score > loser_score)
    winner_rating, loser_rating = calc_elo_rating(winner_before, loser_before, K_FACTOR, is_tie)

    elo_ratings[winner_id] = winner_rating
    elo_ratings[loser_id] = loser_rating

### Sort and print the ranking of players

In [8]:
# Rating-array indices ordered from highest to lowest rating.
iSort = np.argsort(-elo_ratings)

print('\n\n************** ELO Rating Method **************\n')
print('===================================')
print('Rank   Rating       Player   ')
print('===================================')
# Slicing caps the table at the number of rated players, so a k larger
# than the player count cannot index past the end of iSort.
for rank, player_idx in enumerate(iSort[:k], start=1):
    print(f'{rank:4d}   {elo_ratings[player_idx]:.5f}    {players[player_idx]}')

print('')   # extra carriage return

# Share of matches counted as correctly predicted, as a percentage.
print(f'Predictability: {correct_predictions_count/games_count*100:.2f}%') 



************** ELO Rating Method **************

Rank   Rating       Player   
   1   745.15423    Roger Federer
   2   666.36422    Rod Laver
   3   626.29927    Ken Rosewall
   4   610.14331    Daniil Medvedev
   5   604.42844    Boris Becker
   6   599.53688    Rafael Nadal
   7   598.36419    Tony Roche
   8   574.32691    John Newcombe
   9   551.53588    Cliff Richey
  10   548.53557    Arthur Ashe

Predictability: 65.66%


### Serialize results

In [9]:
import json

# Top-level payload: overall predictability (as a fraction) plus the full
# ranking list, filled in below.
results = {
    'predictability': correct_predictions_count/games_count,
    'rankings': [],
}

# One entry per rated player, best rating first; rank is the 1-based
# position in iSort.
results['rankings'].extend(
    {
        'ranking': rank,
        'rating': elo_ratings[player_idx],
        'player': players[player_idx],
    }
    for rank, player_idx in enumerate(iSort, start=1)
)

# Serialize fully before opening the file so a serialization error cannot
# leave a truncated output file behind.
json_data = json.dumps(results, indent=4)

with open('elo_results.json', 'w') as json_file:
    json_file.write(json_data)

print("Serialized results saved to elo_results.json.")

Serialized results saved to elo_results.json.
