# Colley ranking

Description: Construct a Colley ranking of players from a file of match results.
  
Created by Tim Chartier and adapted by Eric Wang

### Set parameters

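gameFilename - game data file. The original description presumes the format from 
the Massey rating data server, which can be found at 
http://www.masseyratings.com/; the cells below, however, read a CSV with one row per match and use its `winner_id`, `winner_name`, `loser_id` and `loser_name` columns.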

teamFilename - team data file (not set or used in the cells shown below; player names are taken from the game file instead)

k - number of players to print in the final ranking - set to 0 to print all players

In [1]:
# Install numpy and pandas with pip, invoked through the interpreter that is
# running this kernel (sys.executable), so the packages land in the kernel's
# own environment.
import sys
!{sys.executable} -m pip install numpy pandas

[notice] A new release of pip is available: 23.0.1 -> 23.2.1
[notice] To update, run: pip install --upgrade pip


In [2]:
# Path of the match CSV that the loading cell passes to pd.read_csv.
gameFilename = './data/atp_all_matches.csv'
# Number of top-ranked players to print; 0 prints every player.
k = 10

### Load the games

In [3]:
from pprint import pprint
import pandas as pd

# Read every match from the CSV named in the parameter cell into one frame.
games = pd.read_csv(gameFilename)

# Total number of matches (rows); used later as the predictability denominator.
gamesCount = games.shape[0]

  exec(code_obj, self.user_global_ns, self.user_ns)


### Reassign new IDs for players

In [4]:
# NOTE(review): this ID remapping is disabled. The cells below use winner_id and
# loser_id directly as array indices, so the IDs in the CSV presumably already
# run from 0 to playersCount-1 -- confirm before pointing this at another file.
#player_names = pd.concat([games['winner_name'], games['loser_name']]).unique()

#player_id_map = {player_name: idx for idx, player_name in enumerate(player_names)}

#games['winner_id'] = games['winner_name'].map(player_id_map)
#games['loser_id'] = games['loser_name'].map(player_id_map)

### Create a list of players

In [5]:
# Build the id -> name lookup for every player that appears in any match.
#
# The four columns are zipped directly instead of going through
# DataFrame.iterrows(), which constructs a Series object for every row and is
# much slower on a large match file. Rows are still visited in file order with
# the winner stored before the loser, so if an id ever shows up with two
# different names the last occurrence wins.
players = {}

for winner_id, winner_name, loser_id, loser_name in zip(
    games['winner_id'], games['winner_name'],
    games['loser_id'], games['loser_name'],
):
    players[winner_id] = winner_name
    players[loser_id] = loser_name

# Number of distinct player ids; this is the dimension of the Colley system.
playersCount = len(players)

### Create the Colley linear system

In [6]:
import numpy as np

# Build the Colley linear system  colleyMatrix @ r = b.
#
# colleyMatrix starts as 2*I and b as a vector of ones. Every game then
#   * adds 1 to the diagonal entry of both players,
#   * subtracts 1 from the two symmetric (winner, loser) off-diagonal entries,
#   * adds 1/2 to the winner's entry of b and subtracts 1/2 from the loser's.
#
# winner_id / loser_id are used directly as row/column indices, so they are
# converted to integers and checked against [0, playersCount) up front. An id
# outside that range raises IndexError here with a clear message; negative ids
# are rejected too, because numpy would otherwise silently wrap them around to
# the end of the arrays.
winnerIdx = games['winner_id'].astype(np.int64).to_numpy()
loserIdx = games['loser_id'].astype(np.int64).to_numpy()

if len(winnerIdx) and (
    min(winnerIdx.min(), loserIdx.min()) < 0
    or max(winnerIdx.max(), loserIdx.max()) >= playersCount
):
    raise IndexError(
        'winner_id/loser_id must be integers in the range '
        f'[0, {playersCount}) to be used as matrix indices'
    )

colleyMatrix = 2*np.diag(np.ones(playersCount))
b = np.ones(playersCount)

# np.add.at performs unbuffered in-place addition, so an index (or index pair)
# that occurs in many games is accumulated once per game. A plain fancy-indexed
# "-=" would apply the update only once per distinct index.
np.add.at(colleyMatrix, (winnerIdx, loserIdx), -1)
np.add.at(colleyMatrix, (loserIdx, winnerIdx), -1)

np.add.at(colleyMatrix, (winnerIdx, winnerIdx), 1)
np.add.at(colleyMatrix, (loserIdx, loserIdx), 1)

np.add.at(b, winnerIdx, 1/2)
np.add.at(b, loserIdx, -1/2)

### Solve the linear system

In [7]:
# Solve colleyMatrix @ r = b; r holds one rating per player index.
r = np.linalg.solve(colleyMatrix, b)

# Player indices ordered from the highest rating down to the lowest
# (ascending sort of the negated ratings).
iSort = (-r).argsort()

### Print the ranking of the players

In [8]:
# Print the top of the ranking as a fixed-width table: rank, rating, player.
print('\n\n************** COLLEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Player   ')
print('===========================')

# k == 0 means "show every player". Otherwise show k rows, but never more than
# there are players: iSort has only playersCount entries, so an unclamped k
# larger than that would raise IndexError part-way through the table.
if k==0:
    numberOfPlayersDisplayed = playersCount
else:
    numberOfPlayersDisplayed = min(k, playersCount)

for i in range(numberOfPlayersDisplayed):
    # iSort[i] is the index of the player with the (i+1)-th highest rating
    print(f'{i+1:4d}   {r[iSort[i]]:.5f}   {players[iSort[i]]}')

print('')   # extra carriage return



************** COLLEY Rating Method **************

Rank   Rating    Player   
   1   1.31145   Novak Djokovic
   2   1.29864   Rafael Nadal
   3   1.29024   Roger Federer
   4   1.22395   Carlos Alcaraz
   5   1.22377   Ivan Lendl
   6   1.21881   John McEnroe
   7   1.21261   Pete Sampras
   8   1.20671   Andy Murray
   9   1.20523   Bjorn Borg
  10   1.19669   Jimmy Connors



### Calculate predictability of method

In [9]:
# Predictability: the share of games in which the actual winner has a strictly
# higher Colley rating than the loser. It is measured on the same games that
# were used to build the ratings, so it is an in-sample figure.
#
# The comparison is done on whole columns at once instead of iterating with
# DataFrame.iterrows(), which builds a Series per game.
winnerIds = games['winner_id'].astype('int64').to_numpy()
loserIds = games['loser_id'].astype('int64').to_numpy()

# Games between equally rated players are not counted as correct (strict ">").
# int() keeps the count a plain Python int, as the per-row loop produced.
numberOfCorrectPredictions = int((r[winnerIds] > r[loserIds]).sum())

print(f'Predictability: {numberOfCorrectPredictions/gamesCount*100:.2f}%')


Predictability: 69.54%


### Serialize results

In [10]:
import json

# Collect the predictability figure and the full ranking (every player, best
# first) in one JSON-serialisable structure.
results = {
    'predictability': numberOfCorrectPredictions/gamesCount,
    'rankings': [
        {
            'ranking': position,
            'rating': r[playerIndex],
            'player': players[playerIndex]
        }
        for position, playerIndex in enumerate(iSort, start=1)
    ],
}

# Render as indented JSON text, then write it next to the notebook.
json_data = json.dumps(results, indent=4)

with open('colley_results.json', 'w') as json_file:
    json_file.write(json_data)

print(f"Serialized results saved to colley_results.json.")

Serialized results saved to colley_results.json.
