In [1]:
import sys
!{sys.executable} -m pip install numpy matplotlib tqdm scipy

[notice] A new release of pip is available: 23.0.1 -> 23.2.1
[notice] To update, run: pip install --upgrade pip


In [2]:
# How many top-ranked players the ranking table prints;
# a value <= 0 prints every player.
k = 10

# Path of the CSV file holding the match results.
filename = './data/atp_all_matches.csv'

### Load the games

In [3]:
import numpy as np
from pprint import pprint
import pandas as pd

# Read the whole match table. low_memory=False makes pandas infer every column's
# dtype from the complete file instead of chunk by chunk, which avoids columns
# ending up with mixed types.
# NOTE(review): the saved output of this cell contains a warning; presumably it is
# pandas' mixed-dtype DtypeWarning, which this option addresses - confirm on re-run.
games = pd.read_csv(filename, low_memory=False)

# Number of matches (rows) in the table; used for the progress bar total and as
# the denominator of the prediction accuracy.
games_count = len(games)

  exec(code_obj, self.user_global_ns, self.user_ns)


### Create a list of players

In [4]:
# Map every player id that appears in the match table to that player's name.
players = {}

# Walk the four relevant columns in lockstep. This visits the same rows in the
# same order as DataFrame.iterrows() but does not build a Series for every row.
id_name_columns = zip(
    games['winner_id'], games['winner_name'],
    games['loser_id'], games['loser_name'],
)
for winner_id, winner_name, loser_id, loser_name in id_name_columns:
    # A later row overwrites the name stored for an id seen earlier.
    players[winner_id] = winner_name
    players[loser_id] = loser_name

# Number of distinct player ids; later cells use it as the matrix dimension.
players_count = len(players)

### Create adjacency matrix

In [5]:
from tqdm import tqdm

load_existing_computations = True
alpha = 0.85 # teleportation parameter

if load_existing_computations:
    # Reuse the matrices saved by a previous run of the else-branch.
    # NOTE: the cached G is loaded as-is; it is not rebuilt from the current alpha.
    adjacency_matrix = np.load('pagerank_adjacency_matrix.npy')
    row_sums = np.load('pagerank_row_sums.npy')
    G = np.load('pagerank_G.npy')
else:
    # Every entry starts at 1, so every row sum starts at players_count.
    adjacency_matrix = np.ones((players_count, players_count))
    row_sums = np.sum(adjacency_matrix, 1)

    # Each match adds one unit of weight on the edge loser -> winner.
    # Player ids are used directly as row/column indices.
    # NOTE(review): this assumes ids are integers in [0, players_count) - confirm
    # against the data file.
    match_pairs = zip(games['winner_id'], games['loser_id'])
    for winner_id, loser_id in tqdm(match_pairs, total=games_count):
        adjacency_matrix[loser_id, winner_id] += 1
        row_sums[loser_id] += 1

    # Build the transition matrix once, after all matches have been counted
    # (it only depends on the final counts, so rebuilding it per match is wasted work).
    # Rows with a zero sum get the uniform distribution; all other rows get
    # alpha * (row / row sum) + (1 - alpha) / players_count.
    dangling = row_sums == 0
    safe_row_sums = np.where(dangling, 1.0, row_sums)  # avoid dividing by zero
    G = alpha * adjacency_matrix
    G /= safe_row_sums[:, np.newaxis]
    G += (1 - alpha) / players_count
    G[dangling, :] = 1 / players_count

    # Cache the results so later runs can take the load branch above.
    np.save('pagerank_adjacency_matrix', adjacency_matrix)
    np.save('pagerank_row_sums', row_sums)
    np.save('pagerank_G', G)

### Compute PageRank

In [6]:
# Power iteration: begin with all mass on one randomly chosen player and
# repeatedly multiply the row vector by the transition matrix G.
iterations = 1000

start_index = np.random.randint(players_count)
v0 = np.zeros(players_count)
v0[start_index] = 1

v = v0
for step in range(iterations):
    # Creates a new array each time, so v0 keeps the starting vector.
    v = np.matmul(v, G)

### Display rankings

In [7]:
# Indices of v ordered from the highest score to the lowest.
iSort = np.argsort(-v)
print('\n\n************** PageRank **************\n')
print('===========================')
print('Rank   Rating    Player   ')
print('===========================')

# k <= 0 means "show every player". Otherwise show k rows, capped at the number
# of players so a large k cannot index past the end of iSort.
if k<=0:
    players_displayed = players_count
else:
    players_displayed = min(k, players_count)

for i in range(players_displayed):
    # iSort[i] is used both as the position in v and as the key into players.
    print(f'{i+1:4d}   {v[iSort[i]]:.8f}  {players[iSort[i]]}')

print('')   # extra carriage return



************** PageRank **************

===========================
Rank   Rating    Player   
===========================
   1   0.00015917  Jimmy Connors
   2   0.00015880  Roger Federer
   3   0.00015582  Rafael Nadal
   4   0.00015581  Ivan Lendl
   5   0.00015580  Novak Djokovic
   6   0.00015368  Guillermo Vilas
   7   0.00015305  Ilie Nastase
   8   0.00015272  John McEnroe
   9   0.00015248  Andre Agassi
  10   0.00015142  Stefan Edberg



### Calculate the predictability of the method

In [8]:
# Share of matches in which the eventual winner has the strictly higher score.
# Ties count as incorrect because the comparison is strict. The matches checked
# here are the same `games` table the ranking cells read.
winner_ids = games['winner_id'].to_numpy(dtype=int)
loser_ids = games['loser_id'].to_numpy(dtype=int)

# One vectorised comparison over all matches instead of a row-by-row loop;
# int() keeps the count a plain Python integer.
correct_predictions_count = int(np.count_nonzero(v[winner_ids] > v[loser_ids]))

print(f'Predictability: {correct_predictions_count/games_count*100:.2f}%')

Predictability: 67.53%


### Serialize results

In [13]:
import json

# Bundle the accuracy figure and the complete ranking (best first) into one
# JSON document.
results = {
    'predictability': correct_predictions_count/games_count,
    'rankings': [
        {
            'ranking': position,
            'rating': v[player_index],
            'player': players[player_index]
        }
        for position, player_index in enumerate(iSort, start=1)
    ],
}

# Serialize fully before opening the file, so a serialization error cannot
# leave a partially written file behind.
json_data = json.dumps(results, indent=4)

with open('pagerank_results.json', 'w') as json_file:
    json_file.write(json_data)

print("Serialized results saved to pagerank_results.json.")

Serialized results saved to pagerank_results.json.
