# Massey ranking

Description: Construct a Massey ranking of data.
  
Created by Tim Chartier

### Set parameters

gameFilename - game data file, presumed to be in the format from 
the Massey rating data server, which can be found at 
http://www.masseyratings.com/. 

teamFilename - team data file

k - number of teams to print in the final ranking - set to 0 to get all teams

In [27]:
import sys
!{sys.executable} -m pip install numpy matplotlib pandas

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [28]:
game_filename = 'atp2023games.csv'
k = 10

### Load the games

In [29]:
from pprint import pprint
import pandas as pd

games = pd.read_csv(game_filename)

pprint(games)
games_count = len(games)

                            tourney_id                  tourney_name surface  \
0                            2023-9900                    United Cup    Hard   
1                            2023-9900                    United Cup    Hard   
2                            2023-9900                    United Cup    Hard   
3                            2023-9900                    United Cup    Hard   
4                            2023-9900                    United Cup    Hard   
...                                ...                           ...     ...   
1979  2023-M-DC-2023-WG2-PO-RSA-LUX-01  Davis Cup WG2 PO: RSA vs LUX    Hard   
1980  2023-M-DC-2023-WG2-PO-TUN-CYP-01  Davis Cup WG2 PO: TUN vs CYP    Hard   
1981  2023-M-DC-2023-WG2-PO-TUN-CYP-01  Davis Cup WG2 PO: TUN vs CYP    Hard   
1982  2023-M-DC-2023-WG2-PO-TUN-CYP-01  Davis Cup WG2 PO: TUN vs CYP    Hard   
1983  2023-M-DC-2023-WG2-PO-TUN-CYP-01  Davis Cup WG2 PO: TUN vs CYP    Hard   

      draw_size tourney_level  tourney_

### Reassign new IDs for players

In [30]:
player_names = pd.concat([games['winner_name'], games['loser_name']]).unique()

player_id_map = {player_name: idx for idx, player_name in enumerate(player_names)}

games['winner_id'] = games['winner_name'].map(player_id_map)
games['loser_id'] = games['loser_name'].map(player_id_map)

### Create a list of players

In [31]:
players = {}

for i, game in games.iterrows():
    winner_id = game['winner_id']
    winner_name = game['winner_name']
    loser_id = game['loser_id']
    loser_name = game['loser_name']
    
    players[winner_id] = winner_name
    players[loser_id] = loser_name

players_count = len(players)

### Create the Massey linear system

In [32]:
import numpy as np

def get_games_won(scores):
    winner_score = 0
    loser_score = 0

    for set_score in scores.split():
        set_scores = set_score.split('-')
        
        if len(set_scores) != 2:
            continue
        
        if '[' in set_scores[0]:
            tiebreak_score = set_score[1:-1]
            winner_score_to_add, loser_score_to_add = map(int, tiebreak_score.split('-'))
        elif '(' in set_scores[1]:
            winner_score_to_add = int(set_scores[0])
            loser_score_to_add = int(set_scores[1].split('(')[0])
        else:
            winner_score_to_add = int(set_scores[0])
            loser_score_to_add = int(set_scores[1])
        
        winner_score += winner_score_to_add
        loser_score += loser_score_to_add

    return winner_score, loser_score

massey_matrix = np.zeros((players_count,players_count))
b = np.zeros(players_count)

for i, game in games.iterrows():
    winner_id = game['winner_id']
    loser_id = game['loser_id']
    winner_score, loser_score = get_games_won(game['score'])
    
    massey_matrix[winner_id, loser_id] -= 1
    massey_matrix[loser_id, winner_id] -= 1

    massey_matrix[winner_id, winner_id] += 1
    massey_matrix[loser_id, loser_id] += 1
    
    delta = abs(winner_score - loser_score)
    
    if winner_score > loser_score:
        b[winner_id] += delta
        b[loser_id] -= delta
    elif winner_score < loser_score:
        b[winner_id] -= delta
        b[loser_id] += delta
        
# replace last row with ones and 0 on RHS
massey_matrix[-1,:] = np.ones((1,players_count))
b[-1] = 0

### Calculate linear system

In [33]:
r = np.linalg.solve(massey_matrix,b)
iSort = np.argsort(-r)

LinAlgError: Singular matrix

### Print the ranking of the teams

In [None]:
print('\n\n************** MASSEY Rating Method **************\n')
print('===========================')
print('Rank   Rating    Team   ')
print('===========================')
if k==0:
    number_of_players_displayed = players_count
else:
    number_of_players_displayed = k

for i in range(number_of_players_displayed):
    print(f'{i+1:4d}   {r[iSort[i]]:.5f}  {players[iSort[i]]}')

print('')   # extra carriage return



************** MASSEY Rating Method **************

Rank   Rating    Team   
   1   8.96489   Utah
   2   6.01822   LA_Clippers
   3   5.67156   Phoenix
   4   5.56844   Milwaukee
   5   5.27511   Philadelphia
   6   4.81822   Denver
   7   4.23511   Brooklyn
   8   2.76489   LA_Lakers
   9   2.25822   Dallas
  10   2.14178   Atlanta



### Calculate predictability of method

In [None]:
correct_predictions_counter = 0
for i, game in games.iterrows():
    winner_id = game['winner_id']
    loser_id = game['loser_id']
    winner_score, loser_score = get_games_won(game['score'])
    
    if winner_score > loser_score and r[winner_score] > r[loser_score]:
        numberCorrectPredictions += 1
    elif loser_score > winner_score and r[loser_score] > r[winner_score]:
        numberCorrectPredictions += 1
    elif winner_score == loser_score and r[winner_score] == r[loser_score]:
        numberCorrectPredictions += 1

print(f'Predictability: {correct_predictions_counter/games_count*100:.2f}%') 


NameError: name 'r' is not defined