In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('Banco de Súmulas - Sumulas.csv')

# choose class
data = data[data['Classe Vela'] == 'IQFOIL Fem.']

In [13]:
# create dictionary to map competitor names to unique indices
competitor_to_index = {competitor: idx for idx, competitor in enumerate(data['Nome Competidor'].unique())}

# create matrix of size (n_players, n_players) with zeros
n_players = len(competitor_to_index)
matrix_alpha = np.zeros((n_players, n_players))
matrix_beta = np.zeros((n_players, n_players))

# create matrix of size (n_players, n_players) with zeros to store the number of competitions each competitor has participated
matrix_competitions = np.zeros((n_players, n_players))

# iterate through competitions
for competition in data['Nome Competição'].unique():
    # get data for this competition
    data_competition = data[data['Nome Competição'] == competition]
    data_competition = data_competition.drop_duplicates(subset='Nome Competidor', keep='first')

    # get list of competitors in this competition
    competitors = data_competition['Nome Competidor'].unique()
    # get number of competitors in this competition
    n_players = len(competitors)
    
    # iterate through competitors in this competition setting (i, j) = 1 if i beats j in this competition
    # get i and j from the dictionary
    for i in range(n_players):
        for j in range(n_players):
            competitor_i = competitors[i]
            competitor_j = competitors[j]
            posicao_i = data_competition[(data_competition['Nome Competidor'] == competitor_i)]['Posição Geral'].values[0]
            posicao_j = data_competition[(data_competition['Nome Competidor'] == competitor_j)]['Posição Geral'].values[0]

            # get index of competitor i and j in the matrix
            index_i = competitor_to_index[competitor_i]
            index_j = competitor_to_index[competitor_j]

            # update matrix of competitions
            matrix_competitions[index_i][index_j] += 1

            if posicao_i < posicao_j:
                # update the cumulative matrix
                matrix_alpha[index_i][index_j] += 1
                # update matrix beta with difference in points
                matrix_beta[index_i][index_j] += (posicao_j - posicao_i)/(posicao_i + posicao_j)

In [7]:
# divide matrix alpha by matrix competitions where the number of competitions is greater than 0
matrix_alpha = np.divide(matrix_alpha, matrix_competitions, out=np.zeros_like(matrix_alpha), where=matrix_competitions!=0)
# divide matrix beta by matrix competitions where the number of competitions is greater than 0
matrix_beta = np.divide(matrix_beta, matrix_competitions, out=np.zeros_like(matrix_beta), where=matrix_competitions!=0)

In [12]:
#checking results
write_matrix = pd.DataFrame(matrix_alpha)

# columns and index are the names of the competitors
write_matrix.columns = competitor_to_index.keys()
write_matrix.index = competitor_to_index.keys()

write_matrix["Alisa ENGELMANN"]

Hélène NOESMOEN          0.25
Emma WILSON              0.20
Maja DZIARNOWSKA         0.20
Islay WATSON             0.20
Sara WENNEKES            0.20
                         ... 
Nicole van der VELDEN    1.00
SUNAGA Yuki              1.00
DU Jie                   1.00
ZHENG Manjia             1.00
Helene NOESMOEN          1.00
Name: Alisa ENGELMANN, Length: 179, dtype: float64

In [8]:
# Keener's ranking

# i dont think we consider at all the fact that some competitors may have been in more competitions than others
# so we should normalize the matrix by the number of competitions each competitor was in
# DO IT LATER

# W = matrix_alpha
W = matrix_alpha

# get d vector, d=(W + W^T)1, where 1 is a vector of ones 
d = np.dot(W + W.T, np.ones(W.shape[0]))

# get D matrix, D = diag(d)
D = np.diag(d)

# perron frobeniun eigenvector of D^-1 W
eigenvalues, eigenvectors = np.linalg.eig(np.dot(np.linalg.inv(D), W))
idx = np.argmax(eigenvalues)
eigenvector = eigenvectors[idx]

# get the ranking
ranking = pd.DataFrame(eigenvector, index=competitor_to_index.keys(), columns=['ranking'])
ranking = ranking.sort_values(by='ranking', ascending=False)

ranking.head(30)

Unnamed: 0,ranking
Linda OPRANDI,0.388205+0.010687j
Demita VEGA DE LILLE,0.388205-0.010687j
Julia GÓMEZ ROA,0.384972+0.000000j
Emma Viktoria MILLEND,0.384972-0.000000j
Buse TUNC,0.378737-0.000000j
Lena HAVERLAND,0.378737+0.000000j
Pola WAWRZYNIAK,0.344380-0.000000j
Maya GYSLER,0.344380+0.000000j
Dilara URALP,0.340247-0.000000j
Jenna GIBSON,0.340247+0.000000j


In [9]:
# keener's ranking with beta matrix

# i dont think we consider at all the fact that some competitors may have been in more competitions than others
# so we should normalize the matrix by the number of competitions each competitor was in
# DO IT LATER

# S = matrix_beta
S = matrix_beta

# get d vector, d=(S + S^T)1, where 1 is a vector of ones
d = np.dot(S + S.T, np.ones(S.shape[0]))

# get D matrix, D = diag(d)
D = np.diag(d)

# create empty K matrix with same size as matrix_beta
K = np.zeros(matrix_beta.shape)

# for entry (i, j) in beta matrix, define h = ((i, j) + 1)/((i, j) + (j, i) + 2)
for i in range(n_players):
    for j in range(n_players):
        x = (matrix_beta[i][j] + 1)/(matrix_beta[i][j] + matrix_beta[j][i] + 2)
        h = 1/2 + 1/2 * np.sign(x - 1/2) * np.sqrt(abs(2*x - 1))
        K[i][j] = h

# perron frobeniun eigenvector of D^-1 K
eigenvalues, eigenvectors = np.linalg.eig(np.dot(np.linalg.inv(D), K))
idx = np.argmax(eigenvalues)
eigenvector = eigenvectors[idx]

# get the ranking
ranking = pd.DataFrame(eigenvector, index=competitor_to_index.keys(), columns=['ranking'])
ranking = ranking.sort_values(by='ranking', ascending=False)

ranking.head(30)

Unnamed: 0,ranking
Alisa ENGELMANN,0.123724+0.040909j
Ingrid PUUSTA,0.123724-0.040909j
Sara CHOLNOKY,0.107442+0.033959j
Sunaga YUKI,0.107442-0.033959j
Demita VEGA DE LILLE,0.097451+0.011470j
Manon BERGER,0.097451-0.011470j
Emma WILSON,0.087818+0.032264j
Maja DZIARNOWSKA,0.087818-0.032264j
Johanna HJERTBERG,0.082713+0.132266j
Fianne VAN DEN BRULE,0.082713-0.132266j
