In [1]:
import pandas as pd

## read/process data

In [41]:
# Load the data
free4all = pd.read_csv('FreeForAll.csv', header=None)
head2head = pd.read_csv('HeadToHead.csv', header=None)

# Assign column names
free4all.columns = ['DateTime', 'MatchID', 'Unused1', 'Unused2', 'PlayerID', 'Unused3', 'Score']
head2head.columns = ['DateTime', 'MatchID', 'Unused1', 'Unused2', 'PlayerID', 'Unused3', 'Score']


In [42]:
# For free4all
free4all['MatchID'] = free4all['MatchID'].astype(int)
free4all['PlayerID'] = free4all['PlayerID'].astype(str)  # PlayerID seems to be a string
free4all['Score'] = free4all['Score'].astype(int)

# For head2head
head2head['MatchID'] = head2head['MatchID'].astype(int)
head2head['PlayerID'] = head2head['PlayerID'].astype(str)
head2head['Score'] = head2head['Score'].astype(int)


In [43]:
# dates
free4all['DateTime'] = pd.to_datetime(free4all['DateTime'])
head2head['DateTime'] = pd.to_datetime(head2head['DateTime'])


## see data

In [44]:
print(free4all.head())
print(head2head.head())


             DateTime  MatchID  Unused1  Unused2          PlayerID  Unused3  \
0 2004-07-08 15:45:00   215333        2       10  2726956381513258        2   
1 2004-07-08 15:45:00   215333        2       10  2573485501354547        0   
2 2004-07-08 15:45:00   215333        2       10  2534615147427891        1   
3 2004-07-08 15:45:00   215333        2       10  2573485501354543        3   
4 2004-07-08 16:00:42   215334        2       10  2534615147427891        0   

   Score  
0      1  
1      0  
2      7  
3      2  
4     14  
             DateTime  MatchID  Unused1  Unused2          PlayerID  Unused3  \
0 2004-08-06 18:13:50   281884        2       50  2726956381513270        1   
1 2004-08-06 18:13:50   281884        2       50  2741700308864766        0   
2 2004-08-06 18:19:41   281900        2       20  2731647631125118        0   
3 2004-08-06 18:19:41   281900        2       20  2740359951832997        1   
4 2004-08-06 18:19:55   281902        2       50  27370090592530

In [45]:
print(free4all['Score'].describe())
print(head2head['Score'].describe())


count    286926.000000
mean          7.110279
std           4.926285
min         -54.000000
25%           3.000000
50%           7.000000
75%          10.000000
max          50.000000
Name: Score, dtype: float64
count    12454.000000
mean         3.692870
std          3.044047
min        -39.000000
25%          1.000000
50%          4.000000
75%          7.000000
max         10.000000
Name: Score, dtype: float64


## construct hypergraph

### Initialize Data Structures

In [48]:
import numpy as np
import networkx as nx

# Since NetworkX doesn't support hypergraphs natively, we represent the hypergraph using custom data structures
players = free4all['PlayerID'].unique()
player_indices = {player_id: idx for idx, player_id in enumerate(players)}
n = len(players)

# Initialize degree dictionaries
d_v = np.zeros(n)  # Vertex degrees


### Create Hyperedges and Compute Weights

Hyperedge Weights (ω(e))
According to the paper:

ω(e) = (standard deviation of scores in match e) + 1


Edge-Dependent Vertex Weights (γₑ(v))

γₑ(v) = exp(score of player v in match e)

Compute δ(e) = Σ₍ᵥ∈ₑ₎ γₑ(v)

In [49]:
# Initialize structures
hyperedges = {}  # Key: MatchID, Value: list of player IDs
omega_e = {}     # Hyperedge weights
delta_e = {}     # Sum of gamma_e(v) over v in e
gamma_e = {}     # Key: (MatchID, PlayerID), Value: gamma_e(v)

# Process each free-for-all match
for match_id, group in free4all.groupby('MatchID'):
    player_ids = group['PlayerID'].tolist()
    scores = group['Score'].tolist()
    # Compute gamma_e(v)
    gamma_values = np.exp(scores)
    # Store hyperedge information
    hyperedges[match_id] = player_ids
    # Compute ω(e)
    std_score = np.std(scores)
    omega = std_score + 1  # Avoid ω(e) = 0
    omega_e[match_id] = omega
    # Compute δ(e)
    delta = np.sum(gamma_values)
    delta_e[match_id] = delta
    # Store γₑ(v)
    for pid, gamma_v in zip(player_ids, gamma_values):
        gamma_e[(match_id, pid)] = gamma_v
    # Update vertex degrees d(v)
    for pid in player_ids:
        idx = player_indices[pid]
        d_v[idx] += omega


### Construct the Transition Matrix

$p_{v,w}=∑_{e∈E(v)} (ω(e)/d(v)) (γ_e(w)/δ(e))$

#### initialize transition matrix

In [50]:
P = np.zeros((n, n))

#### compute transition probabilities

In [51]:
for match_id, player_ids in hyperedges.items():
    omega = omega_e[match_id]
    delta = delta_e[match_id]
    for v in player_ids:
        idx_v = player_indices[v]
        d_v_v = d_v[idx_v]
        # Avoid division by zero
        if d_v_v == 0:
            continue
        for w in player_ids:
            idx_w = player_indices[w]
            gamma_w = gamma_e[(match_id, w)]
            P[idx_v, idx_w] += (omega / d_v_v) * (gamma_w / delta)


#### verify transition matrix

In [52]:
row_sums = P.sum(axis=1)
print(f"Row sums (should be close to 1): min={row_sums.min()}, max={row_sums.max()}")


Row sums (should be close to 1): min=0.9999999999999987, max=1.000000000000001


### random walk with restart

#### define restart probability

In [53]:
beta = 0.4  # Restart probability as per the paper


#### modify transition matrix

In [54]:
# Identity matrix
I = np.identity(n)

# Adjust the transition matrix
P_rw = (1 - beta) * P + beta * I


### compute the stationary distribution

In [55]:
def compute_stationary_distribution(P, tol=1e-6, max_iter=1000):
    n = P.shape[0]
    v = np.ones(n) / n  # Start with uniform distribution
    for i in range(max_iter):
        v_new = P.T @ v
        if np.linalg.norm(v_new - v, ord=1) < tol:
            print(f"Converged after {i+1} iterations")
            break
        v = v_new
    return v

stationary_distribution = compute_stationary_distribution(P_rw)


### rank the players

In [56]:
# Create a mapping from index to player ID
index_to_player = {idx: pid for pid, idx in player_indices.items()}

# Create a list of (PlayerID, Score)
ranking = [(index_to_player[idx], score) for idx, score in enumerate(stationary_distribution)]

# Sort the ranking
ranking.sort(key=lambda x: x[1], reverse=True)

# Display top 10 players
for rank, (player_id, score) in enumerate(ranking[:10], start=1):
    print(f"Rank {rank}: PlayerID {player_id}, Score {score}")


Rank 1: PlayerID 2573485501354592, Score 0.04946070224492898
Rank 2: PlayerID 2534615147427902, Score 0.03071966398073391
Rank 3: PlayerID 2728296738545264, Score 0.029687577691484595
Rank 4: PlayerID 2727626560029400, Score 0.017134364530741297
Rank 5: PlayerID 2573485501354550, Score 0.014522938312585058
Rank 6: PlayerID 2742370487380979, Score 0.011993129663759946
Rank 7: PlayerID 2534615147427891, Score 0.0111911417811651
Rank 8: PlayerID 2731647631125295, Score 0.010958992798726874
Rank 9: PlayerID 2730977452609251, Score 0.010501430055279043
Rank 10: PlayerID 2573485501354625, Score 0.010359022652640751


### evaluate the ranking

#### prepare the 1-v-1 match data

In [59]:
# Prepare evaluation data
evaluation_data = []

for match_id, group in matches:
    if len(group) != 2:
        continue  # Ensure it's a 1-v-1 match
    players = group['PlayerID'].tolist()
    scores = group['Score'].tolist()
    if scores[0] == scores[1]:
        continue  # Skip ties
    winner_idx = 0 if scores[0] > scores[1] else 1
    loser_idx = 1 - winner_idx
    winner_id = players[winner_idx]
    loser_id = players[loser_idx]
    evaluation_data.append({'MatchID': match_id, 'WinnerID': winner_id, 'LoserID': loser_id})


#### evaluate prediction

In [60]:
correct_predictions = 0
total_matches = len(evaluation_data)

# Create a dictionary for quick lookup of player scores
player_scores = {player_id: score for player_id, score in ranking}

for match in evaluation_data:
    winner_id = match['WinnerID']
    loser_id = match['LoserID']
    winner_score = player_scores.get(winner_id, None)
    loser_score = player_scores.get(loser_id, None)
    if winner_score is None or loser_score is None:
        continue  # Skip if we don't have rankings for both players
    predicted_winner = winner_id if winner_score > loser_score else loser_id
    if predicted_winner == winner_id:
        correct_predictions += 1

accuracy = correct_predictions / total_matches * 100 if total_matches > 0 else 0
print(f"Accuracy of predictions: {accuracy:.2f}%")


Accuracy of predictions: 69.51%
