# L5: rankings from pairwise comparisons
Here we explore how to extract hidden rankings from pairwise comparisons, e.g. games between teams in sport.


In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import networkx as nx

In [None]:
import sys
sys.path.append('../../../src/')
import tools as tl
import plot as viz
from plot import BLACK
import io as io
import ranking_tools.springrank as sr
import ranking_tools.bradley_terry as bt
import ranking_tools.process_input_into_matrix as prcs

In [None]:
import scipy.stats as st
from sklearn.cluster import AffinityPropagation
from matplotlib.lines import Line2D
from adjustText import adjust_text
from scipy.stats import pearsonr, spearmanr

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Qualitative palette used as the default colour source for the notebook.
colormap = plt.cm.tab10
# The palette only defines `colormap.N` distinct colours. Wrap the index so
# keys beyond that cycle through the palette; out-of-range integer lookups
# would otherwise all fall back to the colormap's single "over" colour.
colors = {i: colormap(i % colormap.N) for i in range(20)}

In [None]:
import cv_tools as cvtl
from statsbombpy import sb


In [None]:
# Output directory handed to `tl.savefig` (as `outdir`) by the plotting cells.
outdir_fig = '../figures/'
# Lecture number, forwarded to `tl.get_filename` when building figure names.
lecture_id = 5

In [None]:
# Fixed seed: every draw made through `prng` is repeatable on a fresh run.
seed = 10
prng = np.random.RandomState(seed)

# 0. Download code
- [SpringRank](https://github.com/LarremoreLab/SpringRank/blob/master/springrank/springrank.py)


# 1. Import data

**Source**: download a dataset from [StatsBomb open data](https://github.com/statsbomb/open-data/tree/master).  

We will use the python package [`statsbombpy`](https://github.com/statsbomb/statsbombpy) to process the raw data.

We start by downloading matches from at least two different competitions, so that we can compare them.

## 1.1 Import raw data

In [None]:
# Competition table from statsbombpy; the next cell reads its
# `competition_id`, `competition_name` and `competition_international` columns.
df_comp = sb.competitions()

In [None]:
# Keep only the rows flagged as non-international competitions.
mask = df_comp['competition_international'].eq(False)
df_comp = df_comp.loc[mask]
# competition id -> competition name, used for labels in later cells.
competitionId2Name = {
    comp_id: comp_name
    for comp_id, comp_name in zip(df_comp['competition_id'], df_comp['competition_name'])
}
df_comp['competition_name'].unique()

In [None]:
# Competition ids to analyse and, position by position, the season id that is
# requested for each of them when the matches are downloaded.
competition_ids = [37,49,12,2,11]
season_ids = [90,3,27,27,27]

# Position of each competition in `competition_ids`; used later as a sort key.
compId2sort = {c: i for i,c in enumerate(competition_ids)}

In [None]:
# One matches table per competition, keyed by competition id; the season is
# looked up by position in `season_ids`.
games = {c: sb.matches(competition_id=c, season_id=season_ids[i]) for i, c in enumerate(competition_ids)}

In [None]:
# Columns shown when inspecting raw matches (reused by a later cell).
cols = ['match_id', 'match_date','home_team', 'away_team', 'home_score', 'away_score']
# Preview of the raw matches for competition 49.
games[49][cols].head()

## 1.2 Process into a matrix

In [None]:
# Processed games table per competition (output of `prcs.process_games`).
df = {competition_id: prcs.process_games(games[competition_id]) for competition_id in competition_ids}

# A[k]: matrix built from competition k's games; encoder_teams[k]: the encoder
# returned with it (its `classes_` are used as team labels further down).
# NOTE(review): what method='points' puts in each entry is defined in
# `prcs.df2matrix` — confirm there.
A, encoder_teams = {}, {}
for k,v in df.items():
    A[k],encoder_teams[k] = prcs.df2matrix(v,score_label='points',method='points')
    print(k,A[k].shape)

In [None]:
def get_points(df: pd.DataFrame, competition_id: int = None):
    '''
    Build a standings table with one row per team.

    Parameters
    ----------
    df : table with columns `home_team`, `away_team`, `home_points`, `away_points`
         (one row per match).
    competition_id : if given, written to a `competition_id` column of the result.

    Returns
    -------
    DataFrame with columns `node_label` (team), `points` (total), `n_matches`,
    `points_prg` (points per game, rounded to 2 decimals) and optionally
    `competition_id`, sorted by `points_prg` in descending order with a fresh
    0..n-1 index.
    '''
    # Tally home and away games separately, using the same column names for
    # both sides so that the two tables can be stacked.
    per_side = []
    for side in ('home', 'away'):
        tally = (
            df.groupby(by=[f'{side}_team'])[f'{side}_points']
            .agg(['count', 'sum'])
            .reset_index()
            .rename(columns={f'{side}_team': 'node_label', 'count': 'n_matches', 'sum': 'points'})
        )
        per_side.append(tally)

    # Add up the home and away tallies of each team.
    stacked = pd.concat(per_side).reset_index()
    totals = stacked.groupby(by=['node_label'])[['points', 'n_matches']].agg(['sum'])
    df_points = totals.droplevel(1, axis=1).reset_index()

    points_per_game = df_points['points'] / df_points['n_matches']
    df_points.loc[:, 'points_prg'] = points_per_game.map(lambda value: np.round(value, 2))
    df_points = df_points.sort_values(by='points_prg', ascending=False).reset_index(drop=True)

    if competition_id is not None:
        df_points.loc[:, 'competition_id'] = competition_id
    return df_points

In [None]:
# Standings per competition (keyed by competition id) ...
df_points_comp = {k: get_points(v, competition_id=k) for k,v in df.items()}
# ... and all of them stacked in one long table (`competition_id` tells them apart).
df_points = pd.concat(df_points_comp.values())

# 2. Run ranking models

We can proceed by learning scores from the outcomes of matches 

## 2.1 SpringRank

In [None]:
# Initialize and fit model
# One SpringRank model per competition, fitted on that competition's matrix.
model = {}
scaled_ranks = {}
stats = []
for k,v in A.items():
    model[k] = sr.SpringRank()
    model[k].fit(v)
    scaled_ranks[k] = model[k].get_rescaled_ranks(0.75) # Get the scaled ranks so that a one-rank difference means a 75% win rate
    # Summary row for this competition; note that the model's `delta_beta`
    # ends up in the column named 'delta_level' below.
    d = [k,competitionId2Name[k],model[k].get_beta(),model[k].depth, model[k].n_levels,model[k].delta_beta]
    stats.append(d)
df_stats = pd.DataFrame(stats, columns = ['competition_id','competition_name', 'beta','depth','n_levels','delta_level'])
df_stats

## 2.2 Bradley-Terry model

In [None]:
# Fit one Bradley-Terry model per competition, on the same matrices that were
# used for SpringRank.
model_bt = {}
scaled_ranks_bt = {}
for k, v in A.items():
    model_bt[k] = bt.BradleyTerry()
    model_bt[k].fit(v, method='em')
    # Get the scaled ranks so that a one-rank difference means a 75% win rate.
    # (The former `np.exp(model_bt[k].ranks)` assignment was overwritten by
    # this line straight away, so it has been removed.)
    scaled_ranks_bt[k] = model_bt[k].get_rescaled_ranks(0.75)



In [None]:
# Per competition: integer node id -> team label, in the order of the
# encoder's `classes_` ...
nodeId2Label = {
    comp_id: dict(enumerate(encoder.classes_))
    for comp_id, encoder in encoder_teams.items()
}
# ... and the inverse lookup, team label -> integer node id.
nodeLabel2Id = {
    comp_id: {label: node_id for node_id, label in id2label.items()}
    for comp_id, id2label in nodeId2Label.items()
}

In [None]:
def _scores_frame(scores_by_competition: dict, score_col: str) -> pd.DataFrame:
    """Stack per-competition score vectors into one long table.

    Parameters
    ----------
    scores_by_competition : competition id -> 1-D array of scores, indexed by node id.
    score_col : name of the column that receives the scores.

    Returns
    -------
    DataFrame with columns `node_id`, `node_label`, `<score_col>`, `competition_id`.
    """
    frames = []
    for comp_id, scores in scores_by_competition.items():
        scores = np.asarray(scores)
        node_ids = np.arange(scores.shape[0])
        frames.append(pd.DataFrame({
            'node_id': node_ids,
            'node_label': [nodeId2Label[comp_id][i] for i in node_ids],
            score_col: scores,
            'competition_id': [comp_id] * len(node_ids),
        }))
    return pd.concat(frames)


# Toggle between the rescaled scores and the raw `ranks` of the fitted models.
show_rescaled = True
fig_label = 'rescaled' if show_rescaled else 'not_rescaled'
if show_rescaled:
    scores_sr, scores_bt = scaled_ranks, scaled_ranks_bt
else:
    scores_sr = {comp_id: m.ranks for comp_id, m in model.items()}
    # Bug fix: the Bradley-Terry scores must come from `model_bt`; this branch
    # used to read `model` (SpringRank) for both columns.
    scores_bt = {comp_id: m.ranks for comp_id, m in model_bt.items()}

df_res = _scores_frame(scores_sr, 'score_sr')
df_res_bt = _scores_frame(scores_bt, 'score_bt')
# One row per (competition, team) carrying both scores.
df_res = df_res.merge(df_res_bt, on=['node_id', 'node_label', 'competition_id'])



Let's get aggregate statistics to characterize the distributions per league

In [None]:
algo = 'sr'
metric = f'score_{algo}'
# Summary statistics of the selected score, one row per competition ...
score_summary = df_res.groupby(by='competition_id')[metric].agg(['describe']).droplevel(0, axis=1)
# ... ordered like `competition_ids` rather than by numeric id.
df_plot_dist = score_summary.reset_index().sort_values(
    by='competition_id',
    key=lambda ids: ids.map(compId2sort),
)
df_plot_dist

# 3. Analyze results

In [None]:
# Marker colour / marker size used for the min-max range in the next figure.
mc = viz.default_colors_dict['blue_dark']
ms = 200
# Palette used by the plots below. This rebinds `colors`, which was a dict
# built from the tab10 colormap near the top of the notebook.
colors = [viz.default_colors_dict['blue_sb_dark'], viz.default_colors_dict['green_forest'],viz.default_colors_dict['red_adobe'],
          viz.default_colors_dict['yellow_sand'],viz.default_colors_dict['purple'],viz.default_colors_dict['dark_grey'],viz.default_colors_dict['purple_sb_dark']]

In [None]:
# Competition names in the row order of `df_plot_dist`.
sorted_ylabels = [competitionId2Name[c] for c in df_plot_dist['competition_id']]

In [None]:
# Human-readable names for the short keys used in titles and axis labels.
label_dict = {'sr':'SpringRank','bt':'Bradley-Terry','points_prg':'Points per game'}

Plot score distribution over different leagues 

In [None]:
title = f'{label_dict[algo]} scores from soccer matches'
point_label = 'node_label'
nmax = min(200,len(df_plot_dist))
n_display_max = 10
fig, ax = plt.subplots(1,1, figsize=(8,8))

# One horizontal row per competition (first row of `df_plot_dist` at the top):
# a segment spanning [min, max] of the score, with both extremes highlighted.
xs = np.arange(len(df_plot_dist), 0,-1)
plt.hlines(xs[:nmax],xmin=df_plot_dist[:nmax]['min'],xmax=df_plot_dist[:nmax]['max'], alpha=0.7, color = mc, lw = 2, ls='-',zorder=1)
plt.scatter(df_plot_dist[:nmax]['max'], xs[:nmax],s=ms, alpha=0.6, c = viz.default_colors_dict['blue_dark'],edgecolors=BLACK,zorder=5)
plt.scatter(df_plot_dist[:nmax]['min'], xs[:nmax],s=ms, alpha=0.6, c = viz.default_colors_dict['blue'],edgecolors=BLACK,zorder=5)

# Individual points
ylabels = []
teams_to_display = []
for i, cid in enumerate(competition_ids):
    g = df_res[df_res.competition_id == cid]
    l = len(g)
    # Vertical jitter around the competition's row. It is drawn in one call
    # from the seeded generator, so the figure is reproducible and the result
    # is a flat (l,) array rather than an (l, 1) one.
    x_jittered = xs[i] + st.t(df=6, scale=0.08).rvs(size=l, random_state=prng)
    xjit2name = dict(zip(g[point_label],x_jittered))

    # Colour teams by cluster of similar score.
    x = np.array(g[metric])
    clustering = AffinityPropagation(random_state=5).fit(x.reshape(-1, 1))
    clabels = clustering.labels_
    n_clusters = len(np.unique(clabels))

    # The number of clusters is data-driven and may exceed the palette size:
    # cycle through the palette instead of raising an IndexError.
    cs = [colors[label % len(colors)] for label in clabels]
    plt.scatter(g[metric], x_jittered,s=50, alpha=0.8, c = cs,edgecolors=BLACK,zorder=1)

    msg = f"{sorted_ylabels[i]} (n={l})".replace("Women's",'')
    msg = f"{msg}\nbeta = {model[cid].get_beta():.2f}"
    msg = msg.replace("Women",'')
    ylabels.append(f"{msg}")

    # select points to annotate: teams in the top / bottom 20% of the scores
    cond1 = g[metric] >= g[metric].quantile(0.80)
    cond2 = g[metric] <= g[metric].quantile(0.20)
    mask = np.logical_or(cond1,cond2)
    n_display = int(min(n_display_max, np.sum(mask)))

    # Sort once (it used to be re-sorted on every iteration) and iterate over
    # rows, so that the outer loop index `i` is no longer shadowed.
    df_tmp = g[mask].sort_values(by=[metric], ascending=False)
    for _, row in df_tmp.head(n_display).iterrows():
        tname = row[point_label]
        teams_to_display.append([row[metric], xjit2name[tname], tname])

# ------------ marker annotation ----------------------------------------------
ts = []
for d in teams_to_display:
    msg = f"{d[2]}"
    ts.append(ax.text(d[0], d[1], msg, fontsize=8, zorder=1))
adjust_text(ts, force_text=(0.5, 0.5),
            arrowprops=dict(arrowstyle='-|>', color='black', connectionstyle="arc3,rad=-.5", zorder=10),
            ax=ax)
# ----------------------------------------------------------
lines = [Line2D([0], [0], color=c,  marker='o', mec="w", linestyle='', markersize=15,) for c in [viz.default_colors_dict['blue'],viz.default_colors_dict['blue_dark']]]
plt.legend(lines,['Min','Max'] , labelcolor= '#101628',bbox_to_anchor=(0.8, 1.0), loc="lower center",ncols = 2,frameon=False, fontsize= 14)

plt.yticks(xs[:nmax],ylabels[:nmax],fontsize=12)
plt.xticks(fontsize=14)
plt.xlabel('Score',fontsize=14)
plt.gca().grid(axis='x')

msg = f"{title}"
fig.text(0,1.,msg,fontweight="normal",fontsize=24,ha="left",color=viz.default_colors_dict['red'])

subtitle = f"Scores are calculate from games' results in terms of score difference.\nMarker colors are clusters of teams with similar scores."
fig.text(
    0., 0.0, f"{subtitle}", size=11,
    color="#000000",
    ha="left"
)
plt.tight_layout()

# The generated file name is discarded on purpose: `tl.savefig` is called with
# outfile=None. Drop the `filename = None` line to use the generated name.
filename = tl.get_filename(f'soccer_{algo}_{fig_label}', lecture_id=lecture_id)
filename = None
tl.savefig(plt, outfile=filename, outdir=outdir_fig)


## 3.1 How is this related to the actual points attained by each team?

Let's merge datasets of learned scores and official league standings

In [None]:
# Learned scores joined with the standings (one row per team and competition),
# best points-per-game first.
df_tot = df_res.merge(df_points,on=['node_label','competition_id']).sort_values(by='points_prg',ascending=False).reset_index(drop=True)
df_tot.head()

In [None]:
# Scatter of learned score (x) against points per game (y), one colour per
# competition, with rank/linear correlation coefficients in the legend.
fig, ax = plt.subplots(1,1, figsize=(6,6))
algo = 'sr'
x = f'score_{algo}'
y = 'points_prg'
plot_linear_regression = True
# groupby iterates the competitions in sorted competition_id order, so
# `colors[i]` follows that order (not the order of `competition_ids`).
for i, (n,g) in enumerate(df_tot.groupby(by='competition_id')):
    spearman_coef = spearmanr(g[x],g[y])[0]
    pearson_coef = pearsonr(g[x],g[y])[0]
    msg = f"{competitionId2Name[n]}, sp = {spearman_coef:.2f} | pr = {pearson_coef:.2f}"
    ax.scatter(g[x],g[y],c=colors[i],label=msg)
    # ----------------------------------------------
    if plot_linear_regression == True:
        # Least-squares line y = m*x + b for this competition, drawn across
        # the x-range the axes currently have.
        m, b = np.polyfit(list(g[x]), list(g[y]), 1)
        xmin, xmax, ymin, ymax = plt.axis()
        xs = np.linspace(xmin, xmax, 100)
        ax.plot(xs, m * xs + b, ls='--', c=colors[i], alpha=0.8, lw=1)
    # ----------------------------------------------
ax.set_xlabel(f"Score {algo.upper()}")
ax.set_ylabel(label_dict[y])
plt.legend(loc='best',fontsize=10)

# The generated name is immediately replaced by None before `tl.savefig` is called.
# NOTE(review): presumably outfile=None means "do not write a file" — confirm in `tl.savefig`.
filename = tl.get_filename(f'soccer_{algo}_vs_points', lecture_id=lecture_id)
filename = None
tl.savefig(plt, outfile=filename, outdir=outdir_fig)

In [None]:
# Standings table computed for competition 49.
df_points_comp[49]

In [None]:
# Score vs points per game for a single competition, with every team labelled.
k = 49
x = 'score_sr'
y = 'points_prg'
plot_linear_regression = True


fig, ax = plt.subplots(1,1, figsize=(6,6))

g = df_tot[df_tot.competition_id==k]
spearman_coef = spearmanr(g[x],g[y])[0]
pearson_coef = pearsonr(g[x],g[y])[0]
msg = f"{competitionId2Name[k]}, sp = {spearman_coef:.2f} | pr = {pearson_coef:.2f}"
# Pick the colour explicitly instead of relying on a loop variable `i` left
# behind by another cell. The index is the position of k among the sorted
# competition ids of `df_tot`, i.e. the order in which a groupby on
# competition_id enumerates them.
color_idx = sorted(df_tot['competition_id'].unique()).index(k) % len(colors)
ax.scatter(g[x],g[y],c=colors[color_idx],label=msg)

# ------------ marker annotation ----------------------------------------------
ts = []
for idx,row in g.iterrows():
    msg = f"{row['node_label']}"
    ts.append(ax.text(row[x], row[y], msg, fontsize=8, zorder=1))
adjust_text(ts, force_text=(0.5, 0.5),
            arrowprops=dict(arrowstyle='-|>', color='black', connectionstyle="arc3,rad=-.5", zorder=10),
            ax=ax)
# ----------------------------------------------
if plot_linear_regression:
    # Least-squares line y = m*x + b, drawn across the current x-range.
    m, b = np.polyfit(list(g[x]), list(g[y]), 1)
    xmin, xmax, ymin, ymax = plt.axis()
    xs = np.linspace(xmin, xmax, 100)
    ax.plot(xs, m * xs + b, ls='--', c='grey', alpha=0.8, lw=1)
# ----------------------------------------------

ax.set_xlabel(x)
ax.set_ylabel(y)
plt.legend(loc='best',fontsize=10)

In [None]:
# Decided (non-drawn) games of one reference team in competition k.
k = 49
ref_team_name = 'North Carolina Courage'
cond1 = df[k]['home_team'].eq(ref_team_name)      # played at home
cond2 = df[k]['away_team'].eq(ref_team_name)      # played away
cond3 = df[k]['home_score'].ne(df[k]['away_score'])  # not a draw
mask = (cond1 | cond2) & cond3
df[k].loc[mask, cols]

In [None]:
# Network of competition k with nodes placed at their SpringRank score, plus a
# vertical ruler whose ticks are spaced by one "level".
k = 49
delta_x = 0.2
q = 0.75
fig,ax = plt.subplots(1,1,figsize=(6,6))


viz.plot_score_network(A[k],model[k].ranks,cm = colormap,ax=ax,plot_labels=True,
                      nodeId2Label=nodeId2Label[k])

# --- Plot levels
# delta_ref: full score range; delta_beta: tick spacing, log(q/(1-q)) / (2*beta).
delta_ref = model[k].ranks.max() - model[k].ranks.min()
delta_beta = (np.log(q/(1-q)))/(2*model[k].beta)
# Vertical line at x = delta_x covering the whole score range ...
ys = np.linspace(model[k].ranks.min(),model[k].ranks.max(),100)
xs = delta_x * np.ones(ys.shape[0])
ax.plot(xs,ys,lw=1,color=viz.default_colors_dict['blue_sb_dark'])
# ... and ticks every delta_beta, starting at the minimum score.
B = int(np.ceil(delta_ref/delta_beta))
ys = np.arange(model[k].ranks.min(),model[k].ranks.min() + B * delta_beta,delta_beta)
xs = delta_x * np.ones(ys.shape[0])
ax.scatter(xs,ys,lw=1,marker = '_',color=viz.default_colors_dict['blue_sb_dark'])
# ---------
plt.tight_layout()
# The generated name is immediately replaced by None before `tl.savefig` is called.
# NOTE(review): presumably outfile=None means "do not write a file" — confirm in `tl.savefig`.
filename = tl.get_filename(f'soccer_{algo}_{k}_scores', lecture_id=lecture_id)
filename = None
tl.savefig(plt, outfile=filename, outdir=outdir_fig)

## 3.2 Simulate games

We are ready to generate games from the main model parameters. 

In [None]:
def get_H(s: np.ndarray, l: float = 1):
    '''
    Pairwise matrix H with H[i, j] = 0.5 * (s[i] - s[j] - l) for i != j
    and zeros on the diagonal.

    Parameters
    ----------
    s : 1-D array of scores (length N).
    l : offset subtracted from every score difference.

    Returns
    -------
    (N, N) float array.

    NOTE(review): the term is linear in the score difference; the SpringRank
    energy is usually written with a squared difference — confirm that the
    linear form is what the callers expect.
    '''
    N = s.shape[0]
    H = np.zeros((N, N))
    # All ordered pairs at once; the diagonal is left at zero.
    off_diagonal = ~np.eye(N, dtype=bool)
    pairwise = 0.5 * (s[:, None] - s[None, :] - l)
    H[off_diagonal] = pairwise[off_diagonal]
    return H

In [None]:
# Simulate SAMPLE matrices for competition k from the fitted scores.
k = 49
# beta = 10.1
# c = 1
beta = model[k].beta

# Poisson rate for every ordered pair i != j: exp(beta * H_ij), where
# H_ij = 0.5 * (s_i - s_j - 1) comes from `get_H`; the diagonal is set to 0.
H = get_H(model[k].ranks)
lambda_pois = np.exp(beta * H)
np.fill_diagonal(lambda_pois,0)
# NOTE(review): `c` is computed but never used when sampling below, so the
# simulated totals are not rescaled to the observed ones. The SpringRank
# generative model uses rates c * exp(-beta/2 * (s_i - s_j - 1)^2) with
# c = sum(A) / sum(exp(...)) — confirm whether the linear H, the sign and
# this ratio are intentional.
c = np.sum(lambda_pois) / np.sum(A[k])

SAMPLE = 1000
# SAMPLE independent Poisson draws of the full matrix, stacked on axis 0.
A_sim = np.array([prng.poisson(lambda_pois) for s in np.arange(SAMPLE)])

# Entry-wise mean over the samples.
A_sim_avg = np.mean(A_sim, axis=0)
np.fill_diagonal(A_sim_avg,0)
A_sim.shape

In [None]:


# Observed matrix next to the mean of the simulated ones, with rows/columns
# ordered from the highest to the lowest SpringRank score.
fig, ax = plt.subplots(1,2,figsize=(8,4))

node_order = np.argsort(-model[k].ranks)
viz.plot_matrix(A[k],ax=ax[0],node_order=node_order,title=f"GT data")
viz.plot_matrix(A_sim_avg,ax=ax[1],node_order=node_order,title=f"Estimated average")

We can select one example sample and check the data

In [None]:
# Network view of one randomly chosen simulated matrix, with nodes placed at
# their SpringRank score.
k = 49
fig,ax = plt.subplots(1,1,figsize=(6,6))

idx = prng.choice(np.arange(SAMPLE)) # random sample
# Bug fix: plot the sample that was drawn (`idx`); this used to always show
# sample 0.
viz.plot_score_network(A_sim[idx],model[k].ranks,cm = colormap,ax=ax,plot_labels=True,
                      nodeId2Label=nodeId2Label[k],x_jit=0.05)

What happens if you change the hyperparameters?

Note that this makes sense if you do not have a fixed schedule, and you want to generate that as well.  


### 3.2.1 La Liga
The top 3 teams are very close to each other. What is the probability that one of them wins the league if we were to simulate it n times?

In [None]:
# Same simulation as before, now for competition 11.
k = 11

# beta = 10.1
# c = 1
beta = model[k].beta

# Poisson rate for every ordered pair i != j: exp(beta * H_ij), where
# H_ij = 0.5 * (s_i - s_j - 1) comes from `get_H`; the diagonal is set to 0.
H = get_H(model[k].ranks)
lambda_pois = np.exp(beta * H)
np.fill_diagonal(lambda_pois,0)
# NOTE(review): `c` is only displayed at the end of the cell; it is not used
# when sampling. The SpringRank generative model uses rates
# c * exp(-beta/2 * (s_i - s_j - 1)^2) with c = sum(A) / sum(exp(...)) —
# confirm whether the linear H, the sign and this ratio are intentional.
c = np.sum(lambda_pois) / np.sum(A[k])

SAMPLE = 1000
# SAMPLE independent Poisson draws of the full matrix, stacked on axis 0.
A_sim = np.array([prng.poisson(lambda_pois) for s in np.arange(SAMPLE)])

# Entry-wise mean over the samples.
A_sim_avg = np.mean(A_sim, axis=0)
np.fill_diagonal(A_sim_avg,0)
A_sim.shape,c,beta


In [None]:
# Observed matrix, one randomly chosen simulated matrix and the simulated
# mean, all ordered from the highest to the lowest SpringRank score.
fig, ax = plt.subplots(1,3,figsize=(8,4))

node_order = np.argsort(-model[k].ranks)
viz.plot_matrix(A[k],ax=ax[0],node_order=node_order, title = f"GT data")
idx = prng.choice(np.arange(SAMPLE))
viz.plot_matrix(A_sim[idx],ax=ax[1],node_order=node_order, title = f"Example sample {idx}")
viz.plot_matrix(A_sim_avg,ax=ax[2],node_order=node_order, title = f"Estimated average")

Alternatively, we can take every match in the schedule and simulate who wins. 

In [None]:
def get_simulated_games_df(df: pd.DataFrame, ranks: np.ndarray, nodeLabel2Id: dict,
                          beta: float = 1, competition_id: int = None,
                          random_state=None):
    '''
    Simulate the outcome of every match of a schedule from the teams' scores.

    For each row the home team wins with probability
    1 / (1 + exp(-beta * (s_home - s_away))) and gets 3 points; otherwise the
    away team gets the 3 points. Draws are never generated.

    Parameters
    ----------
    df : schedule with columns `home_team` and `away_team` (one row per match).
    ranks : 1-D array of scores, indexed by node id.
    nodeLabel2Id : team label -> node id (index into `ranks`).
    beta : inverse temperature of the logistic win probability.
    competition_id : if given, written to a `competition_id` column.
    random_state : optional seed / generator forwarded to scipy's `rvs`;
        None keeps the previous behaviour (global NumPy random state).

    Returns
    -------
    DataFrame with the same index as `df` and columns `home_team`,
    `away_team`, `home_points`, `away_points` (+ `competition_id`).

    Raises
    ------
    KeyError if a team label is missing from `nodeLabel2Id`.
    '''
    df_new = df[['home_team', 'away_team']].copy(deep=True)

    scores = np.asarray(ranks, dtype=float)
    home_ids = [nodeLabel2Id[team] for team in df_new['home_team']]
    away_ids = [nodeLabel2Id[team] for team in df_new['away_team']]

    # Home-win probability of every match, then one Bernoulli draw per match.
    p_home = 1 / (1 + np.exp(-beta * (scores[home_ids] - scores[away_ids])))
    home_wins = st.bernoulli.rvs(p_home, size=len(df_new), random_state=random_state).astype(bool)

    df_new['home_points'] = np.where(home_wins, 3, 0)
    df_new['away_points'] = np.where(home_wins, 0, 3)

    if competition_id is not None:
        # Bug fix: the column used to be misspelt ('competiton_id') and filled
        # from the notebook-level variable `k` instead of the argument.
        df_new['competition_id'] = competition_id
    return df_new

In [None]:
SAMPLE = 100
# Replay the schedule of competition k SAMPLE times from the fitted scores ...
df_sim = []
for sample_id in range(SAMPLE):
    simulated_games = get_simulated_games_df(
        df[k],
        model[k].ranks,
        nodeLabel2Id[k],
        beta=model[k].beta,
        competition_id=k,
    )
    df_sim.append(simulated_games)
# ... and turn every simulated season into a standings table.
df_points_sim = [get_points(d, competition_id=k) for d in df_sim]

Let's check one particular (arbitrary) sample

In [None]:
# Standings of one randomly chosen simulated season.
idx = prng.choice(np.arange(SAMPLE))
df_points_sim[idx]

We can now count, across the simulated standings, how many times each of the top 3 teams finishes first, second or third.

In [None]:
# Node ids of the three highest SpringRank scores in competition k, their
# team labels, and the scores themselves.
top3_ids = np.argsort(-model[k].ranks)[:3]
top3_labels = [nodeId2Label[k][i] for i in top3_ids]
top3_labels,model[k].ranks[top3_ids]

In [None]:
# sim_ranks_top3[r, p]: number of simulated seasons in which the r-th team of
# `top3_labels` sits at row p (0 = first) of the simulated standings. Seasons
# where the team is outside the first len(top3_labels) rows are not counted.
n_top = len(top3_labels)
sim_ranks_top3 = np.zeros((n_top, n_top)).astype(int)
for rid, ref_team in enumerate(top3_labels):
    for df_tmp in df_points_sim:
        # The standings carry a fresh 0..n-1 index, so the index label of the
        # team's row is its position in the table.
        idx_sim = df_tmp[df_tmp.node_label == ref_team].index[0]
        if idx_sim >= n_top:
            continue
        sim_ranks_top3[rid, idx_sim] += 1

In [None]:
# Counts as a labelled table: rows are the top-3 teams, columns the finishing
# position in the simulated standings.
pd.DataFrame(sim_ranks_top3, columns = ['n_1st','n_2nd','n_3rd'], index=top3_labels)

What are we missing?

In [None]:
# First three rows of the observed standings of competition k, for comparison.
df_points_comp[k].iloc[:3]

# 4. Depth of competition

We can compare the statistics of the soccer league with results of other types of datasets.   
We take Table S2 [of the SpringRank paper](https://arxiv.org/pdf/1709.09002) for the other datasets.
                                                                                            

In [None]:
q = 0.75
# Reference values (beta and depth) for the non-soccer datasets.
dataset = ['parakeet G1', 'parakeet G2','Asian elephants','Business','Computer Science','History','Village 1','Village 2']
betas_S2 = np.array([2.7,2.78,2.33,2.04,2.23,2.39,1.98,1.89])
depth_S2 = np.array([2.604,1.879,3.,2.125,2.423,2.234,3.618,3.749])
# Level spacing log(q/(1-q)) / (2*beta), computed for all datasets at once.
delta_level_S2 = np.log(q / (1 - q)) / (2 * betas_S2)
# Same columns as `df_stats`; ids start at 100.
df_S2 = pd.DataFrame({
    'competition_id': [100 + pos for pos in range(len(dataset))],
    'competition_name': dataset,
    'beta': betas_S2,
    'depth': depth_S2,
    'n_levels': sr.calculate_n_levels(depth_S2,betas_S2),
    'delta_level': delta_level_S2,
})
df_stats2 = pd.concat([df_stats, df_S2], axis=0).drop_duplicates()
df_stats2
                    
                 

In [None]:
# Bar chart of the number of levels per dataset, coloured by dataset type.
dataset_type = ['Soccer','Parakeet','Elephant','Faculty hiring','Villages']
from matplotlib.lines import Line2D
from matplotlib.patches import Patch

x = 'beta'
y= 'n_levels'
# One colour per bar; the group sizes (5, 2, 1, 3, 2) follow the row order of
# `df_stats2` and the order of `dataset_type`.
color_plot = [colors[0] for i in range(5)] + [colors[1] for i in range(2)] + [colors[2] for i in range(1)] + [colors[3] for i in range(3)] + [colors[4] for i in range(2)]
fig,ax = plt.subplots(1,1,figsize=(8,4))
ax.bar(np.arange(len(df_stats2)),height=df_stats2[y],color = color_plot, width = 0.8, alpha=0.8)
ax.set_xlabel('Dataset')
ax.set_ylabel('Number of levels')
# Work on a copy of the names: editing `.values` in place would also rename
# the first row of `df_stats2` (or fail when the array is read-only).
x_tick_labels = df_stats2['competition_name'].tolist()
x_tick_labels[0] = 'FA WSL'

ax.set_xticks(np.arange(len(df_stats2)), labels = x_tick_labels, fontsize=8, rotation =60)

legend_elements = [
                   Line2D([0], [0], marker='o', color=colors[i], label=dataset_type[i],
                          markerfacecolor=colors[i], markersize=10, lw=0)
                    for i in np.arange(len(dataset_type))
                    ]
ax.legend(handles=legend_elements, loc='best')
ax.grid(axis='y')

# The generated file name is discarded on purpose: `tl.savefig` is called with
# outfile=None. Drop the `filename = None` line to use the generated name.
filename = tl.get_filename(f'depth_competition_{algo}', lecture_id=lecture_id)
filename = None
tl.savefig(plt, outfile=filename, outdir=outdir_fig)


# 5. Model selection

How do we determine what scoring system is the best?

**Homework**!