# Load Data

In [2]:
%load_ext autoreload
%autoreload 2
import os; import sys; sys.path.append('../')
import pandas as pd
import tqdm
import warnings
import copy
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import networkx as nx
import numpy as np
from collections import Counter
from collections import OrderedDict
import matplotlib.pyplot as plt

In [3]:
## Configure file and folder names
datafolder = "../data"
spadl_h5 = os.path.join(datafolder,"spadl-statsbomb.h5")
predictions_h5 = os.path.join(datafolder,"predictions.h5")

In [4]:
games = pd.read_hdf(spadl_h5,"games")
games = games[games.competition_name == "La Liga"]
print("nb of games:", len(games))

nb of games: 348


# Helper Functions

In [5]:
def change_possession(action, action_team, possession_team, result):
    end_pos = ['bad_touch', 'foul']
    change_team = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty', 'keeper_pick_up']
    success_change = ['tackle', 'interception', 'take_on', 'clearance', 'keeper_claim', 'keeper_save', 
                      'keeper_punch']
    
    if possession_team == None:
        if result == 'success':
            if action in change_team:
                possession_team = action_team
        else:
            return False, None
    
    
    if action in end_pos:
        return True, None
    
    if action_team != possession_team:
        if action in change_team:
            return True, action_team
        if result == 'success':
            if action in success_change:
                return True, action_team
            
    return False, possession_team

In [6]:
def extract_possessions(actions):
    all_possessions = []
    curr_possession = []
    team1 = []
    team2 = []
    
    possessing_team = actions.loc[0]["team_name"]
    team1_name = actions.loc[0]["team_name"]
    
    for i in range(len(actions)):
        # Extract possession
        action = actions.loc[i]["type_name"]
        action_team = actions.loc[i]["team_name"]
        
        if action_team != team1_name:
            team2_name = action_team
        
        result = actions.loc[i]["result_name"]
        end_pos, possessing_team = change_possession(action, action_team, possessing_team, result)
        
        if end_pos:
            all_possessions.append(copy.deepcopy(curr_possession))
            curr_possession = []

        curr_possession.append(actions.loc[i])
        
        # Identify players
        if (len(team1) == 14 and len(team2) == 14):
            continue
        
        player = actions.loc[i]["player_name"]
        if action_team == team1_name:
            if player not in team1:
                team1.append(player)
        else:
            if player not in team2:
                team2.append(player)

    return all_possessions, team1, team2, team1_name, team2_name

In [7]:
def pos_pass_list(pos):
    edges = []
    pass_action = ['pass', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 
                  'freekick_short', 'goalkick', 'corner_short']
    mult = 1
    
    for i in range(len(pos)):
        action = pos[i]
          
        if action["type_name"] == 'shot' and mult < 2:
            mult = 1.5 
        
        if action["type_name"] == 'shot' and action['result_name'] == 'success':
            mult = 2
            
        if action["type_name"] in pass_action:
            if action["result_name"] == 'success':
                passer = action["player_name"]
                team = action["team_name"]
                j = 1
                while i+j < len(pos) and (pos[i+j]["team_name"] != team):
                    j += 1
                try:
                    passer = action["player_name"]
                    receiver = pos[i+j]["player_name"]
                    edges.append((passer, receiver))
                except:
                    continue
                
    return edges, mult

In [8]:
def create_graph(passes):
    G = nx.DiGraph((x, y, {'weight': v}) for (x, y), v in Counter(passes).items())
    return G

In [9]:
def get_metrics(G):
    
    metrics = []
    metrics.append(nx.degree_centrality(G))
    metrics.append(nx.closeness_centrality(G))
    metrics.append(nx.betweenness_centrality(G))
    metrics.append(nx.load_centrality(G))
    metrics.append(nx.harmonic_centrality(G))
        
    return metrics

In [10]:
def weighted_average(weights, x):
    total = 0
    for i in range(len(x)):
        total += weights[i] * x[i]
        
    return total / sum(weights)

In [39]:
def compute_average(player_metrics):
    average = {}
    
    for player in player_metrics:
        if len(player_metrics[player][0]) < 150:
            continue
        
        weights = player_metrics[player][0]
        
        average[player] = []
        for i in range(1, len(player_metrics[player])):
            average[player].append(weighted_average(weights, player_metrics[player][i]))
    
    return average

# Compute Network Metrics

In [12]:
players = pd.read_hdf(spadl_h5,"players")
teams = pd.read_hdf(spadl_h5,"teams")
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")

In [13]:
pos_metrics = {}
roster = {}

for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5,f"actions/game_{game.game_id}")
    actions = (
        actions.merge(actiontypes)
        .merge(results)
        .merge(bodyparts)
        .merge(players,"left",on="player_id")
        .merge(teams,"left",on="team_id")
        .sort_values(["period_id", "time_seconds", "timestamp"])
        .reset_index(drop=True)
    )
    
    possessions, team1, team2, team1_name, team2_name = extract_possessions(actions)
    
    for player in team1:
        if player not in roster:
            roster[player] = team1_name
    for player in team2:
        if player not in roster:
            roster[player] = team2_name
    
    for pos in possessions:
        passes, mult = pos_pass_list(pos)
        
        if len(passes) < 3:
            continue
        
        G = create_graph(passes)
        
        metrics = get_metrics(G)
        
        for player in metrics[0]:
            if player in pos_metrics:
                pos_metrics[player][0].append(mult)
                for i in range(len(metrics)):
                    pos_metrics[player][i+1].append(metrics[i][player]) 
            else:
                pos_metrics[player] = []
                pos_metrics[player].append([mult])
                for i in range(len(metrics)):
                    pos_metrics[player].append([metrics[i][player]]) 

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 348/348 [11:14<00:00,  1.82s/it]


In [41]:
player_avg = compute_average(pos_metrics)

# Degree Centrality

In [42]:
count = 20

ordered_players = OrderedDict(sorted(player_avg.items(), key=lambda x: x[1][0], reverse=True))
for player in ordered_players:
    team = roster[player]
    if count > 0:
        print(player + " (" + team + ") : " + str(ordered_players[player][0]))
        count -= 1

Roberto Trashorras Gayoso (Rayo Vallecano) : 0.7110157290429403
Éver Maximiliano David Banega (Valencia) : 0.646273520445706
Xabier Prieto Argarate (Real Sociedad) : 0.6460569412950369
Joan Verdú Fernández (Espanyol) : 0.6431884960556996
Ander Herrera Agüera (Real Zaragoza) : 0.6402022152022153
Jorge Resurrección Merodio (Atlético Madrid) : 0.6401840828924161
Daniel Parejo Muñoz (Getafe) : 0.6356554095426498
Simão Pedro Fonseca Sabrosa (Atlético Madrid) : 0.6351160684494018
Marcos Antonio Senna da Silva (Villarreal) : 0.6287748643761301
Xavier Hernández Creus (Barcelona) : 0.6270821278122394
Juan Francisco Torres Belén (Osasuna) : 0.6203174603174603
Thiago Motta (Barcelona) : 0.6136954080548733
Bruno Soriano Llido (Villarreal) : 0.6128995756718527
Andoni Iraola Sagarna (Athletic Bilbao) : 0.6124159268227067
Markel Susaeta Laskurain (Athletic Bilbao) : 0.6069534827669318
David Josué Jiménez Silva (Valencia) : 0.6054298747847138
Óscar de Marcos Arana (Athletic Bilbao) : 0.605083470169677

# Closeness Centrality

In [43]:
count = 20
ordered_players = OrderedDict(sorted(player_avg.items(), key=lambda x: x[1][1], reverse=True))
for player in ordered_players:
    team = roster[player]
    if count > 0:
        print(player + " (" + team + ") : " + str(ordered_players[player][1]))
        count -= 1

Roberto Trashorras Gayoso (Rayo Vallecano) : 0.4429158636781209
Sergio García De La Fuente (Espanyol) : 0.4368603473321411
Simão Pedro Fonseca Sabrosa (Atlético Madrid) : 0.4348115788453951
Jesús Navas González (Sevilla) : 0.4324266993917919
Xabier Prieto Argarate (Real Sociedad) : 0.42851614311278985
Ander Herrera Agüera (Real Zaragoza) : 0.4278359477407634
Frédéric Oumar Kanouté (Sevilla) : 0.4271428506051503
Rubén Gracia Calmache (Villarreal) : 0.4230167395369556
Joan Verdú Fernández (Espanyol) : 0.4223872294960343
Anderson Luís de Souza (Barcelona) : 0.4210335279359014
Xavier Hernández Creus (Barcelona) : 0.4190428754777612
Markel Susaeta Laskurain (Athletic Bilbao) : 0.41858127549710433
Lionel Andrés Messi Cuccittini (Barcelona) : 0.4183701926355994
Juan Francisco Torres Belén (Osasuna) : 0.41253168435155796
David Josué Jiménez Silva (Valencia) : 0.4113733945195537
Ronaldo de Assis Moreira (Barcelona) : 0.41128446011389463
Óscar de Marcos Arana (Athletic Bilbao) : 0.40923692836725

# Betweenness Centrality

In [33]:
count = 20

ordered_players = OrderedDict(sorted(player_avg.items(), key=lambda x: x[1][2], reverse=True))
for player in ordered_players:
    team = roster[player]
    if count > 0:
        print(player + " (" + team + ") : " + str(ordered_players[player][2]))
        count -= 1

Roberto Trashorras Gayoso (Rayo Vallecano) : 0.2841269841269841
Xavier Hernández Creus (Barcelona) : 0.27029966441507963
Dmytro Chygrynskiy (Barcelona) : 0.2593239590522198
Thiago Motta (Barcelona) : 0.25400543263831993
Luka Modrić (Real Madrid) : 0.25151179019289255
Daniel Parejo Muñoz (Getafe) : 0.24891079082473744
Lilian Thuram (Barcelona) : 0.24830746948065407
Tiago Cardoso Mendes (Atlético Madrid) : 0.2475227307751581
Alexandre Dimitri Song-Billong (Barcelona) : 0.24235879049475584
Thiago Alcântara do Nascimento (Barcelona) : 0.24143486781417803
Éver Maximiliano David Banega (Valencia) : 0.23996022986089222
Francisco Puñal Martínez (Osasuna) : 0.23559077809798262
Gabriel Alejandro Milito (Barcelona) : 0.23039515178675163
Rafael Márquez Álvarez (Barcelona) : 0.22979813844639452
Thomas Vermaelen (Barcelona) : 0.227039293131583
Javier Alejandro Mascherano (Barcelona) : 0.22697637709031815
Gnégnéri Yaya Touré (Barcelona) : 0.22573898960765384
Xabier Prieto Argarate (Real Sociedad) : 0

# Load Centrality

In [34]:
count = 20

ordered_players = OrderedDict(sorted(player_avg.items(), key=lambda x: x[1][3], reverse=True))
for player in ordered_players:
    team = roster[player]
    if count > 0:
        print(player + " (" + team + ") : " + str(ordered_players[player][3]))
        count -= 1

Roberto Trashorras Gayoso (Rayo Vallecano) : 0.28415847820609724
Xavier Hernández Creus (Barcelona) : 0.27028613212825997
Dmytro Chygrynskiy (Barcelona) : 0.2593423805210488
Thiago Motta (Barcelona) : 0.2540281950893806
Luka Modrić (Real Madrid) : 0.2514180519101779
Daniel Parejo Muñoz (Getafe) : 0.24891079082473744
Lilian Thuram (Barcelona) : 0.2483046059826789
Tiago Cardoso Mendes (Atlético Madrid) : 0.24748179611650498
Alexandre Dimitri Song-Billong (Barcelona) : 0.2423326110414931
Thiago Alcântara do Nascimento (Barcelona) : 0.2414301375508271
Éver Maximiliano David Banega (Valencia) : 0.23996022986089222
Francisco Puñal Martínez (Osasuna) : 0.23559077809798262
Gabriel Alejandro Milito (Barcelona) : 0.23038836744178318
Rafael Márquez Álvarez (Barcelona) : 0.22978223703514417
Thomas Vermaelen (Barcelona) : 0.2270369752262277
Javier Alejandro Mascherano (Barcelona) : 0.2269565634329252
Gnégnéri Yaya Touré (Barcelona) : 0.2257423594351893
Xabier Prieto Argarate (Real Sociedad) : 0.225

# Harmonic Centrality

In [45]:
count = 20

ordered_players = OrderedDict(sorted(player_avg.items(), key=lambda x: x[1][4], reverse=True))
for player in ordered_players:
    team = roster[player]
    if count > 0:
        print(player + " (" + team + ") : " + str(ordered_players[player][4]))
        count -= 1

Ibrahim Afellay (Barcelona) : 2.6886855241264564
Thiago Alcântara do Nascimento (Barcelona) : 2.582607048124287
Francesc Fàbregas i Soler (Barcelona) : 2.532304116121403
Juan Isaac Cuenca López (Barcelona) : 2.4843977278759897
Thomas Vermaelen (Barcelona) : 2.4835354546803146
Neymar da Silva Santos Junior (Barcelona) : 2.4780343570652943
Xavier Hernández Creus (Barcelona) : 2.4711975453660733
Lionel Andrés Messi Cuccittini (Barcelona) : 2.4612068284817132
Andrés Iniesta Luján (Barcelona) : 2.4451436797279666
Rafael Alcântara do Nascimento (Celta Vigo) : 2.4392169840138416
Andreu Fontàs Prat (Barcelona) : 2.433168795973675
Pedro Eliezer Rodríguez Ledesma (Barcelona) : 2.4191562210773645
Alexis Alejandro Sánchez Sánchez (Barcelona) : 2.4079872291284605
Dmytro Chygrynskiy (Barcelona) : 2.396619435817808
Alexandre Dimitri Song-Billong (Barcelona) : 2.3947283573517106
David Villa Sánchez (Barcelona) : 2.3894559958686745
Arda Turan (Atlético Madrid) : 2.3748397664150396
Seydou Kéita (Barcelo