# Load Data

In [1]:
%load_ext autoreload
%autoreload 2
import os; import sys; sys.path.append('../')
import pandas as pd
import tqdm
import warnings
import copy
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import networkx as nx
import numpy as np
from collections import Counter
from collections import OrderedDict
import matplotlib.pyplot as plt

In [2]:
## Configure file and folder names
datafolder = "../data"
spadl_h5 = os.path.join(datafolder,"spadl-statsbomb.h5")
predictions_h5 = os.path.join(datafolder,"predictions.h5")

In [3]:
games = pd.read_hdf(spadl_h5,"games")
games = games[games.competition_name == "FIFA World Cup"]
print("nb of games:", len(games))

nb of games: 64


# Helper Functions

In [4]:
def change_possession(action, action_team, possession_team, result):
    end_pos = ['bad_touch', 'foul']
    change_team = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty', 'keeper_pick_up']
    success_change = ['tackle', 'interception', 'take_on', 'clearance', 'keeper_claim', 'keeper_save', 
                      'keeper_punch']
    
    if possession_team == None:
        if result == 'success':
            if action in change_team:
                possession_team = action_team
        else:
            return False, None
    
    
    if action in end_pos:
        return True, None
    
    if action_team != possession_team:
        if action in change_team:
            return True, action_team
        if result == 'success':
            if action in success_change:
                return True, action_team
            
    return False, possession_team

In [5]:
def extract_possessions(actions):
    all_possessions = []
    curr_possession = []
    team1 = []
    team2 = []
    
    possessing_team = actions.loc[0]["team_name"]
    team1_name = actions.loc[0]["team_name"]
    
    for i in range(len(actions)):
        # Extract possession
        action = actions.loc[i]["type_name"]
        action_team = actions.loc[i]["team_name"]
        
        if action_team != team1_name:
            team2_name = action_team
        
        result = actions.loc[i]["result_name"]
        end_pos, possessing_team = change_possession(action, action_team, possessing_team, result)
        
        if end_pos:
            all_possessions.append(copy.deepcopy(curr_possession))
            curr_possession = []

        curr_possession.append(actions.loc[i])
        
        # Identify players
        if (len(team1) == 14 and len(team2) == 14):
            continue
        
        player = actions.loc[i]["player_name"]
        if action_team == team1_name:
            if player not in team1:
                team1.append(player)
        else:
            if player not in team2:
                team2.append(player)

    return all_possessions, team1, team2, team1_name, team2_name

In [6]:
def pos_pass_list(pos):
    edges = []
    pass_action = ['pass', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 
                  'freekick_short', 'goalkick', 'corner_short']
    mult = 1
    
    for i in range(len(pos)):
        action = pos[i]
          
        if action["type_name"] == 'shot' and mult < 2:
            mult = 1.5 
        
        if action["type_name"] == 'shot' and action['result_name'] == 'success':
            mult = 2
            
        if action["type_name"] in pass_action:
            if action["result_name"] == 'success':
                passer = action["player_name"]
                team = action["team_name"]
                j = 1
                while i+j < len(pos) and (pos[i+j]["team_name"] != team):
                    j += 1
                try:
                    passer = action["player_name"]
                    receiver = pos[i+j]["player_name"]
                    edges.append((passer, receiver))
                except:
                    continue
                
    return edges, mult

In [7]:
def create_graph(passes):
    G = nx.DiGraph((x, y, {'weight': v}) for (x, y), v in Counter(passes).items())
    return G

In [9]:
def get_metrics(G):
    
    metrics = []
    metrics.append(nx.degree_centrality(G))
    metrics.append(nx.closeness_centrality(G))
    metrics.append(nx.betweenness_centrality(G))
    metrics.append(nx.load_centrality(G))
    metrics.append(nx.harmonic_centrality(G))
        
    return metrics

In [10]:
def weighted_average(weights, x):
    total = 0
    for i in range(len(x)):
        total += weights[i] * x[i]
        
    return total / sum(weights)

In [45]:
def compute_average(player_metrics):
    average = {}
    
    for player in player_metrics:
        if len(player_metrics[player][0]) < 100:
            continue
        
        weights = player_metrics[player][0]
        
        average[player] = []
        for i in range(1, len(player_metrics[player])):
            average[player].append(weighted_average(weights, player_metrics[player][i]))
    
    return average

In [22]:
def world_cup_team_placements():
    placements = {}
    placements['France'] = 1.5
    placements['Croatia'] = 1.5
    placements['Belgium'] = 3.5
    placements['England'] = 3.5
    placements['Uruguay'] = 6.5
    placements['Brazil'] = 6.5
    placements['Russia'] = 6.5
    placements['Sweden'] = 6.5
    placements['Portugal'] = 12.5
    placements['Argentina'] = 12.5
    placements['Mexico'] = 12.5
    placements['Japan'] = 12.5
    placements['Spain'] = 12.5
    placements['Denmark'] = 12.5
    placements['Switzerland'] = 12.5
    placements['Colombia'] = 12.5
    placements['Saudi Arabia'] = 24.5
    placements['Iran'] = 24.5
    placements['Peru'] = 24.5
    placements['Nigeria'] = 24.5
    placements['Serbia'] = 24.5
    placements['South Korea'] = 24.5
    placements['Tunisia'] = 24.5
    placements['Senegal'] = 24.5
    placements['Egypt'] = 24.5
    placements['Morocco'] = 24.5
    placements['Australia'] = 24.5
    placements['Iceland'] = 24.5
    placements['Costa Rica'] = 24.5
    placements['Germany'] = 24.5
    placements['Panama'] = 24.5
    placements['Poland'] = 24.5
    
    return placements

# Compute Network Metrics

In [12]:
players = pd.read_hdf(spadl_h5,"players")
teams = pd.read_hdf(spadl_h5,"teams")
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")

In [20]:
pos_metrics = {}
roster = {}

for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5,f"actions/game_{game.game_id}")
    actions = (
        actions.merge(actiontypes)
        .merge(results)
        .merge(bodyparts)
        .merge(players,"left",on="player_id")
        .merge(teams,"left",on="team_id")
        .sort_values(["period_id", "time_seconds", "timestamp"])
        .reset_index(drop=True)
    )
    
    possessions, team1, team2, team1_name, team2_name = extract_possessions(actions)
    
    for player in team1:
        if player not in roster:
            roster[player] = team1_name
    for player in team2:
        if player not in roster:
            roster[player] = team2_name
    
    for pos in possessions:
        passes, mult = pos_pass_list(pos)
        
        if len(passes) < 3:
            continue
        
        G = create_graph(passes)
        
        metrics = get_metrics(G)
        
        for player in metrics[0]:
            if player in pos_metrics:
                pos_metrics[player][0].append(mult)
                for i in range(len(metrics)):
                    pos_metrics[player][i+1].append(metrics[i][player]) 
            else:
                pos_metrics[player] = []
                pos_metrics[player].append([mult])
                for i in range(len(metrics)):
                    pos_metrics[player].append([metrics[i][player]]) 

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [01:40<00:00,  1.73s/it]


In [51]:
player_avg = compute_average(pos_metrics)
placements = world_cup_team_placements()

# Degree Centrality

In [52]:
count_teams = {}
for team in placements:
    count_teams[team] = 2

ordered_players = OrderedDict(sorted(player_avg.items(), key=lambda x: x[1][0], reverse=True))
for player in ordered_players:
    team = roster[player]
    if count_teams[team] > 0:
        print(player + " (" + team + ") : " + str(ordered_players[player][0]))
        count_teams[team] -= 1

Salman Mohammed Al Faraj (Saudi Arabia) : 0.6713413900913903
Simon Thorup Kjær (Denmark) : 0.6538847117794485
Granit Xhaka (Switzerland) : 0.6474925392500326
Abdullah Ibrahim Otayf (Saudi Arabia) : 0.6455301587301587
Marcelo Brozović (Croatia) : 0.6274988421540149
Éver Maximiliano David Banega (Argentina) : 0.6235810485810485
Javier Alejandro Mascherano (Argentina) : 0.6235062032475828
Toby Alderweireld (Belgium) : 0.622043279928794
Manuel Obafemi Akanji (Switzerland) : 0.6200191570881228
Vincent Kompany (Belgium) : 0.610767871625536
Toni Kroos (Germany) : 0.6084645152826972
Kyle Walker (England) : 0.6054269752593775
William Silva de Carvalho (Portugal) : 0.604338125187182
Andrés Iniesta Luján (Spain) : 0.6027696793002919
John Stones (England) : 0.6017425810904078
Luka Modrić (Croatia) : 0.5917154740684155
Carlos Joel Salcedo Hernández (Mexico) : 0.5910275319567353
Sergio Ramos García (Spain) : 0.5908143842833355
Matías Vecino Falero (Uruguay) : 0.5874650734201298
Philippe Coutinho Cor

# Closeness Centrality

In [53]:
count_teams = {}
for team in placements:
    count_teams[team] = 2

ordered_players = OrderedDict(sorted(player_avg.items(), key=lambda x: x[1][1], reverse=True))
for player in ordered_players:
    team = roster[player]
    if count_teams[team] > 0:
        print(player + " (" + team + ") : " + str(ordered_players[player][1]))
        count_teams[team] -= 1

Andrés Iniesta Luján (Spain) : 0.4519586166204719
Salman Mohammed Al Faraj (Saudi Arabia) : 0.44574903484255746
Francisco Román Alarcón Suárez (Spain) : 0.4370359318044097
Éver Maximiliano David Banega (Argentina) : 0.4320219273351625
Eden Hazard (Belgium) : 0.42918521219449246
Xherdan Shaqiri (Switzerland) : 0.4244970136476023
Abdullah Ibrahim Otayf (Saudi Arabia) : 0.4225309966240616
Neymar da Silva Santos Junior (Brazil) : 0.41725902550559896
Vincent Kompany (Belgium) : 0.41111692594408855
Granit Xhaka (Switzerland) : 0.4103903992318051
Simon Thorup Kjær (Denmark) : 0.40931906825383996
Philippe Coutinho Correia (Brazil) : 0.4084422758545679
Toni Kroos (Germany) : 0.40073750445926054
Lionel Andrés Messi Cuccittini (Argentina) : 0.40023746181855796
Jesse Lingard (England) : 0.39561635599466427
Matías Vecino Falero (Uruguay) : 0.39532643365875214
Kyle Walker (England) : 0.3945035846256702
Marcelo Brozović (Croatia) : 0.3944165011721003
Rodrigo Bentancur Colmán (Uruguay) : 0.39343942914

# Betweenness Centrality

In [57]:
count_teams = {}
for team in placements:
    count_teams[team] = 2

ordered_players = OrderedDict(sorted(player_avg.items(), key=lambda x: x[1][2], reverse=True))
for player in ordered_players:
    team = roster[player]
    if count_teams[team] > 0:
        print(player + " (" + team + ") : " + str(ordered_players[player][2]))
        count_teams[team] -= 1

Vincent Kompany (Belgium) : 0.28916985285598423
John Stones (England) : 0.28307324016563135
Carlos Joel Salcedo Hernández (Mexico) : 0.2812947745469868
Sergio Ramos García (Spain) : 0.2782908749985836
Marcelo Brozović (Croatia) : 0.2777698833733317
Javier Alejandro Mascherano (Argentina) : 0.27743036550507805
Granit Xhaka (Switzerland) : 0.2754015108919742
Simon Thorup Kjær (Denmark) : 0.27155214424951263
Kyle Walker (England) : 0.2700903017942124
William Silva de Carvalho (Portugal) : 0.2684865728261955
Salman Mohammed Al Faraj (Saudi Arabia) : 0.2642300986050986
Toni Kroos (Germany) : 0.26407782733540325
Rodrigo Bentancur Colmán (Uruguay) : 0.2619475232621784
Toby Alderweireld (Belgium) : 0.26192268703950944
Éver Maximiliano David Banega (Argentina) : 0.2603277674706246
João Miranda de Souza Filho (Brazil) : 0.2583520646020645
Davinson Sánchez Mina (Colombia) : 0.25145999453511186
N"Golo Kanté (France) : 0.24732484948002195
Raphaël Varane (France) : 0.24518295006522145
Abdullah Ibrah

# Load Centrality

In [55]:
count_teams = {}
for team in placements:
    count_teams[team] = 2

ordered_players = OrderedDict(sorted(player_avg.items(), key=lambda x: x[1][3], reverse=True))
for player in ordered_players:
    team = roster[player]
    if count_teams[team] > 0:
        print(player + " (" + team + ") : " + str(ordered_players[player][3]))
        count_teams[team] -= 1

Vincent Kompany (Belgium) : 0.2891855424245935
John Stones (England) : 0.28304736024844707
Carlos Joel Salcedo Hernández (Mexico) : 0.2813474504846185
Sergio Ramos García (Spain) : 0.2782816695557595
Marcelo Brozović (Croatia) : 0.2777698833733317
Javier Alejandro Mascherano (Argentina) : 0.2773828528857264
Granit Xhaka (Switzerland) : 0.2753323097328548
Simon Thorup Kjær (Denmark) : 0.2714303118908382
Kyle Walker (England) : 0.2701161656468919
William Silva de Carvalho (Portugal) : 0.2685002994908655
Salman Mohammed Al Faraj (Saudi Arabia) : 0.2642300986050986
Toni Kroos (Germany) : 0.264109256344105
Rodrigo Bentancur Colmán (Uruguay) : 0.26197745621236995
Toby Alderweireld (Belgium) : 0.26192964075557534
Éver Maximiliano David Banega (Argentina) : 0.2604866694152408
João Miranda de Souza Filho (Brazil) : 0.25839299589299586
Davinson Sánchez Mina (Colombia) : 0.2514817298854857
N"Golo Kanté (France) : 0.24734853269336035
Raphaël Varane (France) : 0.24508475135206428
Abdullah Ibrahim O

# Harmonic Centrality

In [56]:
count_teams = {}
for team in placements:
    count_teams[team] = 2

ordered_players = OrderedDict(sorted(player_avg.items(), key=lambda x: x[1][4], reverse=True))
for player in ordered_players:
    team = roster[player]
    if count_teams[team] > 0:
        print(player + " (" + team + ") : " + str(ordered_players[player][4]))
        count_teams[team] -= 1

Jorge Resurrección Merodio (Spain) : 3.079237739872068
Francisco Román Alarcón Suárez (Spain) : 3.067658310681568
Vincent Kompany (Belgium) : 2.688946819603754
John Stones (England) : 2.5676138716356114
Nacer Chadli (Belgium) : 2.5328936078936084
Rodrigo Bentancur Colmán (Uruguay) : 2.505172413793104
Éver Maximiliano David Banega (Argentina) : 2.469624819624819
Lionel Andrés Messi Cuccittini (Argentina) : 2.4688380281690137
Gaku Shibasaki (Japan) : 2.4571830419931677
Kyle Walker (England) : 2.448975791433891
Marcelo Vieira da Silva Júnior (Brazil) : 2.4189943709113035
Maya Yoshida (Japan) : 2.407396708683474
Manuel Obafemi Akanji (Switzerland) : 2.407068965517241
Xherdan Shaqiri (Switzerland) : 2.386700537955768
Abdullah Ibrahim Otayf (Saudi Arabia) : 2.377533333333333
Salman Mohammed Al Faraj (Saudi Arabia) : 2.375631313131314
Neymar da Silva Santos Junior (Brazil) : 2.374613170205109
William Silva de Carvalho (Portugal) : 2.3518328840970355
Joshua Kimmich (Germany) : 2.33376010781671