In [1]:
%load_ext autoreload
%autoreload 2
import os; import sys; sys.path.append('../')
import pandas as pd
import tqdm
import warnings
import copy
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

In [2]:
## Configure file and folder names
datafolder = "../data"
spadl_h5 = os.path.join(datafolder,"spadl-statsbomb.h5")
predictions_h5 = os.path.join(datafolder,"predictions.h5")

In [3]:
games = pd.read_hdf(spadl_h5,"games")
games = games[games.competition_name == "FIFA World Cup"]
print("nb of games:", len(games))

nb of games: 64


In [4]:
players = pd.read_hdf(spadl_h5,"players")
teams = pd.read_hdf(spadl_h5,"teams")
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")

A = []
for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5,f"actions/game_{game.game_id}")
    actions = (
        actions.merge(actiontypes)
        .merge(results)
        .merge(bodyparts)
        .merge(players,"left",on="player_id")
        .merge(teams,"left",on="team_id")
        .sort_values(["period_id", "time_seconds", "timestamp"])
        .reset_index(drop=True)
    )

    A.append(pd.concat([actions],axis=1))
A = pd.concat(A).sort_values(["game_id","period_id", "time_seconds", "timestamp"]).reset_index(drop=True)

100%|██████████████████████████████████████████████████████████████████████████████████| 64/64 [00:01<00:00, 38.17it/s]


# Types of Actions

In [5]:
seen = []
test = A["type_name"]
for t in test:
    if t not in seen:
        seen.append(t)
        
print(seen)

['pass', 'dribble', 'tackle', 'throw_in', 'interception', 'foul', 'take_on', 'corner_crossed', 'clearance', 'freekick_crossed', 'bad_touch', 'shot', 'cross', 'freekick_short', 'goalkick', 'keeper_claim', 'keeper_save', 'shot_freekick', 'corner_short', 'shot_penalty', 'keeper_punch']


# Types of Results

In [6]:
seen = []
test = A["result_name"]
for t in test:
    if t not in seen:
        seen.append(t)
        
print(seen)

['success', 'fail', 'offside', 'yellow_card', 'owngoal', 'red_card']


In [7]:
def change_possession(action, action_team, possession_team, result):
    key_change = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty']
    success_change = ['tackle', 'interception', 'take_on', 'clearance', 'keeper_claim', 'keeper_save', 
                      'keeper_punch']
    
    if action in key_change:
        if (action_team != possession_team):
            return True
    if action in success_change:
        if (action_team != possession_team):
            if result == 'success':
                return True
            
        
    return False

In [8]:
all_possessions = []
curr_possession = []
possessing_team = A.loc[0]["team_id"]
for i in range(len(A)):
    action = A.loc[i]["type_name"]
    action_team = A.loc[i]["team_id"]
    result = A.loc[i]["result_name"]
    if change_possession(action, action_team, possessing_team, result):
        possessing_team = action_team
        all_possessions.append(copy.deepcopy(curr_possession))
        curr_possession = []
    
    curr_possession.append(A.loc[i])
    
#all_possessions = pd.concat(all_possessions)
#print(all_possessions[-1])

In [10]:
players = pd.read_hdf(spadl_h5,"players")
teams = pd.read_hdf(spadl_h5,"teams")
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")

B = []
game = list(games.itertuples())[0]
#for game in tqdm.tqdm(list(games.itertuples()))[0]:
actions = pd.read_hdf(spadl_h5,f"actions/game_{game.game_id}")
actions = (
    actions.merge(actiontypes)
    .merge(results)
    .merge(bodyparts)
    .merge(players,"left",on="player_id")
    .merge(teams,"left",on="team_id")
    .sort_values(["period_id", "time_seconds", "timestamp"])
    .reset_index(drop=True)
)

#preds = pd.read_hdf(predictions_h5,f"game_{game.game_id}")
#values = vaep.value(actions,preds.scores,preds.concedes)
B.append(pd.concat([actions],axis=1))
B = pd.concat(B).sort_values(["game_id","period_id", "time_seconds", "timestamp"]).reset_index(drop=True)

In [11]:
all_possessions = []
curr_possession = []
possessing_team = B.loc[0]["team_id"]
for i in range(len(B)):
    action = B.loc[i]["type_name"]
    action_team = B.loc[i]["team_id"]
    result = B.loc[i]["result_name"]
    if change_possession(action, action_team, possessing_team, result):
        possessing_team = action_team
        all_possessions.append(copy.deepcopy(curr_possession))
        curr_possession = []
    
    curr_possession.append(B.loc[i])
    
#all_possessions = pd.concat(all_possessions)
#print(all_possessions)

In [12]:
#B.to_csv('test1.csv')
#print(all_possessions[0])

In [13]:
import networkx as nx
import numpy as np
from collections import Counter

In [14]:
def players_in_game(B):
    team_id = B.loc[0]["team_id"]
    team1 = []
    team2 = []
    players = {}
    for i in range(len(B)):
        player = B.loc[i]["player_name"]
        if players.get(player) == None:
            players[player] = [0,0]
            if B.loc[i]["team_id"] == team_id:
                team1.append(player)
            else:
                team2.append(player)
            
    return players, team1, team2

In [15]:
p_test = all_possessions[0]

In [22]:
game_centrality, team1, team2 = players_in_game(B)
avg_centrality = copy.deepcopy(game_centrality)
print(game_centrality)

{'Antoine Griezmann': [0, 0], 'Raphaël Varane': [0, 0], 'Benjamin Pavard': [0, 0], 'Kylian Mbappé Lottin': [0, 0], 'Toby Alderweireld': [0, 0], 'Kevin De Bruyne': [0, 0], 'Blaise Matuidi': [0, 0], 'Jan Vertonghen': [0, 0], 'Paul Pogba': [0, 0], 'Axel Witsel': [0, 0], 'Samuel Yves Umtiti': [0, 0], 'Lucas Hernández Pi': [0, 0], 'N"Golo Kanté': [0, 0], 'Hugo Lloris': [0, 0], 'Olivier Giroud': [0, 0], 'Eden Hazard': [0, 0], 'Nacer Chadli': [0, 0], 'Moussa Sidi Yaya Dembélé': [0, 0], 'Marouane Fellaini-Bakkioui': [0, 0], 'Vincent Kompany': [0, 0], 'Thibaut Courtois': [0, 0], 'Romelu Lukaku Menama': [0, 0], 'Dries Mertens': [0, 0], 'Yannick Ferreira Carrasco': [0, 0], 'Steven N"Kemboanza Mike Christopher Nzonzi': [0, 0], 'Corentin Tolisso': [0, 0]}


In [23]:
def players_in_pos(pos):
    contribution = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty']
    
    players = []
    for play in pos:
        action = play["type_name"]
        player = play["player_name"]
        if action in contribution and result == 'success':
            if player not in players:
                players.append(player)
            
    return players

In [24]:
players_in_pos(p_test)

['Antoine Griezmann',
 'Raphaël Varane',
 'Benjamin Pavard',
 'Kylian Mbappé Lottin']

In [25]:
contribution = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty']

pos_players = []

for play in p_test:
    player = play['player_name']
    if play['type_name'] in contribution and play['result_name'] == 'success':
        if player not in pos_players:
            pos_players.append(player)
            
print(pos_players)

['Antoine Griezmann', 'Raphaël Varane', 'Benjamin Pavard', 'Kylian Mbappé Lottin']


In [26]:
for pos in all_possessions:
    players = players_in_pos(pos)

    shot = False
    for play in pos:
        if play['type_name'] == 'shot':
            shot = True
            
    if shot:
        for player in players:
            game_centrality[player][0] += 1
            
        for player in game_centrality.keys():
            game_centrality[player][1] += 1

In [27]:
avg_cont = {}
for key in game_centrality.keys():
    pos_num = game_centrality[key][1]
    average_centrality = game_centrality[key][0] / pos_num if pos_num > 0 else 0
    #print(average_centrality)
    avg_cont[key] = average_centrality
    #print(avg_centrality[key])

In [28]:
from collections import OrderedDict
dd = OrderedDict(sorted(avg_cont.items(), key=lambda x: x[1], reverse=True))
for key in dd:
    print(key + ": " + str(dd[key]))

Antoine Griezmann: 0.48
Kylian Mbappé Lottin: 0.36
N"Golo Kanté: 0.36
Paul Pogba: 0.32
Olivier Giroud: 0.32
Toby Alderweireld: 0.28
Blaise Matuidi: 0.24
Lucas Hernández Pi: 0.24
Benjamin Pavard: 0.2
Axel Witsel: 0.2
Samuel Yves Umtiti: 0.2
Vincent Kompany: 0.2
Raphaël Varane: 0.16
Kevin De Bruyne: 0.16
Eden Hazard: 0.16
Nacer Chadli: 0.16
Jan Vertonghen: 0.12
Moussa Sidi Yaya Dembélé: 0.12
Marouane Fellaini-Bakkioui: 0.12
Dries Mertens: 0.08
Hugo Lloris: 0.04
Romelu Lukaku Menama: 0.04
Corentin Tolisso: 0.04
Thibaut Courtois: 0.0
Yannick Ferreira Carrasco: 0.0
Steven N"Kemboanza Mike Christopher Nzonzi: 0.0


# Centrality

In [29]:
def pos_pass_list(pos):
    edges = []
    pass_action = ['pass', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 
                  'freekick_short', 'goalkick', 'corner_short']
    for i in range(len(pos)):
        play = pos[i]
        if play["type_name"] in pass_action:
            if play["result_name"] == 'success':
                try:
                    passer = play["player_name"]
                    receiver = pos[i+1]["player_name"]
                    edges.append((passer , receiver))
                except:
                    return edges
                
    return edges

In [30]:
passes = pos_pass_list(p_test)

In [31]:
def get_centrality(passes):
    G = nx.DiGraph((x, y, {'weight': v}) for (x, y), v in Counter(passes).items())
    #print(*G.edges(data=True), sep='\n')
    return nx.degree_centrality(G)

In [32]:
get_centrality(passes)

{'Antoine Griezmann': 0.6666666666666666,
 'Raphaël Varane': 0.6666666666666666,
 'Benjamin Pavard': 0.6666666666666666,
 'Kylian Mbappé Lottin': 0.6666666666666666}

In [33]:
for pos in all_possessions:
    passes = pos_pass_list(pos)
    if len(passes) > 0:
        centrality = get_centrality(passes)
        for player in centrality.keys():
            game_centrality[player] = (game_centrality[player][0] + centrality[player], game_centrality[player][1] + 1)

In [34]:
for key in game_centrality.keys():
    pos_num = game_centrality[key][1]
    average_centrality = game_centrality[key][0] / pos_num if pos_num > 0 else 0
    #print(average_centrality)
    avg_centrality[key] = average_centrality
    #print(avg_centrality[key])

In [35]:
print(len(all_possessions))

222


In [36]:
print(avg_centrality)

{'Antoine Griezmann': 0.5193678641047061, 'Raphaël Varane': 0.30856625258799175, 'Benjamin Pavard': 0.46934523809523815, 'Kylian Mbappé Lottin': 0.5008080808080807, 'Toby Alderweireld': 0.5619666048237477, 'Kevin De Bruyne': 0.4134114583333334, 'Blaise Matuidi': 0.4028730158730158, 'Jan Vertonghen': 0.34271825396825384, 'Paul Pogba': 0.46069182389937097, 'Axel Witsel': 0.3996787603930461, 'Samuel Yves Umtiti': 0.36633885438233266, 'Lucas Hernández Pi': 0.3990667254556143, 'N"Golo Kanté': 0.4879560011138957, 'Hugo Lloris': 0.15170401493930905, 'Olivier Giroud': 0.4163015873015873, 'Eden Hazard': 0.4191220238095239, 'Nacer Chadli': 0.4250163078930201, 'Moussa Sidi Yaya Dembélé': 0.2959473150962513, 'Marouane Fellaini-Bakkioui': 0.32310495626822155, 'Vincent Kompany': 0.5058965773809525, 'Thibaut Courtois': 0.1270913770913771, 'Romelu Lukaku Menama': 0.2114229024943311, 'Dries Mertens': 0.1996782496782497, 'Yannick Ferreira Carrasco': 0.059770114942528735, 'Steven N"Kemboanza Mike Christo

In [37]:
from collections import OrderedDict
dd = OrderedDict(sorted(avg_centrality.items(), key=lambda x: x[1], reverse=True))
for key in dd:
    print(key + ": " + str(dd[key]))

Toby Alderweireld: 0.5619666048237477
Antoine Griezmann: 0.5193678641047061
Vincent Kompany: 0.5058965773809525
Kylian Mbappé Lottin: 0.5008080808080807
N"Golo Kanté: 0.4879560011138957
Benjamin Pavard: 0.46934523809523815
Paul Pogba: 0.46069182389937097
Nacer Chadli: 0.4250163078930201
Eden Hazard: 0.4191220238095239
Olivier Giroud: 0.4163015873015873
Kevin De Bruyne: 0.4134114583333334
Blaise Matuidi: 0.4028730158730158
Axel Witsel: 0.3996787603930461
Lucas Hernández Pi: 0.3990667254556143
Samuel Yves Umtiti: 0.36633885438233266
Jan Vertonghen: 0.34271825396825384
Marouane Fellaini-Bakkioui: 0.32310495626822155
Raphaël Varane: 0.30856625258799175
Moussa Sidi Yaya Dembélé: 0.2959473150962513
Romelu Lukaku Menama: 0.2114229024943311
Dries Mertens: 0.1996782496782497
Hugo Lloris: 0.15170401493930905
Thibaut Courtois: 0.1270913770913771
Corentin Tolisso: 0.1261904761904762
Yannick Ferreira Carrasco: 0.059770114942528735
Steven N"Kemboanza Mike Christopher Nzonzi: 0.038461538461538464


# Per Game

In [38]:
players = pd.read_hdf(spadl_h5,"players")
teams = pd.read_hdf(spadl_h5,"teams")
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")

contribution = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty']

game = list(games.itertuples())[0]
#for game in tqdm.tqdm(list(games.itertuples()))[0]:
actions = pd.read_hdf(spadl_h5,f"actions/game_{game.game_id}")
actions = (
    actions.merge(actiontypes)
    .merge(results)
    .merge(bodyparts)
    .merge(players,"left",on="player_id")
    .merge(teams,"left",on="team_id")
    .sort_values(["period_id", "time_seconds", "timestamp"])
    .reset_index(drop=True)
)

all_possessions = []
curr_possession = []
possessing_team = actions.loc[0]["team_id"]
for i in range(len(actions)):
    action = actions.loc[i]["type_name"]
    action_team = actions.loc[i]["team_id"]
    result = actions.loc[i]["result_name"]
    if change_possession(action, action_team, possessing_team, result):
        possessing_team = action_team
        all_possessions.append(copy.deepcopy(curr_possession))
        curr_possession = []

    curr_possession.append(actions.loc[i])

game_centrality, team1, team2 = players_in_game(actions)

for pos in all_possessions:
    for pos in all_possessions:
        pos_players = players_in_pos(pos)

        for play in pos:
            if play['type_name'] == 'shot':
                for player in pos_players:
                    game_centrality[player][0] += 1

                if play['team_id'] == actions.loc[0]["team_id"]:
                    for player in team1:
                        game_centrality[player][1] += 1
                else:
                    for player in team2:
                        game_centrality[player][1] += 1

total_flow = {}
for player in players['player_name']:
    total_flow[player] = [0,0]
                    
for key in game_centrality.keys():
    total_flow[key][0] += game_centrality[key][0]
    total_flow[key][1] += game_centrality[key][1]
        
avg_flow = {}
for key in total_flow.keys():
        pos_num = total_flow[key][1]
        if pos_num > 0:
            avg_flow[key] = total_flow[key][0] / pos_num 
        
from collections import OrderedDict
dd = OrderedDict(sorted(avg_flow.items(), key=lambda x: x[1], reverse=True))
for key in dd:
    print(key + ": " + str(dd[key]))

Toby Alderweireld: 0.7777777777777778
Antoine Griezmann: 0.7222222222222222
Kylian Mbappé Lottin: 0.5555555555555556
Olivier Giroud: 0.5555555555555556
N"Golo Kanté: 0.5555555555555556
Axel Witsel: 0.5555555555555556
Vincent Kompany: 0.5555555555555556
Paul Pogba: 0.5
Blaise Matuidi: 0.4444444444444444
Lucas Hernández Pi: 0.4444444444444444
Kevin De Bruyne: 0.4444444444444444
Eden Hazard: 0.4444444444444444
Nacer Chadli: 0.4444444444444444
Raphaël Varane: 0.3333333333333333
Samuel Yves Umtiti: 0.3333333333333333
Moussa Sidi Yaya Dembélé: 0.3333333333333333
Jan Vertonghen: 0.3333333333333333
Marouane Fellaini-Bakkioui: 0.3333333333333333
Benjamin Pavard: 0.2777777777777778
Dries Mertens: 0.2222222222222222
Hugo Lloris: 0.1111111111111111
Romelu Lukaku Menama: 0.1111111111111111
Corentin Tolisso: 0.05555555555555555
Thibaut Courtois: 0.0
Steven N"Kemboanza Mike Christopher Nzonzi: 0.0
Yannick Ferreira Carrasco: 0.0


In [39]:
players = pd.read_hdf(spadl_h5,"players")
teams = pd.read_hdf(spadl_h5,"teams")
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")

total_flow = {}
for player in players['player_name']:
    total_flow[player] = [0,0]

contribution = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty']
    
for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5,f"actions/game_{game.game_id}")
    actions = (
        actions.merge(actiontypes)
        .merge(results)
        .merge(bodyparts)
        .merge(players,"left",on="player_id")
        .merge(teams,"left",on="team_id")
        .sort_values(["period_id", "time_seconds", "timestamp"])
        .reset_index(drop=True)
    )

    all_possessions = []
    curr_possession = []
    possessing_team = actions.loc[0]["team_id"]
    for i in range(len(actions)):
        action = actions.loc[i]["type_name"]
        action_team = actions.loc[i]["team_id"]
        result = actions.loc[i]["result_name"]
        if change_possession(action, action_team, possessing_team, result):
            possessing_team = action_team
            all_possessions.append(copy.deepcopy(curr_possession))
            curr_possession = []

        curr_possession.append(actions.loc[i])

    game_centrality, team1, team2 = players_in_game(actions)

    for pos in all_possessions:
        pos_players = []

        for play in pos:
            player = play['player_name']
            if play['type_name'] in contribution and play['result_name'] == 'success':
                if player not in pos_players:
                    pos_players.append(player)
            
            if play['type_name'] == 'shot':
                for player in pos_players:
                    game_centrality[player][0] += 1

                if play['team_id'] == actions.loc[0]["team_id"]:
                    for player in team1:
                        game_centrality[player][1] += 1
                else:
                    for player in team2:
                        game_centrality[player][1] += 1

    for key in game_centrality.keys():
        total_flow[key][0] += game_centrality[key][0]
        total_flow[key][1] += game_centrality[key][1]

100%|██████████████████████████████████████████████████████████████████████████████████| 64/64 [01:36<00:00,  1.56s/it]


In [40]:
avg_flow = {}
for key in total_flow.keys():
        pos_num = total_flow[key][1]
        if pos_num >= 15:
            avg_flow[key] = total_flow[key][0] / pos_num 
        
from collections import OrderedDict
dd = OrderedDict(sorted(avg_flow.items(), key=lambda x: x[1], reverse=True))
for key in dd:
    print(key + ": " + str(dd[key]))

Lionel Andrés Messi Cuccittini: 0.7843137254901961
Heung-Min Son: 0.78125
Christian Alberto Cueva Bravo: 0.7096774193548387
Toni Kroos: 0.6805555555555556
Aaron Mooy: 0.6774193548387096
Kieran Trippier: 0.6710526315789473
Francisco Román Alarcón Suárez: 0.6470588235294118
Rami Bedoui: 0.625
Sung-Yeung Ki: 0.6190476190476191
Sadio Mané: 0.6153846153846154
Neymar da Silva Santos Junior: 0.6138613861386139
Mesut Özil: 0.6111111111111112
Edgar Yoel Bárcenas Herrera: 0.5909090909090909
Kevin De Bruyne: 0.5842696629213483
André Martín Carrillo Díaz: 0.5806451612903226
Filipe Luis Kasmirski: 0.5757575757575758
Philippe Coutinho Correia: 0.5643564356435643
Hakim Ziyech: 0.5641025641025641
Paul Pogba: 0.5573770491803278
Wahbi Khazri: 0.5526315789473685
Cristiano Ronaldo dos Santos Aveiro: 0.5510204081632653
Carlos Alberto Vela Garrido: 0.55
Shinji Kagawa: 0.5483870967741935
Luka Modrić: 0.5462962962962963
Xherdan Shaqiri: 0.5454545454545454
Ali Maâloul: 0.5454545454545454
Sergej Milinković-Savi

Pontus Jansson: 0.09090909090909091
Kamil Glik: 0.09090909090909091
Wilfredo Daniel Caballero: 0.08823529411764706
Mohamed El Shenawy: 0.08695652173913043
Kenneth Josiah Omeruo: 0.08695652173913043
Sverrir Ingi Ingason: 0.08695652173913043
Aziz Bouhaddouz: 0.08695652173913043
Hotaru Yamaguchi: 0.08571428571428572
Mario Gómez García: 0.08333333333333333
Rafael Márquez Álvarez: 0.07894736842105263
David Ospina Ramírez: 0.07894736842105263
Khadim N"Diaye: 0.07692307692307693
Alberto Junior Rodríguez Valdelomar: 0.07407407407407407
Néstor Fernando Muslera Micol: 0.07272727272727272
Rodrigo Moreno Machado: 0.07017543859649122
Jamie Vardy: 0.06976744186046512
Alisson Ramsés Becker: 0.06930693069306931
Leon Goretzka: 0.06896551724137931
Sofyan Amrabat: 0.06666666666666667
Birkir Már Sævarsson: 0.0625
Ja-Cheol Koo: 0.0625
Emil Krafth: 0.0625
Igor Akinfeev: 0.06
Vladimir Stojković: 0.058823529411764705
Ari Freyr Skúlason: 0.058823529411764705
Yasser Abdullah Al Mosailem: 0.058823529411764705
Fr

In [41]:
print(total_flow["Heung-Min Son"])

[25, 32]
