# Load Data

In [1]:
%load_ext autoreload
%autoreload 2
import os; import sys; sys.path.append('../')
import pandas as pd
import tqdm
import warnings
import copy
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import networkx as nx
import numpy as np
from collections import Counter
from collections import OrderedDict

In [2]:
## Configure file and folder names
datafolder = "../data"
spadl_h5 = os.path.join(datafolder,"spadl-statsbomb.h5")
predictions_h5 = os.path.join(datafolder,"predictions.h5")

In [3]:
games = pd.read_hdf(spadl_h5,"games")
games = games[games.competition_name == "FIFA World Cup"]
print("nb of games:", len(games))

nb of games: 64


# Helper Functions

In [4]:
def players_in_game(B):
    team_id = B.loc[0]["team_id"]
    team1 = []
    team2 = []
    players = {}
    for i in range(len(B)):
        player = B.loc[i]["player_name"]
        if players.get(player) == None:
            players[player] = [0,0]
            if B.loc[i]["team_id"] == team_id:
                team1.append(player)
            else:
                team2.append(player)
            
    return players, team1, team2

In [5]:
def players_in_pos(pos):
    contribution_action = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty']
    shot_action = ['shot', 'shot_penalty']
    
    pos_players = []
    for play in pos:
        play_type = play['type_name']
        player = play['player_name']
        if (play_type in contribution_action and play['result_name'] == 'success') or play_type in shot_action:
            if player not in pos_players:
                pos_players.append(player)
            
    return pos_players

In [6]:
def change_possession(action, action_team, possession_team, result):
    end_pos = ['bad_touch', 'foul']
    change_team = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty', 'keeper_pick_up']
    success_change = ['tackle', 'interception', 'take_on', 'clearance', 'keeper_claim', 'keeper_save', 
                      'keeper_punch']
    
    if possession_team == None:
        if result == 'success':
            if action in change_team:
                possession_team = action_team
        else:
            return False, None
    
    
    if action in end_pos:
        return True, None
    
    if action_team != possession_team:
        if action in change_team:
            return True, action_team
        if result == 'success':
            if action in success_change:
                return True, action_team
            
    return False, possession_team

In [7]:
def extract_possessions(actions):
    all_possessions = []
    curr_possession = []
    possessing_team = actions.loc[0]["team_id"]
        
    for i in range(len(actions)):
        action = actions.loc[i]["type_name"]
        action_team = actions.loc[i]["team_id"]
        result = actions.loc[i]["result_name"]
        end_pos, possessing_team = change_possession(action, action_team, possessing_team, result)
        
        if end_pos:
            all_possessions.append(copy.deepcopy(curr_possession))
            curr_possession = []

        curr_possession.append(actions.loc[i])

    return all_possessions

In [8]:
def count_contributions(actions, start_team):
    possessions = extract_possessions(actions)
    contributions, team1, team2 = players_in_game(actions)

    for pos in possessions:
        pos_players = []

        pos_players = players_in_pos(pos)
        
        for play in pos:
            if play['type_name'] == 'shot':
                for player in pos_players:
                    contributions[player][0] += 1

                if play['team_id'] == start_team:
                    for player in team1:
                        contributions[player][1] += 1
                else:
                    for player in team2:
                        contributions[player][1] += 1
                        
                break
                        
    return contributions

# Compute Contribution Rates

In [9]:
players = pd.read_hdf(spadl_h5,"players")
teams = pd.read_hdf(spadl_h5,"teams")
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")

In [10]:
total_contributions = {}
for player in players['player_name']:
    total_contributions[player] = [0,0]

roster = {}
tourney_teams = {}
    
for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5,f"actions/game_{game.game_id}")
    actions = (
        actions.merge(actiontypes)
        .merge(results)
        .merge(bodyparts)
        .merge(players,"left",on="player_id")
        .merge(teams,"left",on="team_id")
        .sort_values(["period_id", "time_seconds", "timestamp"])
        .reset_index(drop=True)
    )
    
    for i in range(len(actions)):
        player = actions.loc[i]["player_name"]
        team = actions.loc[i]["team_name"]
        if team not in tourney_teams:
            tourney_teams[team] = 2
        if player not in roster:
            roster[player] = team
    
    contributions = count_contributions(actions, actions.loc[0]["team_id"])

    for player in contributions.keys():
        total_contributions[player][0] += contributions[player][0]
        total_contributions[player][1] += contributions[player][1]

100%|██████████████████████████████████████████████████████████████████████████████████| 64/64 [02:08<00:00,  2.08s/it]


In [11]:
iter_help = copy.deepcopy(total_contributions)
for player in iter_help:
    if total_contributions[player][1] == 0:
        del total_contributions[player]

In [12]:
count_teams = copy.deepcopy(tourney_teams)

contribution_rate = {}
for player in total_contributions.keys():
        pos_num = total_contributions[player][1]
        if pos_num >= 20:
            contribution_rate[player] = total_contributions[player][0] / pos_num 
        
ordered_rates = OrderedDict(sorted(contribution_rate.items(), key=lambda x: x[1], reverse=True))

for player in ordered_rates:
    team = roster[player]
    if count_teams[team] > 0:
        print(player + "(" + team + "): " + str(ordered_rates[player]))
        count_teams[team] -= 1

Heung-Min Son(South Korea): 0.8148148148148148
Lionel Andrés Messi Cuccittini(Argentina): 0.7555555555555555
Aaron Mooy(Australia): 0.7142857142857143
Neymar da Silva Santos Junior(Brazil): 0.6947368421052632
Kieran Trippier(England): 0.6875
Toni Kroos(Germany): 0.6666666666666666
Christian Alberto Cueva Bravo(Peru): 0.6666666666666666
M"Baye Babacar Niang(Senegal): 0.6666666666666666
Hakim Ziyech(Morocco): 0.6666666666666666
Francisco Román Alarcón Suárez(Spain): 0.6557377049180327
Wahbi Khazri(Tunisia): 0.6470588235294118
Mesut Özil(Germany): 0.6170212765957447
Xherdan Shaqiri(Switzerland): 0.6153846153846154
Philippe Coutinho Correia(Brazil): 0.6
Aleksandar Mitrović(Serbia): 0.59375
Sadio Mané(Senegal): 0.5833333333333334
Luka Modrić(Croatia): 0.5757575757575758
Shinji Kagawa(Japan): 0.5714285714285714
Eden Hazard(Belgium): 0.569620253164557
Kevin De Bruyne(Belgium): 0.5569620253164557
Jae-Sung Lee(South Korea): 0.5555555555555556
Christian Dannemann Eriksen(Denmark): 0.552631578947