# Load Data

In [20]:
%load_ext autoreload
%autoreload 2
import os; import sys; sys.path.append('../')
import pandas as pd
import tqdm
import warnings
import copy
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

import networkx as nx
import numpy as np
from collections import Counter
from collections import OrderedDict

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [21]:
## Configure file and folder names
datafolder = "../data"
spadl_h5 = os.path.join(datafolder,"spadl-statsbomb.h5")
predictions_h5 = os.path.join(datafolder,"predictions.h5")

In [22]:
games = pd.read_hdf(spadl_h5,"games")
games = games[games.competition_name == "La Liga"]
print("nb of games:", len(games))

nb of games: 348


# Helper Functions

In [23]:
def players_in_game(B):
    team_id = B.loc[0]["team_id"]
    team1 = []
    team2 = []
    players = {}
    for i in range(len(B)):
        player = B.loc[i]["player_name"]
        if players.get(player) == None:
            players[player] = [0,0]
            if B.loc[i]["team_id"] == team_id:
                team1.append(player)
            else:
                team2.append(player)
            
    return players, team1, team2

In [24]:
def players_in_pos(pos):
    contribution_action = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty']
    shot_action = ['shot', 'shot_penalty']
    
    pos_players = []
    for play in pos:
        play_type = play['type_name']
        player = play['player_name']
        if (play_type in contribution_action and play['result_name'] == 'success') or play_type in shot_action:
            if player not in pos_players:
                pos_players.append(player)
            
    return pos_players

In [25]:
def change_possession(action, action_team, possession_team, result):
    end_pos = ['bad_touch', 'foul']
    change_team = ['pass', 'dribble', 'throw_in', 'corner_crossed', 'freekick_crossed', 'cross', 'shot', 
                  'freekick_short', 'goalkick', 'corner_short', 'shot_penalty', 'keeper_pick_up']
    success_change = ['tackle', 'interception', 'take_on', 'clearance', 'keeper_claim', 'keeper_save', 
                      'keeper_punch']
    
    if possession_team == None:
        if result == 'success':
            if action in change_team:
                possession_team = action_team
        else:
            return False, None
    
    
    if action in end_pos:
        return True, None
    
    if action_team != possession_team:
        if action in change_team:
            return True, action_team
        if result == 'success':
            if action in success_change:
                return True, action_team
            
    return False, possession_team

In [26]:
def extract_possessions(actions):
    all_possessions = []
    curr_possession = []
    possessing_team = actions.loc[0]["team_id"]
        
    for i in range(len(actions)):
        action = actions.loc[i]["type_name"]
        action_team = actions.loc[i]["team_id"]
        result = actions.loc[i]["result_name"]
        end_pos, possessing_team = change_possession(action, action_team, possessing_team, result)
        
        if end_pos:
            all_possessions.append(copy.deepcopy(curr_possession))
            curr_possession = []

        curr_possession.append(actions.loc[i])

    return all_possessions

In [27]:
def count_contributions(actions, start_team):
    possessions = extract_possessions(actions)
    contributions, team1, team2 = players_in_game(actions)

    for pos in possessions:
        pos_players = []

        pos_players = players_in_pos(pos)
        
        for play in pos:
            if play['type_name'] == 'shot':
                for player in pos_players:
                    contributions[player][0] += 1

                if play['team_id'] == start_team:
                    for player in team1:
                        contributions[player][1] += 1
                else:
                    for player in team2:
                        contributions[player][1] += 1
                        
                break
                        
    return contributions

# Compute Contribution Rates

In [28]:
players = pd.read_hdf(spadl_h5,"players")
teams = pd.read_hdf(spadl_h5,"teams")
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")

In [29]:
total_contributions = {}
for player in players['player_name']:
    total_contributions[player] = [0,0]

roster = {}
tourney_teams = {}
    
for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5,f"actions/game_{game.game_id}")
    actions = (
        actions.merge(actiontypes)
        .merge(results)
        .merge(bodyparts)
        .merge(players,"left",on="player_id")
        .merge(teams,"left",on="team_id")
        .sort_values(["period_id", "time_seconds", "timestamp"])
        .reset_index(drop=True)
    )
    
    for i in range(len(actions)):
        player = actions.loc[i]["player_name"]
        team = actions.loc[i]["team_name"]
        if team not in tourney_teams:
            tourney_teams[team] = 2
        if player not in roster:
            roster[player] = team
    
    contributions = count_contributions(actions, actions.loc[0]["team_id"])

    for player in contributions.keys():
        total_contributions[player][0] += contributions[player][0]
        total_contributions[player][1] += contributions[player][1]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 348/348 [13:14<00:00,  2.06s/it]


In [30]:
iter_help = copy.deepcopy(total_contributions)
for player in iter_help:
    if total_contributions[player][1] == 0:
        del total_contributions[player]

In [34]:
count_teams = copy.deepcopy(tourney_teams)

contribution_rate = {}
for player in total_contributions.keys():
        pos_num = total_contributions[player][1]
        if pos_num >= 50:
            contribution_rate[player] = total_contributions[player][0] / pos_num 
        
ordered_rates = OrderedDict(sorted(contribution_rate.items(), key=lambda x: x[1], reverse=True))
count = 20
for player in ordered_rates:
    team = roster[player]
    if count > 0:
    #if count_teams[team] > 0:
        print(player + "(" + team + "): " + str(ordered_rates[player]))
        count -= 1
        #count_teams[team] -= 1

Roberto Trashorras Gayoso(Rayo Vallecano): 0.6176470588235294
Lionel Andrés Messi Cuccittini(Barcelona): 0.5596310596310596
Xavier Hernández Creus(Barcelona): 0.5276763135712279
Ronaldo de Assis Moreira(Barcelona): 0.517555266579974
Frédéric Oumar Kanouté(Sevilla): 0.5168539325842697
Neymar da Silva Santos Junior(Barcelona): 0.5160142348754448
Sergio García De La Fuente(Espanyol): 0.5108695652173914
Anderson Luís de Souza(Barcelona): 0.5047106325706595
Luis Alberto Suárez Díaz(Barcelona): 0.49074074074074076
Gareth Frank Bale(Real Madrid): 0.48148148148148145
Fernando José Torres Sanz(Atlético Madrid): 0.48
Antoine Griezmann(Real Sociedad): 0.4772727272727273
Francisco Javier Yeste Navarro(Athletic Bilbao): 0.47540983606557374
Luka Modrić(Real Madrid): 0.47435897435897434
Samuel Eto"o Fils(Barcelona): 0.4722222222222222
Ander Herrera Agüera(Real Zaragoza): 0.4716981132075472
Pablo Hernández Domínguez(Valencia): 0.46875
Álvaro Cejudo Carmona(Osasuna): 0.4666666666666667
Cristiano Ronald

In [32]:
print(total_contributions)

{'Alexis Alejandro Sánchez Sánchez': [337, 1013], 'Francesc Fàbregas i Soler': [470, 1078], 'Pedro Eliezer Rodríguez Ledesma': [767, 2396], 'Neymar da Silva Santos Junior': [580, 1124], 'Daniel Alves da Silva': [1286, 3036], 'Sergio Busquets i Burgos': [1068, 3047], 'Jordi Alba Ramos': [467, 1210], 'Gerard Piqué Bernabéu': [721, 2610], 'Andrés Iniesta Luján': [1541, 3751], 'Lionel Andrés Messi Cuccittini': [2609, 4662], 'Javier Alejandro Mascherano': [564, 2131], 'Sergi Roberto Carnicer': [131, 664], 'Xavier Hernández Creus': [1878, 3559], 'Víctor Valdés Arribas': [329, 3400], 'Thibaut Courtois': [4, 52], 'Diego da Silva Costa': [22, 67], 'Jorge Resurrección Merodio': [31, 96], 'Diego Roberto Godín Leal': [19, 114], 'Cristian Gabriel Rodríguez Barrotti': [2, 19], 'João Miranda de Souza Filho': [9, 52], 'Filipe Luis Kasmirski': [13, 74], 'Juan Francisco Torres Belén': [22, 108], 'Gabriel Fernández Arenas': [54, 136], 'Raúl García Escudero': [42, 104], 'Arda Turan': [59, 287], 'David Vil




# Convert to CSV

In [42]:
df = pd.DataFrame.from_dict(ordered_rates, orient='index')
df.to_csv('Womens_World_Cup.csv')