In [99]:
%matplotlib inline

from collections import defaultdict
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json

In [23]:
def load_dataset(tournament='World_Cup'):
    matches, events = {}, {}
    matches = json.load(open('./data/matches/matches_{}.json'.format(tournament)))
    events = json.load(open('./data/events/events_{}.json'.format(tournament)))
    return matches, events

def get_match(matches, events):
    match_id2events = defaultdict(list)
    match_id2match = defaultdict(dict)
    for event in events:
        match_id = event['matchId']
        match_id2events[match_id].append(event)
                                         
    for match in matches:
        match_id = match['wyId']
        match_id2match[match_id] = match
    
    return match_id2events, match_id2match
    
def get_player():
    players = json.load(open('./data/players.json'))
    player_id2player = defaultdict(dict)
    for player in players:
        player_id = player['wyId']
        player_id2player[player_id] = player
    return player_id2player
        
def get_competitions():
    competitions = json.load(open('./data/competitions.json'))
    competition_id2competition = defaultdict(dict)
    for competition in competitions:
        competition_id = competition['wyId']
        competition_id2competition[competition_id] = competition
    return competition_id2competition

def get_teams():
    teams = json.load(open('./data/teams.json'))
    team_id2team = defaultdict(dict)
    for team in teams:
        team_id = team['wyId']
        team_id2team[team_id] = team
    return team_id2team

In [24]:
matches, events = load_dataset()
match_id2events, match_id2match = get_match(matches, events)
player_id2player = get_player()
competition_id2competition = get_competitions()
team_id2team = get_teams()

In [39]:
event_key = []
sub_event_key = []

for events in match_id2events.values():
    for event in events:
        event_key.append(event['eventName'])
        sub_event_key.append(event['subEventName'])
        
event_key = list(set(event_key))
sub_event_key = list(set(sub_event_key))

In [25]:
team_id2team[4418]

{'city': 'Paris',
 'name': 'France',
 'wyId': 4418,
 'officialName': 'France',
 'area': {'name': 'France',
  'id': 250,
  'alpha3code': 'FRA',
  'alpha2code': 'FR'},
 'type': 'national'}

In [52]:
event_stats = []
sub_event_stats = []
teamid = 4418

for match in match_id2match.values():
    for team in match['teamsData'].values():
        if team['teamId'] == teamid:
            print(match['label'])
            events = match_id2events[match['wyId']]
            event_stat = {key: 0 for key in event_key}
            sub_event_stat = {key: 0 for key in sub_event_key}

            for event in events:
                if event['teamId'] == teamid:
                    event_stat[event['eventName']] += 1
                    sub_event_stat[event['subEventName']] += 1

            event_stats.append(event_stat)
            sub_event_stats.append(sub_event_stat)

France - Croatia, 4 - 2
France - Belgium, 1 - 0
Uruguay - France, 0 - 2
France - Argentina, 4 - 3
Denmark - France, 0 - 0
France - Peru, 1 - 0
France - Australia, 2 - 1


In [55]:
event_stats[0]

{'Save attempt': 3,
 'Offside': 1,
 'Shot': 7,
 'Goalkeeper leaving line': 5,
 'Others on the ball': 86,
 'Free Kick': 51,
 'Pass': 241,
 'Duel': 194,
 'Foul': 13}

In [56]:
sub_event_stats[0]

{'Cross': 2,
 '': 1,
 'Touch': 59,
 'Hand pass': 2,
 'Shot': 7,
 'Head pass': 32,
 'Free Kick': 9,
 'Acceleration': 6,
 'Smart pass': 6,
 'Violent Foul': 0,
 'Clearance': 21,
 'Time lost foul': 0,
 'Out of game foul': 0,
 'High pass': 18,
 'Simulation': 0,
 'Throw in': 22,
 'Goal kick': 15,
 'Launch': 16,
 'Goalkeeper leaving line': 5,
 'Reflexes': 3,
 'Air duel': 41,
 'Penalty': 1,
 'Foul': 12,
 'Protest': 0,
 'Ground defending duel': 51,
 'Save attempt': 0,
 'Simple pass': 165,
 'Hand foul': 1,
 'Ground loose ball duel': 29,
 'Free kick shot': 0,
 'Free kick cross': 2,
 'Ground attacking duel': 73,
 'Corner': 2,
 'Late card foul': 0}

In [84]:
team_stats = defaultdict(list)

for match in match_id2match.values():
    for team in match['teamsData'].values():
        events = match_id2events[match['wyId']]
        event_stat = {key: 0 for key in event_key}
        sub_event_stat = {key: 0 for key in sub_event_key}

        for event in events:
            if event['teamId'] == team['teamId']:
                event_stat[event['eventName']] += 1
                sub_event_stat[event['subEventName']] += 1
        try:
            event_stat['played'] += 1
        except:
            event_stat['played'] = 1

#             event_stats.append(event_stat)
#             sub_event_stats.append(sub_event_stat)

        try:
            team_stats[team['teamId']] = {k: team_stats[team['teamId']][k] + event_stat[k] for k in event_stat}
        except:
            team_stats[team['teamId']] = event_stat

In [87]:
team_stats

defaultdict(list,
            {9598: {'Save attempt': 39,
              'Offside': 11,
              'Shot': 103,
              'Goalkeeper leaving line': 20,
              'Others on the ball': 577,
              'Free Kick': 392,
              'Pass': 3708,
              'Duel': 1601,
              'Foul': 114,
              'played': 7},
             4418: {'Save attempt': 17,
              'Offside': 3,
              'Shot': 68,
              'Goalkeeper leaving line': 8,
              'Others on the ball': 479,
              'Free Kick': 324,
              'Pass': 2931,
              'Duel': 1400,
              'Foul': 97,
              'played': 7},
             2413: {'Save attempt': 28,
              'Offside': 15,
              'Shot': 77,
              'Goalkeeper leaving line': 7,
              'Others on the ball': 458,
              'Free Kick': 364,
              'Pass': 3631,
              'Duel': 1301,
              'Foul': 73,
              'played': 7},
             5

In [134]:
data = []
for idx in team_stats:
#     print(team_id2team[idx]['name'], team_stats[idx])
    column = []
    try:
        column.append(team_id2team[idx]['name'])
        for key in team_stats[idx].keys():
            column.append(team_stats[idx][key])
        data.append(column)
    except:
        pass

In [135]:
keys = []
keys.append('Team')
for key in team_stats[10840].keys():
    keys.append(key)

In [136]:
keys

['Team',
 'Save attempt',
 'Offside',
 'Shot',
 'Goalkeeper leaving line',
 'Others on the ball',
 'Free Kick',
 'Pass',
 'Duel',
 'Foul',
 'played']

In [142]:
df = pd.DataFrame(data, columns=keys)
df.set_index('Team', inplace=True)

In [143]:
df.sort_values('Shot')

Unnamed: 0_level_0,Save attempt,Offside,Shot,Goalkeeper leaving line,Others on the ball,Free Kick,Pass,Duel,Foul,played
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Iran,10,3,15,11,195,132,587,567,46,3
Panama,20,6,20,2,149,121,954,478,52,3
Senegal,9,10,24,7,205,127,937,600,45,3
Australia,14,3,25,2,158,136,1410,600,36,3
Costa Rica,16,5,26,11,207,123,1024,646,45,3
Poland,10,5,26,4,183,130,1373,596,29,3
Egypt,16,4,26,10,211,132,1184,605,38,3
Peru,9,7,27,4,224,140,1278,672,37,3
Serbia,14,5,30,4,246,128,1081,702,46,3
Iceland,14,0,30,4,195,126,754,632,36,3


In [79]:
data_np = np.array()

KeyError: 'id'

In [74]:
a

{'id': 70, 'idx': 24}

In [76]:
team_stats[9598]

{}

In [77]:
team_stats[9598] + {'id':35, 'idx':12}

TypeError: unsupported operand type(s) for +: 'dict' and 'dict'