In [1]:
import pandas as pd
import json
import numpy as np
import os

# Let pandas render up to 500 columns when a frame is displayed.
pd.set_option('display.max_columns', 500)

In [30]:
# Load every match-list JSON under data/matches/11 into a single DataFrame.
matches_dir = 'data/matches/11'

# sorted() keeps the row order reproducible; os.listdir() order is arbitrary.
files = sorted(os.listdir(matches_dir))
arr = []
for filename in files:
    # Skip anything that is not a JSON file (e.g. .DS_Store or a
    # .ipynb_checkpoints directory) instead of failing while opening/parsing it.
    if not filename.endswith('.json'):
        continue
    # Explicit UTF-8 so decoding does not depend on the platform's locale default.
    with open(os.path.join(matches_dir, filename), encoding='utf-8') as handle:
        arr.append(pd.DataFrame(json.load(handle)))

# pd.concat raises ValueError when no match file was found.
matches_df = pd.concat(arr)

In [20]:
# Convert the match_date column to pandas datetimes.
matches_df = matches_df.assign(match_date=pd.to_datetime(matches_df['match_date']))

In [21]:
# Order the matches from earliest to latest, then collect their ids as a plain list.
matches_df = matches_df.sort_values('match_date')
match_ids = matches_df['match_id'].to_numpy().tolist()

In [22]:
# Load the event JSON of every match listed in match_ids into a single DataFrame.
events_dir = 'data/events'

# Set lookup instead of scanning the match_ids list once per file.
wanted_ids = set(match_ids)

# sorted() keeps the row order reproducible; os.listdir() order is arbitrary.
files = sorted(os.listdir(events_dir))
arr = []
for filename in files:
    stem, ext = os.path.splitext(filename)
    # Only "<digits>.json" names are considered; anything else (hidden files,
    # checkpoint directories, ...) is skipped rather than crashing int().
    if ext != '.json' or not stem.isdigit():
        continue
    if int(stem) not in wanted_ids:
        continue
    # Explicit UTF-8 so decoding does not depend on the platform's locale default.
    with open(os.path.join(events_dir, filename), encoding='utf-8') as handle:
        arr.append(pd.DataFrame(json.load(handle)))

# NOTE(review): the per-file frames are concatenated as loaded and the file's
# match id is not stored on the rows -- confirm the events do not need it.
# pd.concat raises ValueError when no event file matched.
events = pd.concat(arr)

In [23]:
# Display the column labels of the combined events frame.
events.columns

Index(['id', 'index', 'period', 'timestamp', 'minute', 'second', 'type',
       'possession', 'possession_team', 'play_pattern', 'team', 'duration',
       'tactics', 'related_events', 'player', 'position', 'location', 'pass',
       'carry', 'ball_receipt', 'under_pressure', 'duel', 'out',
       'ball_recovery', 'clearance', 'counterpress', 'shot', 'goalkeeper',
       'off_camera', 'dribble', 'foul_committed', 'foul_won', 'bad_behaviour',
       'interception', 'block', 'substitution', '50_50', 'miscontrol',
       'injury_stoppage'],
      dtype='object')

In [24]:
# Print the per-column non-null counts and dtypes of the events frame.
events.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 129059 entries, 0 to 4047
Data columns (total 39 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   id               129059 non-null  object 
 1   index            129059 non-null  int64  
 2   period           129059 non-null  int64  
 3   timestamp        129059 non-null  object 
 4   minute           129059 non-null  int64  
 5   second           129059 non-null  int64  
 6   type             129059 non-null  object 
 7   possession       129059 non-null  int64  
 8   possession_team  129059 non-null  object 
 9   play_pattern     129059 non-null  object 
 10  team             129059 non-null  object 
 11  duration         93339 non-null   float64
 12  tactics          128 non-null     object 
 13  related_events   125163 non-null  object 
 14  player           128635 non-null  object 
 15  position         128635 non-null  object 
 16  location         128294 non-null  object

In [25]:
# Replace every missing value with an empty string; the flattening step
# below tests cells against '' to detect "no value".
events = events.fillna('')

In [26]:
def _dig(value, *path, default=''):
    """Follow ``path`` through nested dicts/lists and return what is found.

    String steps are dictionary keys, integer steps are (non-negative) list
    positions. ``default`` is returned as soon as a step cannot be taken:
    the current value is not a dict/list, the key is absent, or the list is
    too short. This covers the '' placeholders left by ``fillna('')``.
    """
    for step in path:
        if isinstance(step, int):
            if not isinstance(value, (list, tuple)) or step >= len(value):
                return default
        elif not isinstance(value, dict) or step not in value:
            return default
        value = value[step]
    return value


def _id_name(prefix, source, *path, default=''):
    """Field specs for the ``<prefix>_id`` / ``<prefix>_name`` pair found under ``path``."""
    return [
        (prefix + '_id', source, path + ('id',), default),
        (prefix + '_name', source, path + ('name',), default),
    ]


# Each entry is (new column, source column, path inside the source cell, default).
# The list order is the order in which the columns are added to the frame.
flat_fields = []

# Descriptors whose fallback is NaN.
for descriptor in ('type', 'possession_team', 'play_pattern', 'team'):
    flat_fields += _id_name(descriptor, descriptor, default=np.nan)

# Tactics: formation plus eleven lineup slots. A lineup with fewer than eleven
# entries yields '' for the missing slots instead of raising IndexError.
flat_fields.append(('formation', 'tactics', ('formation',), ''))
for slot in range(0, 11):
    slot_prefix = 'player_' + str(slot)
    flat_fields += _id_name(slot_prefix, 'tactics', 'lineup', slot, 'player')
    flat_fields += _id_name(slot_prefix + '_pos', 'tactics', 'lineup', slot, 'position')
    flat_fields.append((slot_prefix + '_jersey_num', 'tactics', ('lineup', slot, 'jersey_number'), ''))

# Keep only the first related event id (overwrites the list column in place).
flat_fields.append(('related_events', 'related_events', (0,), ''))

# Acting player, position and location.
flat_fields += _id_name('event_player', 'player')
flat_fields += _id_name('event_player_pos', 'position')
flat_fields += [
    ('event_player_loc_x', 'location', (0,), ''),
    ('event_player_loc_y', 'location', (1,), ''),
]

# Pass details.
flat_fields += _id_name('event_pass_recipient', 'pass', 'recipient')
flat_fields += [
    ('event_pass_length', 'pass', ('length',), ''),
    ('event_pass_angle', 'pass', ('angle',), ''),
]
flat_fields += _id_name('event_pass_height', 'pass', 'height')
flat_fields += [
    ('event_pass_end_loc_x', 'pass', ('end_location', 0), ''),
    ('event_pass_end_loc_y', 'pass', ('end_location', 1), ''),
]
flat_fields += _id_name('event_pass_type', 'pass', 'type')
flat_fields.append(('event_pass_switch', 'pass', ('switch',), False))
flat_fields += _id_name('event_pass_outcome', 'pass', 'outcome')
flat_fields += [
    ('event_pass_assisted_shot_id', 'pass', ('assisted_shot_id',), ''),
    ('event_pass_shot_assist', 'pass', ('shot_assist',), False),
    ('event_pass_shot_through_ball', 'pass', ('through_ball',), False),
]
# The technique id/name fall back to '' like every other id/name column
# (a False placeholder would end up as the text "False" in the exported CSV).
flat_fields += _id_name('event_pass_shot_technique', 'pass', 'technique')
flat_fields += _id_name('event_pass_body_part', 'pass', 'body_part')

# Carry end point, ball-receipt outcome, ball-recovery failure flag.
flat_fields += [
    ('event_end_location_x', 'carry', ('end_location', 0), ''),
    ('event_end_location_y', 'carry', ('end_location', 1), ''),
]
flat_fields += _id_name('event_outcome', 'ball_receipt', 'outcome')
flat_fields.append(('event_ball_recovery', 'ball_recovery', ('recovery_failure',), ''))

# Shot details.
flat_fields += [
    ('event_shot_xg', 'shot', ('statsbomb_xg',), ''),
    ('event_shot_end_loc', 'shot', ('end_location',), ''),
    ('event_shot_key_pass_id', 'shot', ('key_pass_id',), ''),
]
flat_fields += _id_name('event_shot_body_part', 'shot', 'body_part')
flat_fields += _id_name('event_shot_type', 'shot', 'type')
flat_fields += _id_name('event_shot_outcome', 'shot', 'outcome')
flat_fields.append(('event_shot_first_time', 'shot', ('first_time',), ''))
flat_fields += _id_name('event_shot_technique', 'shot', 'technique')
flat_fields += [
    ('event_shot_aerial_won', 'shot', ('aerial_won',), ''),
    ('event_shot_freeze_frame', 'shot', ('freeze_frame',), ''),
]

# Goalkeeper details.
flat_fields += [
    ('event_gk_end_loc_x', 'goalkeeper', ('end_location', 0), ''),
    ('event_gk_end_loc_y', 'goalkeeper', ('end_location', 1), ''),
]
flat_fields += _id_name('event_gk_pos', 'goalkeeper', 'position')
flat_fields += _id_name('event_gk_type', 'goalkeeper', 'type')
flat_fields += _id_name('event_gk_outcome', 'goalkeeper', 'outcome')
flat_fields += _id_name('event_gk_body_part', 'goalkeeper', 'body_part')
flat_fields += _id_name('event_gk_technique', 'goalkeeper', 'technique')

# Duel, dribble, foul, interception and substitution details.
flat_fields += _id_name('event_duel_type', 'duel', 'type')
flat_fields += _id_name('event_duel_outcome', 'duel', 'outcome')
flat_fields.append(('event_dribble_overrun', 'dribble', ('overrun',), False))
flat_fields += _id_name('event_dribble_outcome', 'dribble', 'outcome')
flat_fields += _id_name('event_foul_commited_card', 'foul_committed', 'card')
flat_fields.append(('event_foul_advantage_played', 'foul_committed', ('advantage',), ''))
flat_fields += _id_name('event_foul_type', 'foul_committed', 'type')
flat_fields += _id_name('event_foul_interception', 'interception', 'outcome')
flat_fields += _id_name('event_subs_outcome', 'substitution', 'outcome')
flat_fields += _id_name('event_subs_replacement', 'substitution', 'replacement')

# Materialise the columns one by one, in table order. The lambda is consumed
# inside the same iteration, so closing over the loop variables is safe.
for column, source, path, default in flat_fields:
    events[column] = events[source].apply(lambda cell: _dig(cell, *path, default=default))



In [27]:
# Source columns to remove from the frame.
# NOTE(review): 'related_events' is in this list, so the value written to
# that column by the flattening cell is discarded -- confirm this is intended.
nested_columns = [
    'type', 'possession_team', 'play_pattern', 'team', 'tactics',
    'related_events', 'player', 'position', 'location', 'pass', 'carry',
    'ball_receipt', 'ball_recovery', 'shot', 'goalkeeper', 'duel',
    'dribble', 'foul_committed', 'interception', 'substitution',
]
events = events.drop(columns=nested_columns)

In [29]:
# Write the events table to CSV without the row index.
events.to_csv(path_or_buf="La_liga_2019_20_events.csv", index=False)