In [2]:
import glob
import pandas as pd
import numpy as np
import os
import math
from pandas import json_normalize
import matplotlib.pyplot as plt
from matplotlib.patches import Arc
from matplotlib.animation import FuncAnimation
import pickle
from statsbombpy import sb
from datetime import datetime, timedelta

# Generating features

## Input requirements for this notebook:
- The list of match_id for all matches in Euro 2020 from Statsbomb open data.

In [3]:
#get a list of all match_id from local dir, this can otherwise be replaced by typing out a list.

sample_path_360 = "C:/Users/Huy's/OneDrive/1_school/qmul_2021-22/project/open-data-master/data/three-sixty/*.json"

files = [os.path.basename(x).split('.')[0] for x in glob.glob(sample_path_360)]

In [4]:
#to drop non-in-game events e.g. half start, half end, injury etc.
in_game_events = ['Pass','Ball Receipt*','Carry','Pressure',
          'Duel','Ball Recovery','Clearance','Foul Committed',
          'Foul Won','Dribble','Miscontrol','Dispossessed','Goal Keeper',
          'Block','Interception','Shot','Dribbled Past','50/50','Shield',
          'Error','Own Goal Against', 'Own Goal For', 'Referee Ball-Drop']


## Data Preprocessing: 
### Preparing labels (pass completion status, pass reward status)
- Passes completed will be marked as 1, otherwise 0.
- Passes that lead to a shot, or a pass from outside the box that leads to a ball receipt inside the box, within 15 seconds will be marked as reward = 1

In [5]:
def pass_risk(pass_outcome):
    return np.where(pass_outcome is np.NaN, 1, 0)

#function that creates a new column to capture risk label

def create_pass_risk(df):
    df['binary_pass_risk'] = df.apply(lambda x: pass_risk(x['pass_outcome']), axis=1)



In [6]:
#check if there is a ball_receipt event inside the opponent box within 15 seconds of the pass

def check_br_in_box(event_type, outcome, location):
    try: 
        check = (event_type=='Ball Receipt*') & (outcome is np.NaN) & (location[0]>102) & (location[0]<120) & (location[1]>16) & (location[1]<62)
        return check
    except TypeError:
        return False



def pass_reward(df, timestamp, possession_team, period, type_pass):
    if type_pass:
        series_all_chances=df.loc[((df['type']=='Shot')|(df['br_in_box'])) & 
                              (df['team']==possession_team) & (df['period']==period), 'timestamp']
        series_all_chances=series_all_chances.between(timestamp,timestamp+timedelta(seconds=15)).any()
        check = int(series_all_chances)
        
    else:
        check = np.NaN
    return check

#function that creates a new column to capture reward label

def create_pass_reward(dest_df, event_df):
    #create 'br_in_box'
    event_df['br_in_box']=event_df.apply(lambda x: check_br_in_box(x['type'], x['ball_receipt_outcome'], x['location']), axis=1)
    #create 'reward'
    dest_df['binary_pass_reward'] = dest_df.apply(lambda x: pass_reward(event_df, x['timestamp'], 
                                                                     x['possession_team'], x['period'], 
                                                                     x['type']=='Pass'), axis=1)



### Create features from events (and ball tracking data) 
- Location of the attempted ball receipt; 
- Whether ball was received under pressure

In [7]:
def br_loc(df, related_events, type_pass):
    if type_pass & isinstance(related_events, list):
        br_df = df.loc[(df['type']=='Ball Receipt*') & (df['id'].isin(related_events)),'location']
        result = (isinstance(br_df, list) and br_df) or (br_df.shape[0]>0 and br_df.iat[0]) or np.NaN
    else:
        result = np.NaN
    return result

def br_pressure(df, related_events, type_pass):
    if type_pass & isinstance(related_events, list):
        br_df = df.loc[(df['type']=='Ball Receipt*') & (df['id'].isin(related_events)),'under_pressure']
        result = (isinstance(br_df, list) and br_df) or (br_df.shape[0]>0 and br_df.iat[0]) or np.NaN
    else:
        result = np.NaN
    return result

#get ball receipt location of the pass

def create_br_loc(dest_df, event_df):
    dest_df['pass_br_location'] = dest_df.apply(lambda x: br_loc(event_df, x['related_events'], 
                                                       x['type']=='Pass'), axis=1)
#flag whether the pass was received under pressure
    
def create_br_pressure(dest_df, event_df):
    dest_df['br_pressure'] = dest_df.apply(lambda x: br_pressure(event_df, x['related_events'], x['type']=='Pass'), axis=1)



- One touch pass - inferred from whether there is a carry event related to the pass

In [8]:
def create_one_touch_pass(dest_df, event_df):
    carry_df = event_df.loc[(event_df['type']=='Carry'),'related_events'].explode()
#     pass_df = dest_df.loc[(event_df['type']=='Pass'), ['id', 'possession']]
#     pass_df['no_pass_in_possession']=dest_df.groupby(['possession']).cumcount()+1
#     pass_df = pass_df.loc[pass_df['no_pass_in_possession']==1, 'id']
    series = (~dest_df['id'].isin(carry_df))&(dest_df['type']=='Pass')&(
        (dest_df['pass_type'].isnull())|(dest_df['pass_type'].isin(['Recovery','Interception'])))
    dest_df['one_touch_pass']=series.astype(int)


- Pass from duel - inferred from whether there is a duel event related to the pass

In [9]:
def create_pass_from_duel(dest_df, event_df):
    duel_df = event_df.loc[(event_df['type'].isin(['Duel','50-50'])),'related_events'].explode()
    series = (dest_df['id'].isin(duel_df))&(dest_df['type']=='Pass')
    dest_df['pass_from_duel']=series.astype(int)

- Carry speed before pass - average speed of the carry before the pass, calculated by dividing the distance of carry (in yards) over the duration.

2 reusable functions for later feature calculations as well

In [10]:
#reusable funcs
def divide(numer, denomi):
    try:
        return numer/denomi
    except ZeroDivisionError:
        return 0
def dist(start, end):
    start, end = np.array(start), np.array(end)
    return np.linalg.norm(start-end)   

#carry speed and carry distance before pass

def create_carry_dist_speed(dest_df, event_df):
    carry_df = event_df.loc[(event_df['type']=='Carry')].explode('related_events')
    carry_df['carry_dist']=carry_df.apply(lambda x: dist(x['location'], x['carry_end_location']), axis=1)
    carry_df['carry_speed']=carry_df.apply(lambda x: divide(x['carry_dist'], x['duration']), axis=1)
    pass_df = dest_df[['id', 'type']]
    pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1) 
    carry_df = carry_df.loc[carry_df['related_events'].isin(pass_df['id']),['related_events','carry_dist','carry_speed']]
    
    
    dest_df['carry_dist'] = pass_df['id'].map(carry_df.set_index('related_events')['carry_dist'])
    dest_df['carry_speed'] = pass_df['id'].map(carry_df.set_index('related_events')['carry_speed'])

* Pass end location: If there is ball_receipt_location: ball_receipt_location, else pass_end_location

In [11]:
def create_pass_receipt_loc(df):
    df['pass_receipt_loc']=np.where(~df['pass_br_location'].isnull(),df['pass_br_location'], df['pass_end_location'])

- Pass intention - deprecated

In [12]:
def pass_intention(throughball, switch, cut_back, cross):
    if throughball:
        return 'Through ball'
    if switch:
        return 'Switch'
    if cut_back:
        return 'Cut back'
    if cross:
        return 'Cross'
    
def create_pass_intention(df):
    df['pass_intention'] = df.apply(lambda x: pass_intention(x['pass_through_ball'],x['pass_switch'], 
                                                             x['pass_cut_back'], x['pass_cross']), 
                                    axis=1)


- Pass angle
- passer and ball receipt degree to goal 
(more details of the conventions of measuring angle, please refer to Statsbomb open data documentation)

In [13]:
def pi_to_degree(x):
    return x*180/math.pi

def categorical_pass_angle(angle, threshold):
    if abs(angle)<threshold/3:
        return 'Forward'
    if abs(angle)<threshold/2:
        return 'Sideways'
    if abs(angle)<threshold:
        return 'Backward'
    
def angle_to_goal(loc):
    if isinstance(loc,list):
        vector = [120-loc[0], 40-loc[1]]
        return np.degrees(np.arctan2(vector[1], vector[0]))
    else:
        return np.NaN

# add pass angle 
def create_pass_angle(df):
    df['pass_degree']=df.apply(lambda x: pi_to_degree(x['pass_angle']), axis=1)
    df['pass_angle_cat']=df.apply(lambda x: categorical_pass_angle(x['pass_degree'],180), axis=1)

#add degree from passer or ball receipt to goal

def create_angle_to_goal(df):
    df['passer_degree_to_goal']=df.apply(lambda x: angle_to_goal(x['location']), axis=1)
    df['br_degree_to_goal']=df.apply(lambda x: angle_to_goal(x['pass_receipt_loc']), axis=1)

- Split coordinates

In [14]:
def get_1d_coor(coor, dim):
    dim_dict = {'x':0, 'y':1}
    try:
        dim=dim_dict[dim]
        if isinstance(coor,list):
            return coor[dim]
        else:
            return np.NaN
    except:
        return np.NaN


#split coordinates

def create_1d_coor_column(df):
    df['pass_loc_x']=df.apply(lambda x: get_1d_coor(x['location'], 'x') if x['type']=='Pass' else np.NaN, axis=1)
    df['pass_loc_y']=df.apply(lambda x: get_1d_coor(x['location'], 'y') if x['type']=='Pass' else np.NaN, axis=1)
    df['pass_receipt_loc_x']=df.apply(lambda x: get_1d_coor(x['pass_receipt_loc'], 'x') if x['type']=='Pass' else np.NaN, 
                                      axis=1)
    df['pass_receipt_loc_y']=df.apply(lambda x: get_1d_coor(x['pass_receipt_loc'], 'y') if x['type']=='Pass' else np.NaN, 
                                      axis=1)

- Flag dead ball

In [15]:
def dead_ball(pass_type):
    dead_ball = not((pass_type is np.NaN)|(pass_type in ['Recovery', 'Interception']))
    return dead_ball

def create_dead_ball(df):
    df['deadball']=df.apply(lambda x: dead_ball(x['pass_type']), axis=1)
    

- Time since the start of current possession chain

In [16]:
def get_poss_timestamp(df):
    series = df.groupby(by="possession")["timestamp"].min()
    return series.to_dict()

def time_since_poss(timestamp, possession, event_type, poss_dct):
    return (timestamp-poss_dct[possession]).total_seconds() if event_type=='Pass' else np.NaN

def create_time_since_poss(dest_df, event_df):
    poss_timestamp = get_poss_timestamp(event_df)
    dest_df['time_since_poss']=dest_df.apply(lambda x: time_since_poss(x['timestamp'], x['possession'], x['type'], poss_timestamp), 
                                                     axis=1)

- Body part used to pass the ball with

In [17]:
def from_keeper_held(body_part):
    return 1 if body_part in ['Drop Kick','Keeper Arm'] else np.NaN

def group_body_part(body_part):
    if body_part in ['Drop Kick', 'Left Foot', 'Right Foot']:
        body_part = 'Foot'
    elif body_part=='Head':
        pass
    else:
        body_part='Other'
    return body_part

def create_body_part(df):
    df['from_keeper_held']=df.apply(lambda x: from_keeper_held(x['pass_body_part']), axis=1)
    df['pass_body_part_gr']=df.apply(lambda x: group_body_part(x['pass_body_part']), axis=1)
        

#### Add all functions together to create event features:

In [18]:
def get_event_features(dest_df, event_df):
    
    create_pass_risk(dest_df)
    create_pass_reward(dest_df, event_df)
    create_br_loc(dest_df, event_df)
    create_br_pressure(dest_df, event_df)
    create_one_touch_pass(dest_df, event_df)
    create_pass_from_duel(dest_df, event_df)
    create_carry_dist_speed(dest_df, event_df)
#     create_pass_intention(dest_df)
    create_pass_angle(dest_df)
    create_pass_receipt_loc(dest_df)
    create_angle_to_goal(dest_df)
    create_1d_coor_column(dest_df)
    create_dead_ball(dest_df)
    create_time_since_poss(dest_df, event_df)
    create_body_part(dest_df)


### Create features from tracking data 
First the underlying data is calculated for each row of tracking data:
- Dist and degree from defending team's goal
- Dist and degree from attacking team's goal 
- Dist and degree from actor and ball receipt location to goal
- Dist from actor, degree from actor, difference between the angle of opponent against actor vs passing_angle
- Dist from ball receipt location, degree from ball receipt location, difference between the angle of opponent against ball receipt location vs goal

In [19]:
def get_angle(start_location, end_location):
    y= end_location[1]-start_location[1]
    x= end_location[0]-start_location[0]
    angles = np.degrees(np.arctan2(y, x))
    return angles

def degree_diff(d1, d2):
    return abs(d1-d2) if abs(d1-d2)<180 else 360-abs(d1-d2)


def map_event_to_tracking(tracking_df, event_df):
    event_df = event_df.set_index('id')[['type', 'pass_length', 'location', 'pass_end_location', 'pass_degree', 'passer_degree_to_goal', 'br_degree_to_goal']]
    tracking_df = tracking_df.set_index('id')
    tracking_df = tracking_df.join(event_df, how='left', rsuffix='_pass')
    tracking_df_pass = tracking_df.loc[tracking_df['type']=='Pass']
    return tracking_df_pass

def create_features_each_player(tracking_df, event_df):
    tracking_df = map_event_to_tracking(tracking_df, event_df)
 
    #map actor_location for every line
    tracking_actor = tracking_df.loc[tracking_df['actor'], ['location']]
    tracking_df['actor_location'] = tracking_df.index.map(tracking_actor['location'])
    
    tracking_df['actor_location'] = tracking_df['actor_location'].fillna(tracking_df['location_pass'])
    
    #adjust pass_end_location with difference between ball position and actor position
    tracking_df['pass_end_location'] = tracking_df.apply(lambda x: 
                                                         [x['pass_end_location'][i]-(x['location_pass'][i]-x['actor_location'][i]) 
                                                          for i in range(2)], axis=1)
    #get dist and degree from oppo goal
    
    tracking_df['dist_from_def_goal']=tracking_df.apply(lambda x: dist(x['location'], [120, 40]), axis=1)
    tracking_df['degree_from_def_goal'] = tracking_df.apply(lambda x: get_angle(x['location'], [120, 40]), 
                                                         axis=1)
    tracking_df['dist_from_att_goal']=tracking_df.apply(lambda x: dist(x['location'], [0, 40]), axis=1)
    tracking_df['degree_from_att_goal'] = tracking_df.apply(lambda x: get_angle(x['location'], [0, 40]), 
                                                         axis=1)
    
    #get dist from actor and br to goal
    tracking_df['dist_actor_from_goal']=tracking_df.apply(lambda x: dist(x['actor_location'], [120, 40]), axis=1)
    tracking_df['dist_br_from_goal']=tracking_df.apply(lambda x: dist(x['pass_end_location'], [120, 40]), axis=1)
    
    #get dist from actor, degree from actor, difference between the angle of opponent vs actor vs passing_angle
    tracking_df['dist_from_actor']=tracking_df.apply(lambda x: dist(x['location'], x['actor_location']), axis=1)
    tracking_df['degree_from_actor'] = tracking_df.apply(lambda x: get_angle(x['location'], x['actor_location']), 
                                                         axis=1)
    tracking_df['degree_diff_pass_actor']=tracking_df.apply(lambda x: degree_diff(x['degree_from_actor'],x['pass_degree']), axis=1)
    
    #get dist from br, degree from br, difference between the angle of opponent vs br vs goal
    tracking_df['degree_from_br'] = tracking_df.apply(lambda x: get_angle(x['location'], x['pass_end_location']), 
                                                      axis=1)
    tracking_df['dist_from_br']=tracking_df.apply(lambda x: dist(x['location'], x['pass_end_location']), axis=1)
    
    tracking_df['degree_diff_goal_br']=tracking_df.apply(lambda x: degree_diff(x['degree_from_br'],x['passer_degree_to_goal']), axis=1)
    return tracking_df



Generate feature for each pass
- Number of oppositions 5 feet away from actor
- Minimum degree difference of an opponent to the actor vs. the passing degree
- Minimum opponent distance from the actor
- Degree difference of the closest opponent to the actor vs. the passing degree

In [20]:
def get_feature_for_closest(tracking_df, dist_metric, retrieved_metric):
    index_lst = tracking_df.groupby(['id', 'teammate'])[dist_metric].idxmin(skipna=False)
    index_lst = index_lst.dropna()
    grouped_df = tracking_df.loc[index_lst]
    return grouped_df.set_index(['id', 'teammate'])[retrieved_metric]

def create_passer_features(tracking_df):
    
    tracking_df = tracking_df.loc[tracking_df['actor']==False].reset_index()

    def f(x):
        d = {}
        d['no_players'] = (x['dist_from_actor']).count()
        d['no_5ft_away_from_actor'] = (x['dist_from_actor']<5).sum()
        d['no_closer_to_goal_actor'] = (x['dist_from_def_goal']<x['dist_actor_from_goal']).sum()
        d['min_degree_diff_pass_actor'] = (np.where(x['dist_from_actor']<=x['pass_length'], x['degree_diff_pass_actor'],
                                                   360)).min() 
        d['min_dist_from_actor'] = x['dist_from_actor'].min()
        return pd.Series(d, index=['no_players', 'no_5ft_away_from_actor', 'no_closer_to_goal_actor',
                                   'min_degree_diff_pass_actor','min_dist_from_actor'])

    tracking_grouped = tracking_df.groupby(['id','teammate']).apply(f)
    tracking_grouped['closest_from_actor_degree_diff'] = get_feature_for_closest(tracking_df, 'dist_from_actor', 'degree_diff_pass_actor')
    tracking_grouped = tracking_grouped.reset_index()
    
    return tracking_grouped


    


Generate feature for each pass
- Number of oppositions 5 feet away from ball receipt location
- Minimum degree difference of an opponent to the ball receipt location vs. opponent goal to ball receipt location
- Minimum opponent distance from the ball receipt location 
- Number of opposition closer to goal than bal receipt location

In [21]:
def create_br_features(tracking_df):
    
    tracking_df = tracking_df.reset_index()

    def f(x):
        d = {}
        d['no_5ft_away_from_br'] = (x['dist_from_br']<5).sum()
        d['min_dist_from_br'] = x['dist_from_br'].min()
        #min degree diff from br to goal vs from br vs oppo (only counts oppos who are closer to br than from br to goal)
        d['min_degree_diff_goal_br'] = (np.where(x['dist_from_br']<=x['dist_br_from_goal'], x['degree_diff_goal_br'],
                                                   360)).min() 
        d['no_closer_to_goal_br'] = (x['dist_from_def_goal']<x['dist_br_from_goal']).sum()

        return pd.Series(d, index=['no_5ft_away_from_br', 'min_dist_from_br','min_degree_diff_goal_br','no_closer_to_goal_br'])

    tracking_grouped = tracking_df.groupby(['id','teammate']).apply(f)
    tracking_grouped['closest_from_br_degree'] = get_feature_for_closest(tracking_df, 'dist_from_br', 'degree_from_br')
    tracking_grouped = tracking_grouped.reset_index()
    
    return tracking_grouped

### Generate all tracking features

In [30]:
def get_tracking_features(dest_df, tracking_df, event_df):
    tracking_df = create_features_each_player(tracking_df, event_df)
    passer_grouped = create_passer_features(tracking_df).set_index(['id', 'teammate'])
    br_grouped = create_br_features(tracking_df).set_index(['id', 'teammate'])
    grouped = passer_grouped.join(br_grouped, how='outer').reset_index()
    grouped['pc_5ft_away_from_actor'] = grouped.apply(lambda x: divide(x['no_5ft_away_from_actor'], x['no_players']), axis=1)
    grouped['pc_5ft_away_from_br'] = grouped.apply(lambda x: divide(x['no_5ft_away_from_br'], x['no_players']), axis=1)
    grouped['pc_closer_to_goal_actor'] = grouped.apply(lambda x: divide(x['no_closer_to_goal_actor'], x['no_players']), axis=1)
    grouped['pc_closer_to_goal_br'] = grouped.apply(lambda x: divide(x['no_closer_to_goal_br'], x['no_players']), axis=1)

    grouped_oppo = grouped.loc[~grouped['teammate']].set_index('id')
    grouped_teammate = grouped.loc[grouped['teammate']].set_index('id')

    dest_df = dest_df.set_index('id')
    dest_df['pc_5ft_actor_oppo'] = dest_df.index.map(grouped_oppo['pc_5ft_away_from_actor'])
    dest_df['pc_oppo_closer_to_goal_passer'] = dest_df.index.map(grouped_oppo['pc_closer_to_goal_actor'])
    dest_df['oppo_min_dist_passer'] = dest_df.index.map(grouped_oppo['min_dist_from_actor'])
    dest_df['min_degree_diff_passer_oppo'] = dest_df.index.map(grouped_oppo['min_degree_diff_pass_actor'])
    dest_df['closest_oppo_degree_diff_passer'] = dest_df.index.map(grouped_oppo['closest_from_actor_degree_diff'])
    
    
    dest_df['pc_5ft_br_oppo'] = dest_df.index.map(grouped_oppo['pc_5ft_away_from_br'])
    dest_df['pc_oppo_closer_to_goal_br'] = dest_df.index.map(grouped_oppo['pc_closer_to_goal_br'])
    dest_df['oppo_min_dist_br'] = dest_df.index.map(grouped_oppo['min_dist_from_br'])
    dest_df['min_degree_diff_br_oppo'] = dest_df.index.map(grouped_oppo['min_degree_diff_goal_br'])
    dest_df['closest_oppo_degree_diff_br'] = dest_df.index.map(grouped_oppo['closest_from_br_degree'])
    
    dest_df['min_dist_from_br_teammate'] = dest_df.index.map(grouped_teammate['min_dist_from_br'])
    return dest_df, tracking_df

## Loop through all matches to create event and tracking features

In [31]:
features_all_matches = []
all_tracking_data = []
i=0
for match in files[i:]:
    event_df = sb.events(match_id=match, flatten_attrs=True).sort_values('index')
    event_df = event_df.loc[event_df['type'].isin(in_game_events)]
    event_dict = sb.events(match_id=match, split=True, flatten_attrs=True)
    tracking_df = sb.frames(match_id=match)

    pass_columns = event_dict['passes'].columns
    pass_df = event_df.loc[event_df['type']=='Pass', pass_columns]


    event_df['timestamp']=event_df.apply(lambda x: pd.to_datetime(x['timestamp']), axis=1)
    pass_df['timestamp']=pass_df.apply(lambda x: pd.to_datetime(x['timestamp']), axis=1)

    get_event_features(pass_df, event_df)
    pass_df, tracking_df = get_tracking_features(pass_df, tracking_df, pass_df)
    features_all_matches.append(pass_df)
    all_tracking_data.append(tracking_df)

    print(i)

    i+=1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


4


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


6


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


7


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


8


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


9


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


10


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


11


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


12


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


13


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


14


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


15


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


16


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


17


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


18


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


19


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


20


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


21


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


22


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


23


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


24


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


25


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


26


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


27


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


28


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


29


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


30


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


31


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


32


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


33


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


34


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


35


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


36


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


37


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


38


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


39


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


40


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


41


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


42


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


43


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


44


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


45


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


46


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


47


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


48


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


49


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pass_df['id']=pass_df.apply(lambda x: x['id'] if x['type']=='Pass' else np.NaN, axis=1)


50


### Append all matches to 1 df

In [36]:
tracking_data = pd.concat(all_tracking_data)
tracking_data = tracking_data.reset_index()
appended_data = pd.concat(features_all_matches)
# appended_data 
# tracking_data.info()

In [49]:
tracking_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 729573 entries, 0 to 729572
Data columns (total 29 columns):
 #   Column                  Non-Null Count   Dtype  
---  ------                  --------------   -----  
 0   id                      729573 non-null  object 
 1   teammate                729573 non-null  object 
 2   actor                   729573 non-null  object 
 3   keeper                  729573 non-null  bool   
 4   location                729573 non-null  object 
 5   visible_area            729573 non-null  object 
 6   match_id                729573 non-null  object 
 7   type                    729573 non-null  object 
 8   pass_length             729573 non-null  float64
 9   location_pass           729573 non-null  object 
 10  pass_end_location       729573 non-null  object 
 11  pass_degree             729573 non-null  float64
 12  passer_degree_to_goal   729573 non-null  float64
 13  br_degree_to_goal       729573 non-null  float64
 14  actor_location      

Unnamed: 0,teammate,keeper,dist_from_def_goal,degree_from_def_goal,dist_from_att_goal,degree_from_att_goal,dist_from_actor,degree_from_actor,degree_diff_pass_actor,degree_from_br,dist_from_br,degree_diff_goal_br,loc_x,loc_y
0,True,False,107.796046,14.097301,30.464443,120.474949,12.784305,4.449192,40.476567,-17.446314,26.043841,34.443493,15.450395,13.744190
1,False,False,106.052540,18.817579,39.433044,119.831376,12.394057,46.188593,82.215967,0.398482,20.680743,16.598698,19.615932,5.792103
2,True,False,100.094700,-0.320342,19.914729,-178.389707,27.121503,-72.203561,36.176187,-59.506885,40.181143,76.504065,19.906864,40.559628
3,True,False,102.070033,13.714230,31.935577,130.735074,7.432964,-8.240548,27.786826,-26.887489,21.814441,43.884669,20.839956,15.801295
4,False,False,99.040646,1.821617,21.243983,171.477558,23.254188,-71.997848,35.970474,-58.042073,36.438507,75.039253,21.009405,36.851710
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
729568,True,False,28.828032,16.954085,92.806380,174.803023,4.778083,155.279545,155.870202,3.923494,24.817952,15.875382,92.424870,31.593594
729569,True,False,35.873525,-39.915893,95.306986,-166.023639,29.754197,-98.505568,97.914911,-50.277867,38.648949,70.076743,92.485466,63.018692
729570,False,False,26.015491,14.641998,95.057130,176.033044,6.746818,178.573942,179.164599,-0.338546,22.355662,20.137422,94.829384,33.423840
729571,False,False,28.243955,-28.079240,96.005334,-172.040467,20.907604,-109.548477,108.957820,-42.142442,29.810993,61.941319,95.080430,53.294210


In [38]:
print(appended_data.columns)

Index(['index', 'period', 'timestamp', 'minute', 'second', 'type',
       'possession', 'possession_team', 'play_pattern', 'team', 'player',
       'position', 'location', 'duration', 'related_events', 'match_id',
       'pass_recipient', 'pass_length', 'pass_angle', 'pass_height',
       'pass_end_location', 'pass_type', 'pass_body_part',
       'possession_team_id', 'player_id', 'pass_switch', 'pass_outcome',
       'off_camera', 'under_pressure', 'pass_cross', 'pass_assisted_shot_id',
       'pass_shot_assist', 'pass_aerial_won', 'pass_through_ball',
       'pass_technique', 'counterpress', 'pass_outswinging', 'pass_deflected',
       'pass_no_touch', 'out', 'pass_inswinging', 'pass_goal_assist',
       'binary_pass_risk', 'binary_pass_reward', 'pass_br_location',
       'br_pressure', 'one_touch_pass', 'pass_from_duel', 'carry_dist',
       'carry_speed', 'pass_degree', 'pass_angle_cat', 'pass_receipt_loc',
       'passer_degree_to_goal', 'br_degree_to_goal', 'pass_loc_x',
       '

## Create a table called master_data for future uses

In [39]:
master_data = appended_data.drop(['second', 'type','possession', 'possession_team', 'pass_body_part',
                                  'play_pattern', 'team', 'player', 'position', 'pass_br_location', 
                                  'duration', 'related_events', 'match_id','pass_recipient',
                                  'pass_angle', 'pass_end_location', 'possession_team_id', 'pass_type',
                                  'player_id', 'pass_outcome', 'off_camera', 'pass_assisted_shot_id',
                                  'pass_shot_assist', 'pass_aerial_won', 'pass_technique', 'counterpress',
                                  'pass_outswinging', 'pass_deflected','pass_no_touch', 'out','pass_inswinging', 
                                  'pass_goal_assist', 'pass_straight', 'pass_miscommunication'], axis=1)

In [40]:
print(master_data.columns)
master_data.info()

Index(['index', 'period', 'timestamp', 'minute', 'location', 'pass_length',
       'pass_height', 'pass_switch', 'under_pressure', 'pass_cross',
       'pass_through_ball', 'binary_pass_risk', 'binary_pass_reward',
       'br_pressure', 'one_touch_pass', 'pass_from_duel', 'carry_dist',
       'carry_speed', 'pass_degree', 'pass_angle_cat', 'pass_receipt_loc',
       'passer_degree_to_goal', 'br_degree_to_goal', 'pass_loc_x',
       'pass_loc_y', 'pass_receipt_loc_x', 'pass_receipt_loc_y', 'deadball',
       'time_since_poss', 'from_keeper_held', 'pass_body_part_gr',
       'pc_5ft_actor_oppo', 'pc_oppo_closer_to_goal_passer',
       'oppo_min_dist_passer', 'min_degree_diff_passer_oppo',
       'closest_oppo_degree_diff_passer', 'pc_5ft_br_oppo',
       'pc_oppo_closer_to_goal_br', 'oppo_min_dist_br',
       'min_degree_diff_br_oppo', 'closest_oppo_degree_diff_br',
       'min_dist_from_br_teammate', 'pass_cut_back'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
Index: 54

#### Treat missing values

In [41]:
master_data['pass_body_part_gr'] = master_data['pass_body_part_gr'].fillna('Other')

master_data['pass_switch'] = master_data['pass_switch'].fillna(False)
master_data['under_pressure'] = master_data['under_pressure'].fillna(False)
master_data['from_keeper_held'] = master_data['from_keeper_held'].fillna(False)
master_data['pass_cross'] = master_data['pass_cross'].fillna(False)
master_data['pass_through_ball'] = master_data['pass_through_ball'].fillna(False)
master_data['br_pressure'] = master_data['br_pressure'].fillna(False)
master_data['pass_cut_back'] = master_data['pass_cut_back'].fillna(False)

master_data['carry_dist'] = master_data['carry_dist'].fillna(0)
master_data['carry_speed'] = master_data['carry_speed'].fillna(0)

#### Manual data adjustment due to numbers rounding up - pass angle

In [42]:
master_data['pass_degree'] = np.where(master_data['pass_degree']>180, 180, master_data['pass_degree'])
master_data['pass_angle_cat'] = master_data['pass_angle_cat'].fillna('Backward')

#### Convert type

In [43]:
dtypes = {'timestamp': str, 
          'pass_switch': int,
          'under_pressure': int,
          'pass_cross': int,
          'pass_through_ball': int,
          'binary_pass_risk': int, 
          'br_pressure': int, 
          'deadball':int,
          'from_keeper_held':int,
          'pass_cut_back': int
         }

master_data=master_data.astype(dtypes)

In [44]:
master_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 54820 entries, bbc398f7-c784-4958-a504-37b583caf97a to e6fa5b12-3683-4e25-912b-30180e4fc752
Data columns (total 43 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   index                            54820 non-null  int64  
 1   period                           54820 non-null  int64  
 2   timestamp                        54820 non-null  object 
 3   minute                           54820 non-null  int64  
 4   location                         54820 non-null  object 
 5   pass_length                      54820 non-null  float64
 6   pass_height                      54820 non-null  object 
 7   pass_switch                      54820 non-null  int32  
 8   under_pressure                   54820 non-null  int32  
 9   pass_cross                       54820 non-null  int32  
 10  pass_through_ball                54820 non-null  int32  
 11  binary_pass_risk   

#### Save this data to json file for future uses
- dir pyg_data/raw/ must be created beforehand, since this will be used in generating graph representations

In [45]:
master_data_dict = master_data.to_dict()
tracking_data_dict = tracking_data.to_dict()
import json
with open("master_data.json", "w") as outfile:
    json.dump(master_data_dict, outfile)
    
with open("pyg_data/raw/tracking_data.json", "w") as outfile:
    json.dump(tracking_data_dict, outfile)