In [1]:
import pandas as pd
import sqlite3
import json
from ast import literal_eval


In [2]:
from  analysis_tool import whoscored_data_engineering as wcde
import analysis_tool.whoscored_custom_events as ws_custom_events


In [3]:
# Load the CSV, using its first column as the DataFrame index.
# NOTE(review): the final cell writes `data` back to this same path, so after
# one full run this file already contains the derived columns added below.
data = pd.read_csv('Data/liverpool03_15_23.csv', index_col=0, low_memory=False)

In [4]:
# Both columns come out of the CSV as text; parse every cell back into the
# Python literal it spells out. `literal_eval` only accepts literals, so it
# does not execute arbitrary code.
data['qualifiers'] = data['qualifiers'].map(literal_eval)
data['satisfiedEventsTypes'] = data['satisfiedEventsTypes'].map(literal_eval)

In [5]:
# Qualifier names that mark an event as a restart rather than open play.
SET_PIECE_QUALIFIERS = {'GoalKick', 'FreekickTaken', 'CornerTaken', 'ThrowIn'}


def _is_open_play(qualifiers):
    '''
    Return False when any dict in `qualifiers` has one of the set-piece
    names as its first key, True otherwise.

    Entries that are not dicts, and empty dicts, are ignored.
    '''
    for attr in qualifiers:
        # Only the first key of each qualifier dict is inspected.
        if isinstance(attr, dict) and next(iter(attr), None) in SET_PIECE_QUALIFIERS:
            return False
    return True


# Build the column positionally (one value per row, in row order) instead of
# writing through `data.at[i, ...]` with an `enumerate` counter. The frame is
# loaded with `index_col=0`, so its labels are not guaranteed to be 0..n-1;
# a counter used as a label could flag the wrong row or add a new one.
data['is_open_play'] = [_is_open_play(record) for record in data['qualifiers']]


In [6]:
# Sanity check: number of rows flagged as open play (True) vs. not (False).
data['is_open_play'].value_counts()

True     1793
False     102
Name: is_open_play, dtype: int64

In [7]:
def check_if_pass_is_progressive(x, y, end_x, end_y):
    '''
    This function returns "True" if the end point (end_x, end_y) is at
    least 25% closer than the start point (x, y) to the point (100, 50),
    measured as straight-line distance.

    (100, 50) is presumably the centre of the attacked goal on a 0-100
    pitch -- confirm against the data provider's coordinate system.

    Only the four coordinates are inspected; the event type and outcome
    are not checked here.

    Returns "False" when the start point is exactly (100, 50), because no
    end point can be closer, and when any comparison involves NaN.
    '''
    # -- Distance from the start and end positions to (100, 50)
    distance_start = ((x - 100)**2 + (y - 50)**2)**(1/2)
    distance_end = ((end_x - 100)**2 + (end_y - 50)**2)**(1/2)
    # -- A start exactly on the reference point would divide by zero below
    if distance_start == 0:
        return False
    # -- Relative change in distance (negative means closer to the target)
    delta_distance = distance_end/distance_start - 1
    return bool(delta_distance <= -0.25)

In [8]:
# Evaluate the progressive criterion row by row from the start (x, y) and
# end (endX, endY) coordinates of each event.
data['is_progressive'] = data.apply(
    lambda row: check_if_pass_is_progressive(
        row['x'], row['y'], row['endX'], row['endY']
    ),
    axis=1,
)


In [9]:
def check_if_pass_is_into_box(x, y, end_x, end_y):
    '''
    This function returns "True" if the end point (end_x, end_y) is at
    least 25% closer than the start point (x, y) to the point (100, 50)
    AND the end point, rescaled to a 120 x 80 grid, satisfies
    x >= 102 and 18 <= y <= 62.

    The rectangle is presumably the penalty area on a 120 x 80 pitch --
    confirm. Only the four coordinates are inspected; whether the event is
    a pass, or was successful, is not checked here.

    Returns "False" when the start point is exactly (100, 50), because no
    end point can be closer, and when any comparison involves NaN.
    '''
    # -- Distance from the start and end positions to (100, 50)
    distance_start = ((x - 100)**2 + (y - 50)**2)**(1/2)
    distance_end = ((end_x - 100)**2 + (end_y - 50)**2)**(1/2)
    # -- A start exactly on the reference point would divide by zero below
    if distance_start == 0:
        return False
    # -- Relative change in distance (negative means closer to the target)
    delta_distance = distance_end/distance_start - 1
    # -- Rescale the end position from the 0-100 grid to a 120 x 80 grid
    x_position = 120 * end_x / 100
    y_position = 80 * end_y / 100
    return bool(delta_distance <= -0.25 and x_position >= 102 and 18 <= y_position <= 62)


In [10]:
# Evaluate the into-the-box criterion row by row from each event's start and
# end coordinates.
# NOTE(review): a later cell assigns `is_pass_into_box` again from
# `ws_custom_events.pass_into_box`, so this result does not reach the saved CSV.
data['is_pass_into_box'] = data.apply(
    lambda row: check_if_pass_is_into_box(
        row['x'], row['y'], row['endX'], row['endY']
    ),
    axis=1,
)


In [11]:
# Sanity check: number of rows meeting the progressive criterion.
data['is_progressive'].value_counts()

False    1696
True      199
Name: is_progressive, dtype: int64

In [12]:
# Sanity check: number of rows flagged by check_if_pass_is_into_box.
# These counts describe the column as computed by the cell just above.
data['is_pass_into_box'].value_counts()

False    1825
True       70
Name: is_pass_into_box, dtype: int64

In [13]:
# We define a set that covers all (I think) recovery ball actions:
recovery_set = {'ballRecovery', 'interceptionWon', 'tackleWon', 'foulGiven', 'duelAerialWon'}
data = data.copy()
# A row counts as a possession win when its satisfied event types share at
# least one entry with `recovery_set`.
# The column is built positionally (one value per row, in row order) instead
# of writing through `data.at[i, ...]` with an `enumerate` counter. The frame
# is loaded with `index_col=0`, so its labels are not guaranteed to be 0..n-1;
# a counter used as a label could flag the wrong row or add a new one.
data['won_possession'] = [
    not recovery_set.isdisjoint(event_types)
    for event_types in data['satisfiedEventsTypes']
]

In [14]:
# Sanity check: number of rows flagged as winning possession.
data['won_possession'].value_counts()


False    1732
True      163
Name: won_possession, dtype: int64

In [15]:
# Call the project helper `ws_custom_events.carry_into_box` once per row
# (axis=1) and store whatever it returns in a new column.
# NOTE(review): the helper is defined outside this notebook; the recorded
# value counts below list only `True`, so it presumably returns a missing
# value (None/NaN) rather than False for non-matching rows -- confirm.
data['is_carry_into_box'] = data.apply(ws_custom_events.carry_into_box, axis=1)


In [16]:
# Sanity check: number of rows flagged as a carry into the box.
# NOTE(review): `value_counts()` omits missing values by default. The recorded
# output shows only `True` (9 rows) while earlier counts total 1895 rows, so
# the remaining rows are presumably NaN/None; pass `dropna=False` to see them.
data['is_carry_into_box'].value_counts()


True    9
Name: is_carry_into_box, dtype: int64

In [17]:
# Call the project helper `ws_custom_events.pass_into_box` once per row
# (axis=1) and store the result.
# NOTE(review): this replaces the `is_pass_into_box` column that was computed
# earlier with `check_if_pass_is_into_box`; only this version is saved to the
# CSV. Confirm the overwrite is intended, or use a distinct column name.
data['is_pass_into_box'] = data.apply(ws_custom_events.pass_into_box, axis=1)


In [18]:
# Call the project helper `ws_custom_events.progressive_carry` once per row
# (axis=1) and store whatever it returns in a new column.
# NOTE(review): the helper is defined outside this notebook; the recorded
# value counts below list only `True`, so non-matching rows presumably hold
# a missing value rather than False -- confirm.
data['progressive_carry'] = data.apply(ws_custom_events.progressive_carry, axis=1)

In [19]:
# Sanity check: number of rows flagged as a progressive carry.
# NOTE(review): `value_counts()` omits missing values by default; the recorded
# output shows only `True` (29 rows), so the other rows are presumably
# NaN/None. Pass `dropna=False` to include them.
data['progressive_carry'].value_counts()

True    29
Name: progressive_carry, dtype: int64

In [21]:
# Persist the frame, including every derived column added above.
# NOTE(review): this writes to the same path the notebook reads from at the
# top, so the input file is overwritten and a second Restart & Run All starts
# from already-augmented data. Consider a separate output path.
data.to_csv('Data/liverpool03_15_23.csv')