In [2]:
import pandas as pd
import sqlite3
import json
from ast import literal_eval


In [3]:
from  analysis_tools import whoscored_data_engineering as wcde
import analysis_tools.whoscored_custom_events as ws_custom_events


In [4]:
data = pd.read_csv('./Data/liverpool03_11_23.csv', index_col=0, low_memory=False)

In [5]:
data['qualifiers'] = [literal_eval(x) for x in data['qualifiers']]
data['satisfiedEventsTypes'] = [literal_eval(x) for x in data['satisfiedEventsTypes']]

AttributeError: 'str' object has no attribute 'keys'

In [6]:
data.loc[:, 'is_open_play'] = True
for index, record in enumerate(data['qualifiers']):
    for attr in record:
        if isinstance(attr, dict):
            key_search = list(attr.keys())[0]
            if key_search in ['GoalKick', 'FreekickTaken', 'CornerTaken', 'ThrowIn']:
                data.at[index, 'is_open_play'] = False


In [7]:
data['is_open_play'].value_counts()

True     1667
False     103
Name: is_open_play, dtype: int64

In [8]:
def check_if_pass_is_progressive(x, y, end_x, end_y):
    '''
    This function returns "True" if the pass meets the criteria
    for a progressive pass.
    '''
    # -- Start position
    height_start = abs(x - 100)
    length_start = abs(y - 50)
    distance_sq_start = height_start**2 + length_start**2
    distance_start = distance_sq_start**(1/2)
    # -- End position
    height_end = abs(end_x - 100)
    length_end = abs(end_y - 50)
    distance_sq_end = height_end**2 + length_end**2
    distance_end = distance_sq_end**(1/2)
    # -- Calculate change in distance
    delta_distance = distance_end/distance_start - 1
    if delta_distance <= -0.25:
        return True
    else:
        return False

In [9]:
data['is_progressive'] = data.apply(lambda x: check_if_pass_is_progressive(x['x'], x['y'], x['endX'], x['endY']), axis=1)


In [10]:
def check_if_pass_is_into_box(x, y, end_x, end_y):
    '''
    This function returns "True" if the pass meets the criteria
    for a progressive pass and is successful into the box.
    '''
    # -- Start position
    height_start = abs(x - 100)
    length_start = abs(y - 50)
    distance_sq_start = height_start**2 + length_start**2
    distance_start = distance_sq_start**(1/2)
    # -- End position
    height_end = abs(end_x - 100)
    length_end = abs(end_y - 50)
    distance_sq_end = height_end**2 + length_end**2
    distance_end = distance_sq_end**(1/2)
    # -- Calculate change in distance
    delta_distance = distance_end/distance_start - 1
    # -- Determine pass end position and whether it's a successful pass into the box
    x_position = 120 * end_x / 100
    y_position = 80 * end_y / 100
    if delta_distance <= -0.25 and x_position >= 102 and 18 <= y_position <= 62:
        return True
    else:
        return False


In [11]:
data['is_pass_into_box'] = data.apply(lambda x: check_if_pass_is_into_box(x['x'], x['y'], x['endX'], x['endY']), axis=1)


In [12]:
data['is_progressive'].value_counts()

False    1566
True      204
Name: is_progressive, dtype: int64

In [13]:
data['is_pass_into_box'].value_counts()

False    1706
True       64
Name: is_pass_into_box, dtype: int64

In [14]:
# We define a set that covers all (I think) recovery ball actions:
recovery_set = set(['ballRecovery','interceptionWon','tackleWon','foulGiven','duelAerialWon'])
data = data.copy()
data['won_possession'] = False
for index, row in enumerate(data['satisfiedEventsTypes']):
    set_element = set(row)
    if len(recovery_set.intersection(set_element)) > 0:
        data.at[index, 'won_possession'] = True

In [15]:
data['won_possession'].value_counts()


False    1550
True      220
Name: won_possession, dtype: int64

In [19]:
ws_custom_events.carry_into_box(data)

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [16]:
data['is_carry_into_box'] = data.apply(ws_custom_events.carry_into_box, axis=1)


In [18]:
data['is_carry_into_box'].value_counts()


True    6
Name: is_carry_into_box, dtype: int64

Unnamed: 0,eventId,minute,second,teamId,x,y,expandedMinute,period,type,outcomeType,...,cumulative_mins,index,id,match_string,team_name,Venue,xThreat,xThreat_gen,playerName,is_carry_into_box
0,2.0,0.0,0.0,32,0.0,0.0,0.0,1.0,,Successful,...,0.00,0.0,2.516083e+09,Liverpool - Man Utd,Man Utd,Away,,,Unknown,
1,2.0,0.0,0.0,26,0.0,0.0,0.0,1.0,,Successful,...,0.00,1.0,2.516083e+09,Liverpool - Man Utd,Liverpool,Home,,,Unknown,
2,3.0,0.0,0.0,32,50.0,50.0,0.0,1.0,,Successful,...,0.00,2.0,2.516083e+09,Liverpool - Man Utd,Man Utd,Away,-0.004143,0.000000,Fred,
3,4.0,0.0,3.0,32,33.9,59.7,0.0,1.0,,Unsuccessful,...,0.05,3.0,2.516083e+09,Liverpool - Man Utd,Man Utd,Away,,,Lisandro Martínez,
4,3.0,0.0,6.0,26,19.9,30.3,0.0,1.0,,Successful,...,0.10,4.0,2.516083e+09,Liverpool - Man Utd,Liverpool,Home,0.001418,0.001418,Ibrahima Konaté,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1587,11655.0,85.0,,32,77.3,87.1,87.0,2.0,,Unsuccessful,...,,1311.0,2.516165e+09,Liverpool - Man Utd,Man Utd,Away,,,Alejandro Garnacho,
1588,697.0,0.0,0.0,32,0.0,0.0,5.0,,,Successful,...,0.00,1417.0,2.516160e+09,Liverpool - Man Utd,Man Utd,Away,,,Unknown,
1589,879.0,0.0,0.0,26,0.0,0.0,5.0,,,Successful,...,0.00,1418.0,2.516160e+09,Liverpool - Man Utd,Liverpool,Home,,,Unknown,
1590,1.0,0.0,0.0,26,0.0,0.0,0.0,,,Successful,...,0.00,1419.0,2.516027e+09,Liverpool - Man Utd,Liverpool,Home,,,Unknown,


In [19]:
data['is_pass_into_box'] = data.apply(ws_custom_events.pass_into_box, axis=1)


In [20]:
data['progressive_carry'] = data.apply(ws_custom_events.progressive_carry, axis=1)

In [22]:
data['progressive_carry'].value_counts()

True    16
Name: progressive_carry, dtype: int64

In [68]:
def find_defensive_actions(events_df):
    """ Return dataframe of in-play defensive actions from event data.

    Function to find all in-play defensive actions within a whscored-style events dataframe (single or multiple
    matches), and return as a new dataframe.

    Args:
        events_df (pandas.DataFrame): whoscored-style dataframe of event data. Events can be from multiple matches.

    Returns:
        pandas.DataFrame: whoscored-style dataframe of defensive actions.
    """

    # Define and filter defensive events
    defensive_actions = ['BallRecovery', 'BlockedPass', 'Challenge', 'Clearance', 'Foul', 'Interception', 'Tackle',
                         'Claim', 'KeeperPickup', 'Punch', 'Save']
    defensive_action_df = events_df[events_df['event_type'].isin(defensive_actions)]

    # Add a new column indicating whether each defensive action is true or false
    defensive_action_df['defensive_action'] = True

    return defensive_action_df


In [70]:
def find_offensive_actions(events_df):
    """ Return dataframe of in-play offensive actions from event data.

    Function to find all in-play offensive actions within a whoscored-style events dataframe (single or multiple
    matches), and return as a new dataframe.

    Args:
        events_df (pandas.DataFrame): whoscored-style dataframe of event data. Events can be from multiple matches.

    Returns:
        pandas.DataFrame: whoscored-style dataframe of offensive actions.
    """

    # Define and filter offensive events
    offensive_actions = [ 'TakeOn',  'OffsidePass', 'MissedShots', 'SavedShot', 'Goal', 'Carry']
    offensive_action_df = events_df[events_df['event_type'].isin(offensive_actions)]

    offensive_action_df['offensive_actions'] = True


    return offensive_action_df

In [71]:
find_offensive_actions(data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  offensive_action_df['offensive_actions'] = True


Unnamed: 0,eventId,minute,second,teamId,x,y,expandedMinute,period,type,outcomeType,...,xThreat_gen,playerName,is_throw_or_corner,is_carry_into_box,is_open_play,won_possession,is_progressive,is_pass_into_box,progressive_carry,offensive_actions
48,32.5,2.0,33.5,26,70.9,16.7,2.0,1.0,"{'value': 99, 'displayName': 'Carry'}",Successful,...,0.002426,Harvey Elliott,False,,True,False,False,False,,True
50,33.5,2.0,36.5,26,85.2,3.5,2.0,1.0,"{'value': 99, 'displayName': 'Carry'}",Successful,...,0.008566,Mohamed Salah,False,,True,False,True,False,True,True
52,35.0,2.0,40.0,26,86.6,32.4,2.0,1.0,,Successful,...,,Harvey Elliott,False,,True,False,False,False,,True
62,40.5,3.0,24.5,26,24.2,17.0,3.0,1.0,"{'value': 99, 'displayName': 'Carry'}",Successful,...,0.000947,Fabinho,False,,True,False,False,False,,True
84,51.5,4.0,23.0,26,79.5,68.4,4.0,1.0,"{'value': 99, 'displayName': 'Carry'}",Successful,...,0.074598,Mohamed Salah,False,True,True,False,True,True,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1555,858.5,91.0,41.0,26,80.8,83.8,93.0,2.0,"{'value': 99, 'displayName': 'Carry'}",Successful,...,0.009758,James Milner,False,,True,False,False,False,,True
1558,860.5,91.0,49.0,26,70.8,72.6,93.0,2.0,"{'value': 99, 'displayName': 'Carry'}",Successful,...,0.000000,Stefan Bajcetic,False,,True,False,False,False,,True
1562,863.5,91.0,55.0,26,74.1,89.4,93.0,2.0,"{'value': 99, 'displayName': 'Carry'}",Successful,...,0.002149,James Milner,False,,True,False,False,False,,True
1572,868.5,92.0,12.5,26,27.7,51.5,94.0,2.0,"{'value': 99, 'displayName': 'Carry'}",Successful,...,0.000302,Alisson,False,,True,False,False,False,,True


In [23]:
data.to_csv('./Data/liverpool03_11_23.csv')

In [75]:
#data['pre_assist'] = data.apply(ws_custom_events.pre_assist, axis=1)


TypeError: Cannot reset_index inplace on a Series to create a DataFrame