# FEATURES - TRANSITIONS<a class="anchor" id="up"></a>

All functions return a DataFrame like


| teamId | feature |
| --- | --- |


Features are

* [Ball recovery time](#recovery_time) 
* [Ball recovery distance](#recovery_distance) 
* [x at 1 touches from recovery](#two_touches)
* [x > 75 after recovery](#counterattacks)
* [Launches after recovery](#recovery_launches)

    


In [1]:
import numpy as np
import pandas as pd
from scipy.spatial import distance
import pickle
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('clean/events_no_champions.csv')
if 'Unnamed: 0' in df.columns:
    del df['Unnamed: 0']
    
feats = pd.read_csv('clean/feats.csv')
    
display(df.head(2))
display(feats.head(2))

Unnamed: 0,eventId,subEventName,tags,playerId,matchId,eventName,teamId,matchPeriod,eventSec,subEventId,id,League,x0,y0,x1,y1,teamName,playerName,playerRole
0,8,Simple pass,1801,25413,2499719,Pass,1609,1H,2.758649,85.0,177959171,England,49,49,31.0,78.0,Arsenal,A. Lacazette,Forward
1,8,High pass,1801,370224,2499719,Pass,1609,1H,4.94685,83.0,177959172,England,31,78,51.0,75.0,Arsenal,R. Holding,Defender


Unnamed: 0,teamId,teamName
0,1609,Arsenal
1,1631,Leicester City


***
***

### Ball recovery time  <a class="anchor" id="recovery_time"></a>[up](#up)

Average time of ball recovery

In [3]:

def recovery_time(df):
    tmp = df.copy()
    
    # codice unico partita + tempo
    tmp['halfmatch'] = tmp['matchId'].astype(str) + '_' + tmp['matchPeriod']
    
    # concateno orizzontalmente ogni evento con il prossimo [halfmatch2, teamId2, eventSec2]
    tmp['halfmatch2'] = tmp[['halfmatch']].shift(-1)
    tmp.loc[tmp.shape[0]-1, 'halfmatch2'] = tmp.loc[tmp.shape[0]-1, 'halfmatch'] 
    tmp['teamId2'] = tmp[['teamId']].shift(-1)
    tmp['eventSec2'] = tmp[['eventSec']].shift(-1)
    
    # First event of each match has -1, Ball recoveries 1, Any other event 0 
    tmp['lost_ball'] = 0
    tmp.loc[tmp['halfmatch'] != tmp['halfmatch2'],'lost_ball'] = -1
    tmp['lost_ball'] = tmp['lost_ball'].shift(1)
    tmp.loc[0, 'lost_ball'] = -1
    
    tmp.loc[((tmp['teamId'] != tmp['teamId2']) & (tmp['halfmatch'] == tmp['halfmatch2'])),'lost_ball'] = 1
    
    # Only ball recoveries + start time of each match
    tmp = tmp.loc[tmp['lost_ball'] != 0,]
    
    
       
    # time difference between two ball recoveries
    tmp['recovery_time'] = tmp['eventSec2'].diff()
    tmp = tmp.loc[tmp['lost_ball'] == 1,]
    
    tmp = tmp.dropna() # drop first start
    tmp = tmp[['teamId2','recovery_time']].groupby('teamId2').mean().reset_index()
    
    tmp.columns = ['teamId','recovery_time']
            

    return tmp

a = recovery_time(df)
feats = pd.merge(feats, a, on = 'teamId', how = 'left')
display(feats.head())

Unnamed: 0,teamId,teamName,recovery_time
0,1609,Arsenal,7.345825
1,1631,Leicester City,7.795853
2,1625,Manchester City,6.689525
3,1651,Brighton & Hove Albion,8.664741
4,1646,Burnley,8.340093


### Ball recovery distance  <a class="anchor" id="recovery_distance"></a>[up](#up)

Average vertical (`x`) distance of ball recovery.

In [4]:

def recovery_distance(df):
    tmp = df.copy()
    
    # code match + half time
    tmp['halfmatch'] = tmp['matchId'].astype(str) + '_' + tmp['matchPeriod']
    
    # Horizontally append next event to the current one [halfmatch2, teamId2, eventSec2]
    tmp['halfmatch2'] = tmp[['halfmatch']].shift(-1)
    tmp.loc[tmp.shape[0]-1, 'halfmatch2'] = tmp.loc[tmp.shape[0]-1, 'halfmatch'] 
    tmp['teamId2'] = tmp[['teamId']].shift(-1)
    tmp['x0_2'] = tmp[['x0']].shift(-1)
    
    # First event of each match has -1, Ball recoveries 1, Any other event 0 
    tmp['lost_ball'] = 0
    tmp.loc[tmp['halfmatch'] != tmp['halfmatch2'],'lost_ball'] = -1
    tmp['lost_ball'] = tmp['lost_ball'].shift(1)
    tmp.loc[0, 'lost_ball'] = -1
    
    tmp.loc[((tmp['teamId'] != tmp['teamId2']) & (tmp['halfmatch'] == tmp['halfmatch2'])),'lost_ball'] = 1
    
    # Only ball recoveries + start time of each match
    tmp = tmp.loc[tmp['lost_ball'] != 0,]
    
    
       
    # time difference between two ball recoveries
    tmp['recovery_distance'] = tmp['x0_2'].diff()
    tmp = tmp.loc[tmp['lost_ball'] == 1,]
    
    tmp = tmp.dropna() # drop first start
    tmp = tmp[['teamId2','recovery_distance']].groupby('teamId2').mean().reset_index()
    
    tmp.columns = ['teamId','recovery_distance']
            

    return tmp

a = recovery_distance(df)
feats = pd.merge(feats, a, on = 'teamId', how = 'left')
display(feats.head())

Unnamed: 0,teamId,teamName,recovery_time,recovery_distance
0,1609,Arsenal,7.345825,0.78382
1,1631,Leicester City,7.795853,-0.128222
2,1625,Manchester City,6.689525,9.874423
3,1651,Brighton & Hove Albion,8.664741,-4.397881
4,1646,Burnley,8.340093,0.297576


### Distance after 1 touch from recovery  <a class="anchor" id="two_touches"></a>[up](#up)

Average vertical (`x`) distance 1 touches after ball recovery, if still in possess.

In [5]:

def one_touch(df):
    tmp = df.copy()
    
    # code match + half time
    tmp['halfmatch'] = tmp['matchId'].astype(str) + '_' + tmp['matchPeriod']
    
    # horizontally append next event 
    tmp['halfmatch2'] = tmp[['halfmatch']].shift(-2)
    tmp['halfmatch1'] = tmp[['halfmatch']].shift(-1)
    tmp['teamId2'] = tmp[['teamId']].shift(-1)

    tmp['t1'] = tmp[['teamId']].shift(-1)
    tmp['t2'] = tmp[['teamId']].shift(-2)
    
    tmp['x0_2'] = tmp[['x0']].shift(-2)

    
    
    # Ball recoveries 1, Any other event 0 
    tmp['lost_ball'] = 0
    tmp.loc[((tmp['teamId'] != tmp['teamId2']) & (tmp['halfmatch'] == tmp['halfmatch1'])),'lost_ball'] = 1

    
    # Three touches from the same team after a lost ball (same match)
    tmp = tmp.loc[((tmp['lost_ball'] == 1) & (tmp['halfmatch2'] == tmp['halfmatch']) &
                  (tmp['t1'] == tmp['teamId2']) &
                  (tmp['t2'] == tmp['teamId2'])),]
    
    tmp['one_touch'] = tmp['x0_2'] - tmp['x0']
    tmp = tmp[['teamId2','one_touch']].groupby('teamId2').mean().reset_index()
    
    tmp.columns = ['teamId','one_touch']
            

    return tmp

a = one_touch(df)
feats = pd.merge(feats, a, on = 'teamId', how = 'left')
display(feats.head())

Unnamed: 0,teamId,teamName,recovery_time,recovery_distance,one_touch
0,1609,Arsenal,7.345825,0.78382,-1.121964
1,1631,Leicester City,7.795853,-0.128222,-0.960219
2,1625,Manchester City,6.689525,9.874423,6.480371
3,1651,Brighton & Hove Albion,8.664741,-4.397881,-4.69101
4,1646,Burnley,8.340093,0.297576,2.048844


### Counterattacks after recovery  <a class="anchor" id="counterattacks"></a>[up](#up)

How many times per match a team recovers the ball and reaches `x > 75` within the next 2 touches.

In [6]:

def counterattacks(df):
    tmp = df.copy()
    
    # code match + half time
    tmp['halfmatch'] = tmp['matchId'].astype(str) + '_' + tmp['matchPeriod']
    
    # horizontally append next event 
    tmp['halfmatch4'] = tmp[['halfmatch']].shift(-4)
    tmp['halfmatch3'] = tmp[['halfmatch']].shift(-3)
    tmp['halfmatch2'] = tmp[['halfmatch']].shift(-2)
    tmp['halfmatch1'] = tmp[['halfmatch']].shift(-1)
    
    tmp['teamId2'] = tmp[['teamId']].shift(-1)

    
    tmp['t2'] = tmp[['teamId']].shift(-2)
    tmp['t3'] = tmp[['teamId']].shift(-3)
    tmp['t4'] = tmp[['teamId']].shift(-4)
    
    
    tmp['x0_2'] = tmp[['x0']].shift(-2)
    tmp['x0_3'] = tmp[['x0']].shift(-3)
    tmp['x0_4'] = tmp[['x0']].shift(-4)
    
    
    # Lost balls only
    tmp = tmp.loc[((tmp['teamId'] != tmp['teamId2']) & (tmp['halfmatch'] == tmp['halfmatch1'])),]
    
    
    # x > 75 within 3 touches
    tmp['x75'] = 0
    tmp.loc[((tmp['halfmatch2'] == tmp['halfmatch']) & (tmp['t2'] == tmp['teamId2']) &
            (tmp['x0_2'] > 75)), 'x75'] = 1
    
#     tmp.loc[((tmp['halfmatch3'] == tmp['halfmatch']) & (tmp['t3'] == tmp['teamId2']) &
#             (tmp['x0_3'] > 75)), 'x75'] = 1
    
#     tmp.loc[((tmp['halfmatch4'] == tmp['halfmatch']) & (tmp['t4'] == tmp['teamId2']) &
#             (tmp['x0_4'] > 75)), 'x75'] = 1
    
    
    tmp = tmp[['teamId2','x75', 'matchId']].groupby(['teamId2','matchId']).mean().reset_index()
    tmp = tmp[['teamId2','x75']].groupby(['teamId2']).mean().reset_index()

    
    tmp.columns = ['teamId','counterattacks']
            

    return tmp

a = counterattacks(df)
feats = pd.merge(feats, a, on = 'teamId', how = 'left')
display(feats.head())

Unnamed: 0,teamId,teamName,recovery_time,recovery_distance,one_touch,counterattacks
0,1609,Arsenal,7.345825,0.78382,-1.121964,0.075473
1,1631,Leicester City,7.795853,-0.128222,-0.960219,0.071367
2,1625,Manchester City,6.689525,9.874423,6.480371,0.095754
3,1651,Brighton & Hove Albion,8.664741,-4.397881,-4.69101,0.068132
4,1646,Burnley,8.340093,0.297576,2.048844,0.07746


### Launches after recovery <a class="anchor" id="recovery_launches"></a>[up](#up)

How many times per match a team recovers the ball and immediately produce a `Launch` event.

In [7]:

def recovery_launch(df):
    tmp = df.copy()
    
    # code match + half time
    tmp['halfmatch'] = tmp['matchId'].astype(str) + '_' + tmp['matchPeriod']
    
    # horizontally append next event 
    tmp['halfmatch1'] = tmp[['halfmatch']].shift(-1)
    tmp['halfmatch2'] = tmp[['halfmatch']].shift(-2)
    tmp['teamId1'] = tmp[['teamId']].shift(-1)
    tmp['teamId2'] = tmp[['teamId']].shift(-2)
    tmp['subEventId2'] = tmp['subEventId'].shift(-2)

    
    
    # Lost balls only
    tmp = tmp.loc[((tmp['teamId'] != tmp['teamId1']) & (tmp['halfmatch'] == tmp['halfmatch1'])),]
    
    
    # Launch after recovery
    tmp['recovery_launch'] = 0
    tmp.loc[((tmp['halfmatch2'] == tmp['halfmatch']) & (tmp['teamId2'] == tmp['teamId1']) &
            (tmp['subEventId2'] == 84)), 'recovery_launch'] = 1

    
    tmp = tmp[['teamId2','recovery_launch', 'matchId']].groupby(['teamId2','matchId']).mean().reset_index()
    tmp = tmp[['teamId2','recovery_launch']].groupby(['teamId2']).mean().reset_index()

    
    tmp.columns = ['teamId','recovery_launch']
            

    return tmp

a = recovery_launch(df)
feats = pd.merge(feats, a, on = 'teamId', how = 'left')
display(feats.head())

Unnamed: 0,teamId,teamName,recovery_time,recovery_distance,one_touch,counterattacks,recovery_launch
0,1609,Arsenal,7.345825,0.78382,-1.121964,0.075473,0.006607
1,1631,Leicester City,7.795853,-0.128222,-0.960219,0.071367,0.010084
2,1625,Manchester City,6.689525,9.874423,6.480371,0.095754,0.003885
3,1651,Brighton & Hove Albion,8.664741,-4.397881,-4.69101,0.068132,0.016097
4,1646,Burnley,8.340093,0.297576,2.048844,0.07746,0.01895


In [8]:
feats.to_csv('clean/feats_transizione.csv', index = False)