# Imports

In [None]:
#@title
import sys
import pandas as pd
pd.options.mode.chained_assignment = None
pd.set_option('display.max_columns', None)
import numpy as np
!git clone https://github.com/bosemessi/StatsbombOpenData --q
!pip install mplsoccer --q
from mplsoccer import Pitch, VerticalPitch
import ipywidgets as widgets
from ipywidgets import interact,interactive
import matplotlib.pyplot as plt
import matplotlib as mpl 
import matplotlib.font_manager as fm
!pip install highlight-text --q
from highlight_text import ax_text, fig_text
!pip install adjustText --q
from adjustText import adjust_text
!pip install gdown --quiet
import gdown
import matplotlib.patheffects as path_effects
from matplotlib.transforms import Affine2D
import mpl_toolkits.axisartist.floating_axes as floating_axes
import mpl_toolkits.axisartist.angle_helper as angle_helper
from matplotlib.projections import PolarAxes
from mpl_toolkits.axisartist.grid_finder import (FixedLocator, MaxNLocator,
                                                 DictFormatter)
import matplotlib.patches as patches
from PIL import Image
from io import StringIO, BytesIO
from tqdm import tqdm
import requests 
import warnings
warnings.filterwarnings('ignore')
from matplotlib.colors import LinearSegmentedColormap
from scipy.interpolate import RegularGridInterpolator

[?25l[K     |█████▊                          | 10kB 16.0MB/s eta 0:00:01[K     |███████████▍                    | 20kB 20.4MB/s eta 0:00:01[K     |█████████████████               | 30kB 22.2MB/s eta 0:00:01[K     |██████████████████████▊         | 40kB 23.9MB/s eta 0:00:01[K     |████████████████████████████▍   | 51kB 25.8MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 6.7MB/s 
[?25h  Building wheel for mplsoccer (setup.py) ... [?25l[?25hdone
  Building wheel for adjustText (setup.py) ... [?25l[?25hdone


# Read and pre-process data

In [None]:
#@title

# Load the event table and keep only the rows that carry a location.
df = pd.read_parquet('/content/StatsbombOpenData/WC2018.parquet').reset_index(drop=True)
has_location = df['location'].notna()
Df = df.loc[has_location].reset_index(drop=True)

# Unpack the start location into separate numeric columns.
Df[['x','y']] = np.array(Df['location'].tolist())

# End location: passes and carries use their own end point; every other
# event type falls back to its start location.
carry_end_or_start = np.where(Df['type_name'] == 'Carry',
                              Df['carry_end_location'],
                              Df['location'])
Df['endloc'] = np.where(Df['type_name'] == 'Pass',
                        Df['pass_end_location'],
                        carry_end_or_start)
Df[['endX','endY']] = np.array(Df['endloc'].tolist())

# Euclidean distance from the point (120, 40) at the start (dist1) and at the
# end (dist2) of each event; diffdist > 0 means the event ended closer to it.
# NOTE(review): (120, 40) is presumably the centre of the attacked goal in
# StatsBomb pitch coordinates - confirm.
ref_x, ref_y = 120, 40
Df['dist1'] = np.sqrt((Df['x'] - ref_x)**2 + (Df['y'] - ref_y)**2)
Df['dist2'] = np.sqrt((Df['endX'] - ref_x)**2 + (Df['endY'] - ref_y)**2)
Df['diffdist'] = Df['dist1'].sub(Df['dist2'])

# Boolean masks over Df. Each one is row-aligned with Df and is combined
# further down to build the per-event indicator columns.

# Event-type mask for passes.
passmask = Df.type_name=='Pass'
# Rows with no pass_outcome_name are the ones counted as "successful".
successmask = Df.pass_outcome_name.isna()
# Rows with no pass_type_name are the ones counted as open play.
openplaymask = Df.pass_type_name.isna()

# Pass-length bands (passes shorter than 5 fall into none of the bands).
shortpassmask = (Df.pass_length >= 5) & (Df.pass_length < 15)
mediumpassmask = (Df.pass_length >= 15) & (Df.pass_length < 30)
longpassmask = (Df.pass_length >= 30)

# Starts at or before x = 80 and ends beyond it.
finalthirdmask = (Df.endX > 80) & (Df.x <= 80)
# End point lies inside the rectangle x > 102, |y - 40| < 22.
penaltyareamask = (Df.endX > 102) & (np.abs(Df.endY - 40) < 22)

# Flag columns compared against True so that missing values become False.
pressuremask = Df.under_pressure==True
throughballmask = Df.pass_through_ball == True 
switchmask = Df.pass_switch == True 
crossmask = Df.pass_cross == True

# Event moved more than 25% of its starting distance closer to (120, 40).
distmask = (Df['dist1'] - Df['dist2'])/Df['dist1'] > 0.25

# Start point is NOT inside the rectangle x > 102, |y - 40| < 22.
# The whole in-box condition must be parenthesised before negating: `~` binds
# tighter than `&`, so `~(Df.x > 102) & (...)` would only negate the x test
# and wrongly select "x <= 102 AND central channel" instead.
boxmask = ~((Df.x > 102) & (np.abs(Df.y - 40) < 22))

# Progressive: large relative gain towards (120, 40), or an entry into the
# box rectangle from outside it.
progmask = distmask | (boxmask & penaltyareamask)

# Event-type mask for carries.
carrymask = Df.type_name=='Carry'

# 0/1 indicator columns for passes, listed in the order they are added to Df.
# Every "Successful ..." entry is its base mask additionally restricted by
# successmask.
pass_indicators = {
    'Passes': passmask,
    'Successful Passes': passmask & successmask,
    'Short Passes': passmask & shortpassmask,
    'Successful Short Passes': passmask & shortpassmask & successmask,
    'Medium Passes': passmask & mediumpassmask,
    'Successful Medium Passes': passmask & mediumpassmask & successmask,
    'Long Passes': passmask & longpassmask,
    'Successful Long Passes': passmask & longpassmask & successmask,
    'Final Third Passes': passmask & finalthirdmask & openplaymask,
    'Successful Final Third Passes': passmask & finalthirdmask & openplaymask & successmask,
    'Penalty Area Passes': passmask & penaltyareamask & openplaymask,
    'Successful Penalty Area Passes': passmask & penaltyareamask & openplaymask & successmask,
    'Under Pressure Passes': passmask & pressuremask,
    'Successful Under Pressure Passes': passmask & pressuremask & successmask,
    'Throughballs': throughballmask,
    'Successful Throughballs': throughballmask & successmask,
    'Switches': switchmask,
    'Successful Switches': switchmask & successmask,
    'Crosses': crossmask,
    'Successful Crosses': crossmask & successmask,
    'Penalty Area Crosses': crossmask & penaltyareamask & openplaymask,
    'Successful Penalty Area Crosses': crossmask & penaltyareamask & openplaymask & successmask,
    'Progressive Passes': passmask & progmask,
    'Successful Progressive Passes': passmask & progmask & successmask,
}
for indicator_name, indicator_mask in pass_indicators.items():
    Df[indicator_name] = np.where(indicator_mask, 1, 0)

# Distance gained towards the reference point by passes (0 for everything else
# and for passes that did not end closer).
Df['Pass Progressive Distance'] = np.where(passmask & (Df['diffdist'] > 0), Df['diffdist'], 0)

# 0/1 indicator columns for carries.
carry_indicators = {
    'Carries': carrymask,
    'Final Third Carries': carrymask & finalthirdmask,
    'Progressive Carries': carrymask & progmask,
}
for indicator_name, indicator_mask in carry_indicators.items():
    Df[indicator_name] = np.where(indicator_mask, 1, 0)

# Straight-line length of each carry, and the part of it that gained ground.
carry_dx = Df['x'] - Df['endX']
carry_dy = Df['y'] - Df['endY']
Df['Carry Distance'] = np.where(carrymask, np.sqrt(carry_dx**2 + carry_dy**2), 0)
Df['Carry Progressive Distance'] = np.where(carrymask & (Df['diffdist'] > 0), Df['diffdist'], 0)

# Columns that are summed per player, in output order.
summed_columns = [
    'Passes', 'Successful Passes',
    'Short Passes', 'Successful Short Passes',
    'Medium Passes', 'Successful Medium Passes',
    'Long Passes', 'Successful Long Passes',
    'Final Third Passes', 'Successful Final Third Passes',
    'Penalty Area Passes', 'Successful Penalty Area Passes',
    'Under Pressure Passes', 'Successful Under Pressure Passes',
    'Throughballs', 'Successful Throughballs',
    'Switches', 'Successful Switches',
    'Crosses', 'Successful Crosses',
    'Penalty Area Crosses', 'Successful Penalty Area Crosses',
    'Progressive Passes', 'Successful Progressive Passes',
    'pass_length', 'Pass Progressive Distance',
    'Carries', 'Final Third Carries', 'Progressive Carries',
    'Carry Distance', 'Carry Progressive Distance',
]
aggdict = dict.fromkeys(summed_columns, 'sum')

# One row per player with every column above summed; the raw column names are
# then given presentation names (errors="raise" fails loudly if one is absent).
groupedstats = (
    Df.groupby('player_name')
      .agg(aggdict)
      .reset_index()
      .rename(columns={"player_name": "name",
                       'pass_length': 'Total Pass Length'},
              errors="raise")
)

# Mount Google Drive and save file

In [None]:
# Attach Google Drive to the Colab filesystem so results can be written to it.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
# Write the per-player pass/carry table to Drive, without the row index.
passcarry_path = '/content/drive/MyDrive/Norwich/passcarry.parquet'
groupedstats.to_parquet(passcarry_path, index=False)