In [22]:
import pandas as pd
import numpy as np

In [4]:
# The raw data can be downloaded from:
# https://mega.nz/file/9kg2HaJb#XYLNOo-NFNOA-qjgF0Mw4gRYFHLop3ynY1Aton2RpOQ

# Read the two sample CSV files (looked up relative to the working directory)
# into DataFrames: gpsdf holds the GPS samples, visdf is used further down to
# find the quarter start/end times.
gpsdf = pd.read_csv("GPSsample.csv")
visdf = pd.read_csv("vissample.csv")

In [17]:
# Epoch times of the quarter boundaries: every visdf row whose period clock is
# at zero or whose stat is 'end'. The labelling below relies on this list
# alternating start, end, start, end, ... for the four quarters.
is_boundary = (visdf['periodSeconds'] == 0) | (visdf['stats'] == 'end')
qts = visdf.loc[is_boundary, 'time'].tolist()

# Tag each GPS sample with the quarter it falls in, then drop everything
# recorded outside of play.
# NOTE: this assumes the GPS timestamps are correctly synchronised with the
# visdf times, which they are not.
for quarter_no in (1, 2, 3, 4):
    q_start = qts[2 * quarter_no - 2]
    q_end = qts[2 * quarter_no - 1]
    # Strict inequalities: samples exactly on a boundary are not labelled.
    in_quarter = gpsdf['timeEpoch'].gt(q_start) & gpsdf['timeEpoch'].lt(q_end)
    gpsdf.loc[in_quarter, 'q'] = quarter_no

# Samples that were never labelled still have NaN in 'q'; keep only the rest.
gpsdf = gpsdf[gpsdf['q'].notna()].copy()

# Convert the GPS co-ordinates to pitch co-ordinates in metres.
# Rows with hkl == 1 have both axes mirrored, which re-orients the data so that
# the home team is always kicking right.
# The offsets imply x is centred on 0 with a span of 1.0 and y is centred on 0
# with a span of 0.8 (presumably normalised units -- confirm against the data).
vlength = 165  # scale applied to x, in metres (presumably the venue length)
vwidth = 130   # scale applied to y, in metres (presumably the venue width)

mirror_weight = gpsdf['hkl']
keep_weight = 1 - mirror_weight

gpsdf['xm'] = (
    keep_weight * (0.5 + gpsdf['x']) + mirror_weight * (0.5 - gpsdf['x'])
) * vlength
gpsdf['ym'] = (
    keep_weight * (0.4 + gpsdf['y']) + mirror_weight * (0.4 - gpsdf['y'])
) * vwidth / 0.8


In [83]:
# Let's work out KPickett's speeds in the match.
p = 'KPickett'

# Number of frames to "smooth" velocity over. h means:
# velocity now is the distance moved between h frames before now and h frames
# after, divided by the time elapsed between those two frames.
# i.e. h=2 looks across 4 frame intervals, or 0.4 seconds at 10 samples/second.
h = 2


def _central_difference_speed(x, y, t, h):
    """Return the speed at every sample using a central finite difference.

    For sample i the speed is the straight-line (Pythagoras) distance between
    the positions at samples i-h and i+h divided by t[i+h] - t[i-h].

    Parameters
    ----------
    x, y : array-like of float
        Positions, one entry per sample, ordered by time.
    t : array-like of numbers
        Timestamps matching x and y.
    h : int
        Half-width of the differencing window, in samples.

    Returns
    -------
    numpy.ndarray
        Array of len(t) speeds in position units per time unit. The first and
        last h entries are NaN (no full window), as is any entry whose window
        has a non-positive time span (e.g. duplicate timestamps). If there are
        2*h or fewer samples, or h < 1, every entry is NaN.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    t = np.asarray(t, dtype=float)
    n = len(t)
    speed = np.full(n, np.nan)
    if h < 1 or n <= 2 * h:
        # Not enough samples for even one full window.
        return speed

    # Differences between sample i+h and sample i-h for every valid i.
    dt = t[2 * h:] - t[:n - 2 * h]
    dx = x[2 * h:] - x[:n - 2 * h]
    dy = y[2 * h:] - y[:n - 2 * h]

    # Only divide where time actually advanced; elsewhere leave NaN instead
    # of raising or producing inf.
    advancing = dt > 0
    vx = np.divide(dx, dt, out=np.full(dt.shape, np.nan), where=advancing)
    vy = np.divide(dy, dt, out=np.full(dt.shape, np.nan), where=advancing)

    speed[h:n - h] = np.sqrt(vx ** 2 + vy ** 2)
    return speed


# Look at each quarter separately and collate at the end.
# The per-quarter frames are gathered in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0.
quarter_frames = []
for q in [1, 2, 3, 4]:
    pgps = (
        gpsdf[(gpsdf['name'] == p) & (gpsdf['q'] == q)]
        .copy()
        .sort_values(by='timeEpoch')
        .set_index('timeEpoch')
    )
    if pgps.empty:
        # Player has no samples in this quarter; nothing to add.
        continue

    # Work out speed using finite-differencing, Pythagoras.
    pgps['speeds'] = _central_difference_speed(pgps['xm'], pgps['ym'], pgps.index, h)
    # Raw speed is in metres per millisecond (timeEpoch is in ms); x1000 gives
    # m/s and x3.6 gives km/h, hence x3600.
    pgps['speedskm/h'] = pgps['speeds'] * 3600
    quarter_frames.append(pgps[['q', 'countup', 'speedskm/h']])

if quarter_frames:
    pspeeds = pd.concat(quarter_frames)
else:
    # No data at all for this player: keep the expected columns so that
    # downstream cells still work.
    pspeeds = pd.DataFrame(columns=['q', 'countup', 'speedskm/h'])


# KPickett's "official" maximum speed for the match was 34.6km/h.
#
# Depending on the value of h used when computing the speeds, the maximum
# found here may come out higher or lower than that figure.
#
# Other players seem to record speeds higher than KPickett's maximum, so it is
# entirely possible that the "official" data is filtered, or that the official
# speed is calculated in a different manner.

# Show the ten fastest samples, fastest first.
fastest_first = pspeeds.sort_values(by='speedskm/h', ascending=False)
top_speeds = fastest_first.head(10)
display(top_speeds)

Unnamed: 0_level_0,q,countup,speedskm/h
timeEpoch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1632568415650,3.0,1347.0,34.843021
1632568415150,3.0,1347.0,34.249579
1632562469850,1.0,1118.0,34.130693
1632568415450,3.0,1347.0,34.083299
1632568415850,3.0,1347.0,34.016459
1632568415750,3.0,1347.0,34.000069
1632568415050,3.0,1347.0,33.972804
1632562470550,1.0,1119.0,33.91378
1632562470750,1.0,1119.0,33.91378
1632562470350,1.0,1119.0,33.808154
