#### Import required modules

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Arc,Rectangle
import matplotlib.animation as animation

#### Read csv and drop all NaNs existing


In [2]:
df = pd.read_csv('data.csv')
df.dropna(axis=0,how='all',inplace=True)
df.dropna(axis=1,how='all',inplace=True)

#### Column types

In [3]:
df.dtypes

id           int64
half         int64
point_x    float64
point_y    float64
time        object
match        int64
tag          int64
dtype: object

In [4]:
# Cast time column to datetime
df.time = pd.to_datetime(df.time)
df.dtypes

id                  int64
half                int64
point_x           float64
point_y           float64
time       datetime64[ns]
match               int64
tag                 int64
dtype: object

In [5]:
df.head()

Unnamed: 0,id,half,point_x,point_y,time,match,tag
0,1,1,101.1491,278.9296,2018-03-11 10:14:24,4,6
1,2,1,168.8316,343.1585,2018-03-11 10:14:25,4,12
2,3,1,43.2572,256.8387,2018-03-11 10:14:25,4,5
3,4,1,90.504,144.5932,2018-03-11 10:14:25,4,3
4,5,1,101.9848,401.1742,2018-03-11 10:14:25,4,1


In [6]:
match = df.match.unique()[0]
match

4

#### Delete unnecessary columns

In [7]:
# Since we won't need id and match
del df['id'], df['match']
df.head()

Unnamed: 0,half,point_x,point_y,time,tag
0,1,101.1491,278.9296,2018-03-11 10:14:24,6
1,1,168.8316,343.1585,2018-03-11 10:14:25,12
2,1,43.2572,256.8387,2018-03-11 10:14:25,5
3,1,90.504,144.5932,2018-03-11 10:14:25,3
4,1,101.9848,401.1742,2018-03-11 10:14:25,1


In [8]:
# There might be multiple x&y locations for a tag at a second.
df.groupby(['half','tag','time']).count().head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,point_x,point_y
half,tag,time,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,2018-03-11 10:14:25,2,2
1,1,2018-03-11 10:14:26,1,1
1,1,2018-03-11 10:14:27,1,1
1,1,2018-03-11 10:14:28,2,2
1,1,2018-03-11 10:14:30,2,2
1,1,2018-03-11 10:14:31,3,3
1,1,2018-03-11 10:14:32,2,2
1,1,2018-03-11 10:14:33,3,3
1,1,2018-03-11 10:14:34,2,2
1,1,2018-03-11 10:14:35,2,2


#### Fix duplicate values 

In [9]:
df = df.groupby(['half', 'tag', 'time'])['point_x', 'point_y'].mean().reset_index()
df.groupby(['half','tag','time']).count().head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,point_x,point_y
half,tag,time,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,2018-03-11 10:14:25,1,1
1,1,2018-03-11 10:14:26,1,1
1,1,2018-03-11 10:14:27,1,1
1,1,2018-03-11 10:14:28,1,1
1,1,2018-03-11 10:14:30,1,1
1,1,2018-03-11 10:14:31,1,1
1,1,2018-03-11 10:14:32,1,1
1,1,2018-03-11 10:14:33,1,1
1,1,2018-03-11 10:14:34,1,1
1,1,2018-03-11 10:14:35,1,1


### Interpolation Function 

In [10]:
# Interpolates original data and returns new data frame
def get_interpolated_data(data_f,interval):
    df_temp = pd.DataFrame(columns=data_f.columns)
    for half in [1,2]:
        data = data_f[data_f.half == half].copy()
        tags = data.tag.unique()
        for t in tags:
            tag_data = data[data.tag == t].copy()
            d = tag_data.set_index('time').resample(interval).interpolate().reset_index()
            d = d.fillna({'half': half, 'tag': t})
            df_temp = df_temp.append(d)
    df_temp.reset_index(drop=True, inplace=True)
    df_temp.half = df_temp.half.astype('int')
    df_temp.tag =  df_temp.tag.astype('int')
    return df_temp

In [11]:
interval = '125ms' # This could be 'S'. For further information check resources section
df_interpolated = get_interpolated_data(df, interval)
df_interpolated.head(10)

Unnamed: 0,half,point_x,point_y,tag,time
0,1,101.9848,401.1742,1,2018-03-11 10:14:25.000
1,1,107.928888,395.230113,1,2018-03-11 10:14:25.125
2,1,113.872975,389.286025,1,2018-03-11 10:14:25.250
3,1,119.817063,383.341937,1,2018-03-11 10:14:25.375
4,1,125.76115,377.39785,1,2018-03-11 10:14:25.500
5,1,131.705238,371.453763,1,2018-03-11 10:14:25.625
6,1,137.649325,365.509675,1,2018-03-11 10:14:25.750
7,1,143.593412,359.565587,1,2018-03-11 10:14:25.875
8,1,149.5375,353.6215,1,2018-03-11 10:14:26.000
9,1,150.868025,350.2214,1,2018-03-11 10:14:26.125


#### Calculate distance covered by each tag at each half

In [12]:
df_dist = df_interpolated.copy()
dx = (df_dist['point_x'] - df_dist.groupby(['half', 'tag'])['point_x'].shift()).fillna(0)
dy = (df_dist['point_y'] - df_dist.groupby(['half', 'tag'])['point_y'].shift()).fillna(0)
df_dist['diff'] = np.sqrt(dx ** 2 + dy ** 2)
d_frame = df_dist.groupby(['half', 'tag'], as_index=False)['diff'].sum()
d_frame.columns.values[2] = 'distance'
d_frame.distance = (np.round(d_frame.distance / 10, decimals=0)).astype(int)
d_frame

Unnamed: 0,half,tag,distance
0,1,1,2681
1,1,2,2179
2,1,3,2940
3,1,4,2611
4,1,5,2241
5,1,6,2664
6,1,7,3478
7,1,8,2085
8,1,9,2917
9,1,11,2461


In [13]:
# Calculate total distance covered by each tag

d_frame.groupby(['tag'], as_index=False)['distance'].sum()\
                                                    .sort_values('distance', ascending = False)\
                                                    .reset_index(drop=True)

Unnamed: 0,tag,distance
0,7,6410
1,3,6363
2,9,5814
3,6,5640
4,1,5553
5,12,5182
6,11,5122
7,4,5112
8,5,4811
9,2,4747


#### Get Tag and Team relation

In [14]:
tag_team_df = pd.read_csv('tag_team.csv')
tag_team_df.dropna(axis=0,how='any',inplace=True)
tag_team_df

Unnamed: 0,tag,team
0,12,8
1,8,8
2,7,8
3,10,8
4,9,8
5,11,8
6,6,9
7,5,9
8,3,9
9,4,9


### Resources and References

[Pandas Resample](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.resample.html)

[Pandas Interpolate](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.interpolate.html)
