### Introduction to week 5 workbook: Creating a (simple) global metric

### Import Libraries and Dataset 
### Step 1


In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sn
import matplotlib.dates as mdates
import datetime
import plotly.graph_objects as go

import scipy.stats as stats

# Read the athlete tracking export into a DataFrame. The path is relative, so it
# resolves against the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="../Data/FH.csv")

In [2]:
# Preview the first rows. The header of the rendered table lists the column
# names, so no separate `df.columns` line is needed (a bare expression that is
# not the last line of a cell is evaluated but never displayed).
df.head()

Unnamed: 0.1,Unnamed: 0,Timestamp,Seconds,Velocity,Acceleration,Odometer,Latitude,Longitude,Heart Rate,Player Load,AthleteID
0,0,9/30/2018 12:21:49 PM,0.0,0.06,-0.041234,0.0,42.263222,-83.741055,122,0.0,Athlete 1
1,1,9/30/2018 12:21:49 PM,0.1,0.06,-0.025926,0.0,42.263223,-83.741055,122,0.0,Athlete 1
2,2,9/30/2018 12:21:49 PM,0.2,0.06,-0.011945,0.0,42.263223,-83.741055,122,0.0,Athlete 1
3,3,9/30/2018 12:21:49 PM,0.3,0.09,0.048539,0.0,42.263223,-83.741055,122,0.0,Athlete 1
4,4,9/30/2018 12:21:49 PM,0.4,0.08,0.021406,0.0,42.263223,-83.741055,122,0.0,Athlete 1


### Step 2  Dataframe clean-up

In [3]:
# NOTE(review): none of these labels appear among the columns shown by
# df.head() above, so reindex() creates them as new all-NaN columns instead of
# selecting existing data. This looks like a leftover from a different
# dataset -- confirm whether this cell is still needed.
columns_to_keep = [
    'Total.Distance',
    'Total.Player.Load',
    'Maximum.Velociy',
    'Fake_Name',
]
df_FH = df.reindex(columns=columns_to_keep)
df_FH.head()

Unnamed: 0,Total.Distance,Total.Player.Load,Maximum.Velociy,Fake_Name
0,,,,
1,,,,
2,,,,
3,,,,
4,,,,


In [4]:
# Drop the previous index column ('Unnamed: 0') and the sensor columns that are
# not used below. Rebinding `df` (instead of inplace=True) together with
# errors='ignore' keeps this cell safe to re-run: a second execution is a no-op
# rather than a KeyError on the already-removed columns.
df = df.drop(columns=['Unnamed: 0', 'Latitude', 'Longitude', 'Heart Rate'], errors='ignore')

In [5]:
# Build the working frame with a two-level index (Timestamp, AthleteID) so the
# time and the player ID live in the index rather than in the data columns.
# set_index returns a new frame by default; `df` itself is left unchanged.
master = df.set_index(['Timestamp', 'AthleteID'])
master.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Seconds,Velocity,Acceleration,Odometer,Player Load
Timestamp,AthleteID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9/30/2018 12:21:49 PM,Athlete 1,0.0,0.06,-0.041234,0.0,0.0
9/30/2018 12:21:49 PM,Athlete 1,0.1,0.06,-0.025926,0.0,0.0
9/30/2018 12:21:49 PM,Athlete 1,0.2,0.06,-0.011945,0.0,0.0
9/30/2018 12:21:49 PM,Athlete 1,0.3,0.09,0.048539,0.0,0.0
9/30/2018 12:21:49 PM,Athlete 1,0.4,0.08,0.021406,0.0,0.0


### Step 3 - Make new variables -- be sure you pay attention to the changes in the performance variables

In [6]:
# Recent distance covered (in meters) over a 200-sample window -- 20 seconds,
# assuming the 0.1 s sample spacing visible in `Seconds` holds throughout. This
# gives perspective on the anaerobic capacity of the player.
# The difference is taken within each athlete so that a window never subtracts
# one athlete's odometer reading from another's.
master['farthest'] = master.groupby(level='AthleteID')['Odometer'].diff(200)

In [7]:
# Recent distance covered (in meters) over a 600-sample window -- 1 minute,
# assuming the 0.1 s sample spacing visible in `Seconds` holds throughout.
# The difference is taken within each athlete so that a window never subtracts
# one athlete's odometer reading from another's.
master['OneMinuteDistance'] = master.groupby(level='AthleteID')['Odometer'].diff(600)

In [8]:
# A windowed distance below zero is not a real distance covered (presumably the
# window spans a point where the odometer restarts -- TODO confirm), so blank
# those values out in both window columns.
for window_col in ('OneMinuteDistance', 'farthest'):
    is_negative = master[window_col] < 0
    master.loc[is_negative, window_col] = np.nan

In [9]:
# Check that the two window columns were added. Their first rows are NaN because
# diff(200) / diff(600) have no earlier sample to subtract from yet.
master.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Seconds,Velocity,Acceleration,Odometer,Player Load,farthest,OneMinuteDistance
Timestamp,AthleteID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
9/30/2018 12:21:49 PM,Athlete 1,0.0,0.06,-0.041234,0.0,0.0,,
9/30/2018 12:21:49 PM,Athlete 1,0.1,0.06,-0.025926,0.0,0.0,,
9/30/2018 12:21:49 PM,Athlete 1,0.2,0.06,-0.011945,0.0,0.0,,
9/30/2018 12:21:49 PM,Athlete 1,0.3,0.09,0.048539,0.0,0.0,,
9/30/2018 12:21:49 PM,Athlete 1,0.4,0.08,0.021406,0.0,0.0,,


### Step 4 - Find each athlete's maximum value for every variable

In [10]:
# Per-athlete maximum of every column. The aggregation is given as the string
# 'max' rather than the Python builtin `max`: recent pandas versions warn that a
# builtin passed to agg() will stop being mapped to the groupby max in the
# future. Keeping it inside a list preserves the two-level (column, 'max')
# column header.
MaxValues_df = master.groupby('AthleteID').agg(['max'])
print(MaxValues_df)

            Seconds Velocity Acceleration  Odometer Player Load farthest  \
                max      max          max       max         max      max   
AthleteID                                                                  
Athlete 1   8759.98     7.24     5.981192  10036.75       891.5   119.37   
Athlete 10  8749.93     6.79     5.580821   7269.15       615.6   111.93   
Athlete 11  8749.93     6.56     3.825559   4791.84       449.0    87.14   
Athlete 12  8749.91     7.38     4.360430   8551.21       734.4   177.37   
Athlete 13  8749.93     6.33     7.034368   6345.99       609.2    83.81   
Athlete 14  7855.38     6.58     4.908402   9906.41       951.3    94.24   
Athlete 15  8749.93     6.61    11.318196   5001.16       540.0    85.37   
Athlete 17  8749.93     7.71     5.889059   9896.45       888.5    97.05   
Athlete 18  8759.92     6.63     5.557953   6326.56       688.5    81.20   
Athlete 19  8749.92     6.85     4.089531   7952.18       781.6   107.76   
Athlete 2   

### Step 5 - Convert the per-athlete maxima to z-scores

In [11]:
# Standardize each athlete's maximum against the other athletes (z-score taken
# across the rows of MaxValues_df) for the three variables used in the metric.
# The former `MaxValues_df.apply(stats.zscore)` line was removed: its result was
# neither assigned nor displayed.
MaxValues_df['zscores_accel'] = stats.zscore(MaxValues_df['Acceleration'])
MaxValues_df['zscores_far'] = stats.zscore(MaxValues_df['farthest'])
# NOTE: despite the "three" in its name, this column is the z-score of
# OneMinuteDistance.
MaxValues_df['zscores_three'] = stats.zscore(MaxValues_df['OneMinuteDistance'])
print(MaxValues_df)

            Seconds Velocity Acceleration  Odometer Player Load farthest  \
                max      max          max       max         max      max   
AthleteID                                                                  
Athlete 1   8759.98     7.24     5.981192  10036.75       891.5   119.37   
Athlete 10  8749.93     6.79     5.580821   7269.15       615.6   111.93   
Athlete 11  8749.93     6.56     3.825559   4791.84       449.0    87.14   
Athlete 12  8749.91     7.38     4.360430   8551.21       734.4   177.37   
Athlete 13  8749.93     6.33     7.034368   6345.99       609.2    83.81   
Athlete 14  7855.38     6.58     4.908402   9906.41       951.3    94.24   
Athlete 15  8749.93     6.61    11.318196   5001.16       540.0    85.37   
Athlete 17  8749.93     7.71     5.889059   9896.45       888.5    97.05   
Athlete 18  8759.92     6.63     5.557953   6326.56       688.5    81.20   
Athlete 19  8749.92     6.85     4.089531   7952.18       781.6   107.76   
Athlete 2   

### Step 6 - Add up the z-scores to create the global metric

In [12]:
# Simple global metric: the sum of the three z-scores (acceleration, 20-second
# distance, one-minute distance) plus a constant offset of 10.
# NOTE(review): the offset presumably just keeps the scores positive -- confirm.
MaxValues_df['Metric'] = (
    stats.zscore(MaxValues_df['Acceleration'])
    + stats.zscore(MaxValues_df['farthest'])
    + stats.zscore(MaxValues_df['OneMinuteDistance'])
    + 10
)

print(MaxValues_df)

            Seconds Velocity Acceleration  Odometer Player Load farthest  \
                max      max          max       max         max      max   
AthleteID                                                                  
Athlete 1   8759.98     7.24     5.981192  10036.75       891.5   119.37   
Athlete 10  8749.93     6.79     5.580821   7269.15       615.6   111.93   
Athlete 11  8749.93     6.56     3.825559   4791.84       449.0    87.14   
Athlete 12  8749.91     7.38     4.360430   8551.21       734.4   177.37   
Athlete 13  8749.93     6.33     7.034368   6345.99       609.2    83.81   
Athlete 14  7855.38     6.58     4.908402   9906.41       951.3    94.24   
Athlete 15  8749.93     6.61    11.318196   5001.16       540.0    85.37   
Athlete 17  8749.93     7.71     5.889059   9896.45       888.5    97.05   
Athlete 18  8759.92     6.63     5.557953   6326.56       688.5    81.20   
Athlete 19  8749.92     6.85     4.089531   7952.18       781.6   107.76   
Athlete 2   

### Step 7 Identify the top 2 athletes on the team and compare with the workbook top 2

Here are the results from the workbook key. The two highest were Athlete 1 and Athlete 12, followed closely by Athlete 10.
AthleteID              
Athlete 1   13.411208  
Athlete 10  13.130881  
Athlete 11   9.668775  
Athlete 12  13.335177  
Athlete 13   9.275439  
Athlete 14   9.810296  
Athlete 15   9.629877  
Athlete 17  12.085971  
Athlete 18   9.155264  
Athlete 19  11.004381  
Athlete 2    3.110064  
Athlete 20  12.648865  
Athlete 21   9.045761  
Athlete 22   1.769149  
Athlete 23  11.676773  
Athlete 24  10.933958  
Athlete 3    8.963054  
Athlete 4   10.274788  
Athlete 5    9.414085  
Athlete 6   10.793858  
Athlete 7   11.620004  
Athlete 9    9.242372