In [1]:
%matplotlib inline

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='white')

from utils import decorate
from thinkstats2 import Pmf, Cdf

import thinkstats2
import thinkplot
from datetime import datetime
import time

**Replacing date strings with sortable integers (YYYYMMDD) so that events on the same day can be matched across tables**

In [2]:
# Load the four raw tables (paths are relative to the notebook's working directory).
games = pd.read_csv('games.csv')
gps = pd.read_csv('gps.csv')
rpe = pd.read_csv('rpe.csv')
wellness = pd.read_csv('wellness.csv')

# Turn each 'YYYY-MM-DD' date string into the integer YYYYMMDD, one vectorized
# pass per table instead of a row-by-row loop. The explicit format keeps the
# parse strict, and the result is a real int64 column rather than an object
# column holding Python ints. The gps table is loaded but its dates are left
# untouched, as before.
for frame in (rpe, games, wellness):
    parsed_dates = pd.to_datetime(frame['Date'], format='%Y-%m-%d')
    frame['Date'] = parsed_dates.dt.strftime('%Y%m%d').astype('int64')

# Replace percent strings with float percents by dropping the '%' characters.
# Doing this on the whole column gives a float64 column, so numeric
# aggregations (mean/std) work on it directly.
wellness['TrainingReadiness'] = wellness['TrainingReadiness'].str.strip('%').astype(float)

In [3]:
# Encode game results numerically: 'W' becomes 1 and 'L' becomes 0.
# The replacement is applied to the whole games frame, in place.
for label, code in (('W', 1), ('L', 0)):
    games.replace(label, code, inplace=True)
games.head()

Unnamed: 0,GameID,Date,Tournament,TournamentGame,Team,Opponent,Outcome,TeamPoints,TeamPointsAllowed
0,1,20171130,Dubai,1,Canada,Spain,1,19,0
1,2,20171130,Dubai,2,Canada,Ireland,1,31,0
2,3,20171130,Dubai,3,Canada,Fiji,1,31,14
3,4,20171201,Dubai,4,Canada,France,1,24,19
4,5,20171201,Dubai,5,Canada,Australia,0,7,25


In [4]:
# Preview the first rows of rpe; Date now holds YYYYMMDD integers.
rpe.head()

Unnamed: 0,Date,PlayerID,Training,SessionType,Duration,RPE,SessionLoad,DailyLoad,AcuteLoad,ChronicLoad,AcuteChronicRatio,ObjectiveRating,FocusRating,BestOutOfMyself
0,20180721,13,Yes,Mobility/Recovery,5.0,0.0,0.0,446.0,323.1,303.25,1.07,,,Not at all
1,20180721,13,Yes,Game,29.0,7.0,203.0,,,,,,,Not at all
2,20180721,13,Yes,Game,27.0,9.0,243.0,,,,,,,Not at all
3,20180720,11,Yes,Game,36.0,6.0,216.0,534.0,256.3,344.18,0.74,9.0,10.0,Absolutely
4,20180720,11,Yes,Game,36.0,8.0,288.0,,,,,9.0,10.0,Absolutely


In [5]:
# Preview the first rows of wellness; Date holds YYYYMMDD integers and
# TrainingReadiness has had its '%' sign stripped.
wellness.head()

Unnamed: 0,Date,PlayerID,Fatigue,Soreness,Desire,Irritability,BedTime,WakeTime,SleepHours,SleepQuality,MonitoringScore,Pain,Illness,Menstruation,Nutrition,NutritionAdjustment,USGMeasurement,USG,TrainingReadiness
0,20180721,1,3,3,2,3,23:00:00,07:00:00,8.0,2,13,No,No,Yes,Excellent,Yes,No,,0
1,20180721,2,4,3,4,4,23:00:00,07:00:00,8.0,4,19,Yes,No,Yes,,,Yes,1.01,0
2,20180721,3,3,3,5,4,22:30:00,06:30:00,8.0,4,19,No,No,No,,,Yes,1.016,100
3,20180721,4,2,3,5,4,00:30:00,07:00:00,6.5,1,15,No,No,Yes,Excellent,Yes,Yes,1.025,95
4,20180721,5,5,3,4,4,23:45:00,07:00:00,7.25,4,20,No,No,No,Okay,Yes,Yes,1.022,100


**Standardizing each player's answers as z-scores relative to that player's own mean and standard deviation**

In [6]:
def standardize_col(df, col):
    """Add a per-player z-score column for ``col`` to ``df`` in place.

    For every row, the new column ``col + '_Z'`` holds
    ``(value - player_mean) / player_std``, where the mean and the sample
    standard deviation (pandas default, ddof=1) are computed over all rows
    sharing that row's ``PlayerID``.

    Every player present in ``df`` is standardized; the set of IDs is taken
    from the data rather than from a fixed 1..17 range, so a missing ID no
    longer raises ``KeyError`` and extra IDs are no longer left as NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``PlayerID`` column and the column ``col``.
        It is modified in place.
    col : str
        Name of the numeric column to standardize.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``PlayerID`` or ``col`` is not a column of ``df``.
    ValueError
        If ``col`` contains values that cannot be interpreted as numbers.

    Notes
    -----
    A player with a single row or with constant values has an undefined or
    zero standard deviation, so that player's z-scores come out as NaN.
    """
    # Columns filled cell by cell can end up with object dtype even though
    # they hold numbers; coerce so mean/std are computed numerically.
    values = pd.to_numeric(df[col])
    per_player = values.groupby(df['PlayerID'])

    # transform() broadcasts each player's statistic back onto that player's
    # rows, so the subtraction and division are aligned row by row.
    df[col + '_Z'] = (values - per_player.transform('mean')) / per_player.transform('std')

# Wellness survey answers that each get a per-player z-score column (<name>_Z).
wellness_cols = [
    'Fatigue',
    'Soreness',
    'Irritability',
    'Desire',
    'SleepQuality',
    'TrainingReadiness',
]

# Standardize every wellness answer first, then the RPE score in the rpe table.
targets = [(wellness, name) for name in wellness_cols] + [(rpe, 'RPE')]
for frame, name in targets:
    standardize_col(frame, name)

In [7]:
# Preview rpe again to check the RPE_Z column added by standardize_col.
rpe.head()

Unnamed: 0,Date,PlayerID,Training,SessionType,Duration,RPE,SessionLoad,DailyLoad,AcuteLoad,ChronicLoad,AcuteChronicRatio,ObjectiveRating,FocusRating,BestOutOfMyself,RPE_Z
0,20180721,13,Yes,Mobility/Recovery,5.0,0.0,0.0,446.0,323.1,303.25,1.07,,,Not at all,-1.253472
1,20180721,13,Yes,Game,29.0,7.0,203.0,,,,,,,Not at all,1.153176
2,20180721,13,Yes,Game,27.0,9.0,243.0,,,,,,,Not at all,1.840789
3,20180720,11,Yes,Game,36.0,6.0,216.0,534.0,256.3,344.18,0.74,9.0,10.0,Absolutely,0.750374
4,20180720,11,Yes,Game,36.0,8.0,288.0,,,,,9.0,10.0,Absolutely,1.695403


In [8]:
# Preview wellness again to check the *_Z columns added by standardize_col.
wellness.head()

Unnamed: 0,Date,PlayerID,Fatigue,Soreness,Desire,Irritability,BedTime,WakeTime,SleepHours,SleepQuality,...,NutritionAdjustment,USGMeasurement,USG,TrainingReadiness,Fatigue_Z,Soreness_Z,Irritability_Z,Desire_Z,SleepQuality_Z,TrainingReadiness_Z
0,20180721,1,3,3,2,3,23:00:00,07:00:00,8.0,2,...,Yes,No,,0,0.318219,-0.304322,-0.987002,-0.415362,-1.551148,-0.562599
1,20180721,2,4,3,4,4,23:00:00,07:00:00,8.0,4,...,,Yes,1.01,0,0.207884,-0.230312,-0.186841,-0.121453,-0.10039,-0.410237
2,20180721,3,3,3,5,4,22:30:00,06:30:00,8.0,4,...,,Yes,1.016,100,-0.578512,-0.279482,0.143069,2.144929,0.255789,0.558418
3,20180721,4,2,3,5,4,00:30:00,07:00:00,6.5,1,...,Yes,Yes,1.025,95,-1.59178,-0.605094,0.463336,1.575278,-1.447874,0.621488
4,20180721,5,5,3,4,4,23:45:00,07:00:00,7.25,4,...,Yes,Yes,1.022,100,1.808898,-0.983282,0.054038,-0.403411,0.286967,0.891845


In [9]:
# Team-wide daily averages of the wellness measures (one value per Date).
# All are means of per-player z-scores except SleepHours, which is averaged
# in raw hours.
date_grouped = wellness.groupby('Date')
mean_fat = date_grouped['Fatigue_Z'].mean()
mean_sore = date_grouped['Soreness_Z'].mean()
mean_des = date_grouped['Desire_Z'].mean()
mean_irr = date_grouped['Irritability_Z'].mean()
mean_sleepq = date_grouped['SleepQuality_Z'].mean()
mean_sleept = date_grouped['SleepHours'].mean()
mean_readiness = date_grouped['TrainingReadiness_Z'].mean()

# Attach each daily average to the games played on that date. Series.map looks
# every game date up in the daily series in one vectorized step; a game date
# with no wellness rows yields NaN instead of raising KeyError.
game_features = [
    ('Fat', mean_fat),
    ('Sore', mean_sore),
    ('Des', mean_des),
    ('Irr', mean_irr),
    ('SleepQ', mean_sleepq),
    ('SleepT', mean_sleept),
    ('Readiness', mean_readiness),
]
for feature_name, daily_mean in game_features:
    games[feature_name] = games['Date'].map(daily_mean)

games.head()

Unnamed: 0,GameID,Date,Tournament,TournamentGame,Team,Opponent,Outcome,TeamPoints,TeamPointsAllowed,Fat,Sore,Des,Irr,SleepQ,SleepT,Readiness
0,1,20171130,Dubai,1,Canada,Spain,1,19,0,0.494438,0.97405,1.672751,0.383781,0.07981,8.203125,0.619255
1,2,20171130,Dubai,2,Canada,Ireland,1,31,0,0.494438,0.97405,1.672751,0.383781,0.07981,8.203125,0.619255
2,3,20171130,Dubai,3,Canada,Fiji,1,31,14,0.494438,0.97405,1.672751,0.383781,0.07981,8.203125,0.619255
3,4,20171201,Dubai,4,Canada,France,1,24,19,-0.352514,-1.085568,1.077209,0.132107,-0.475545,7.171875,0.524882
4,5,20171201,Dubai,5,Canada,Australia,0,7,25,-0.352514,-1.085568,1.077209,0.132107,-0.475545,7.171875,0.524882


In [10]:
# Combined wellness score: the sum of the six daily mean z-scores.
games['Sum_Z'] = (
    games['Fat']
    + games['Sore']
    + games['Des']
    + games['Irr']
    + games['SleepQ']
    + games['Readiness']
)

# Columns shown in the correlation matrix.
# NOTE(review): 'Readiness' contributes to Sum_Z but is not listed here, so it
# has no row/column of its own in the matrix - confirm this is intended.
columns = [
    'Outcome', 'TeamPoints', 'TeamPointsAllowed',
    'Fat', 'Sore', 'Des', 'Irr', 'SleepQ', 'SleepT',
    'Sum_Z',
]
games[columns].corr()

Unnamed: 0,Outcome,TeamPoints,TeamPointsAllowed,Fat,Sore,Des,Irr,SleepQ,SleepT,Sum_Z
Outcome,1.0,0.740171,-0.751528,0.188176,0.216107,0.153654,0.04237,0.081989,0.125938,0.177702
TeamPoints,0.740171,1.0,-0.48378,0.185534,0.220732,0.120468,0.100627,0.00696,0.182159,0.147918
TeamPointsAllowed,-0.751528,-0.48378,1.0,-0.308816,-0.304649,-0.216315,-0.129036,-0.187217,-0.078184,-0.287469
Fat,0.188176,0.185534,-0.308816,1.0,0.884424,0.535827,0.595916,0.854533,0.554684,0.909093
Sore,0.216107,0.220732,-0.304649,0.884424,1.0,0.594816,0.446359,0.73427,0.629811,0.911977
Des,0.153654,0.120468,-0.216315,0.535827,0.594816,1.0,0.752196,0.695644,-0.025541,0.800593
Irr,0.04237,0.100627,-0.129036,0.595916,0.446359,0.752196,1.0,0.817361,0.137786,0.733905
SleepQ,0.081989,0.00696,-0.187217,0.854533,0.73427,0.695644,0.817361,1.0,0.387897,0.915718
SleepT,0.125938,0.182159,-0.078184,0.554684,0.629811,-0.025541,0.137786,0.387897,1.0,0.458045
Sum_Z,0.177702,0.147918,-0.287469,0.909093,0.911977,0.800593,0.733905,0.915718,0.458045,1.0


In [11]:
import statsmodels.formula.api as smf

# Logistic regression of the encoded game outcome on the daily wellness
# averages attached to each game.
outcome_formula = 'Outcome ~ Fat + Sore + Des + Irr + SleepQ + SleepT + Readiness'
outcome_model = smf.logit(outcome_formula, data=games)
outcome_res = outcome_model.fit()
outcome_res.summary()

Optimization terminated successfully.
         Current function value: 0.616572
         Iterations 5


0,1,2,3
Dep. Variable:,Outcome,No. Observations:,38.0
Model:,Logit,Df Residuals:,30.0
Method:,MLE,Df Model:,7.0
Date:,"Sun, 07 Apr 2019",Pseudo R-squ.:,0.08087
Time:,23:01:22,Log-Likelihood:,-23.43
converged:,True,LL-Null:,-25.491
,,LLR p-value:,0.7655

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-12.4819,13.894,-0.898,0.369,-39.713,14.749
Fat,2.3093,2.565,0.900,0.368,-2.718,7.337
Sore,-1.0263,1.813,-0.566,0.571,-4.581,2.528
Des,3.6516,3.255,1.122,0.262,-2.728,10.032
Irr,-3.1695,5.056,-0.627,0.531,-13.079,6.740
SleepQ,-2.1108,3.189,-0.662,0.508,-8.360,4.139
SleepT,1.1104,1.432,0.776,0.438,-1.696,3.916
Readiness,-1.3527,2.819,-0.480,0.631,-6.877,4.171


In [12]:
# Ordinary least squares: points scored regressed on the same wellness
# predictors used for the outcome model.
offense_formula = 'TeamPoints ~ Fat + Sore + Des + Irr + SleepQ + SleepT + Readiness'
offense_model = smf.ols(offense_formula, data=games)
offense_res = offense_model.fit()
offense_res.summary()

0,1,2,3
Dep. Variable:,TeamPoints,R-squared:,0.271
Model:,OLS,Adj. R-squared:,0.101
Method:,Least Squares,F-statistic:,1.597
Date:,"Sun, 07 Apr 2019",Prob (F-statistic):,0.175
Time:,23:01:22,Log-Likelihood:,-136.94
No. Observations:,38,AIC:,289.9
Df Residuals:,30,BIC:,303.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-48.6181,64.772,-0.751,0.459,-180.900,83.664
Fat,10.9513,11.650,0.940,0.355,-12.841,34.743
Sore,-0.3869,8.225,-0.047,0.963,-17.185,16.412
Des,12.6822,14.253,0.890,0.381,-16.427,41.791
Irr,16.6055,21.719,0.765,0.450,-27.750,60.961
SleepQ,-31.1177,14.480,-2.149,0.040,-60.691,-1.545
SleepT,5.8181,6.759,0.861,0.396,-7.985,19.621
Readiness,-13.3669,12.958,-1.032,0.311,-39.831,13.097

0,1,2,3
Omnibus:,1.149,Durbin-Watson:,2.362
Prob(Omnibus):,0.563,Jarque-Bera (JB):,0.922
Skew:,0.074,Prob(JB):,0.631
Kurtosis:,2.252,Cond. No.,352.0


In [13]:
# Ordinary least squares: points allowed regressed on the same wellness
# predictors used for the outcome model.
defense_formula = 'TeamPointsAllowed ~ Fat + Sore + Des + Irr + SleepQ + SleepT + Readiness'
defense_model = smf.ols(defense_formula, data=games)
defense_res = defense_model.fit()
defense_res.summary()

0,1,2,3
Dep. Variable:,TeamPointsAllowed,R-squared:,0.165
Model:,OLS,Adj. R-squared:,-0.03
Method:,Least Squares,F-statistic:,0.8461
Date:,"Sun, 07 Apr 2019",Prob (F-statistic):,0.559
Time:,23:01:23,Log-Likelihood:,-133.22
No. Observations:,38,AIC:,282.4
Df Residuals:,30,BIC:,295.5
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-12.7724,58.733,-0.217,0.829,-132.722,107.177
Fat,-11.9329,10.564,-1.130,0.268,-33.507,9.641
Sore,-3.1246,7.459,-0.419,0.678,-18.357,12.108
Des,2.2362,12.924,0.173,0.864,-24.159,28.631
Irr,-7.0840,19.694,-0.360,0.722,-47.304,33.136
SleepQ,14.5318,13.130,1.107,0.277,-12.284,41.348
SleepT,4.4085,6.129,0.719,0.478,-8.108,16.925
Readiness,-7.3942,11.750,-0.629,0.534,-31.391,16.603

0,1,2,3
Omnibus:,1.947,Durbin-Watson:,1.8
Prob(Omnibus):,0.378,Jarque-Bera (JB):,1.243
Skew:,-0.13,Prob(JB):,0.537
Kurtosis:,2.153,Cond. No.,352.0


**Seeing if RPE data can predict wellness data for the next day**

In [124]:
# Show rpe again as a reference for the next-day wellness comparison below.
rpe.head()

Unnamed: 0,Date,PlayerID,Training,SessionType,Duration,RPE,SessionLoad,DailyLoad,AcuteLoad,ChronicLoad,AcuteChronicRatio,ObjectiveRating,FocusRating,BestOutOfMyself,RPE_Z
0,20180721,13,Yes,Mobility/Recovery,5.0,0.0,0.0,446.0,323.1,303.25,1.07,,,Not at all,-1.253472
1,20180721,13,Yes,Game,29.0,7.0,203.0,,,,,,,Not at all,1.153176
2,20180721,13,Yes,Game,27.0,9.0,243.0,,,,,,,Not at all,1.840789
3,20180720,11,Yes,Game,36.0,6.0,216.0,534.0,256.3,344.18,0.74,9.0,10.0,Absolutely,0.750374
4,20180720,11,Yes,Game,36.0,8.0,288.0,,,,,9.0,10.0,Absolutely,1.695403


In [125]:
# Show wellness again as a reference for the next-day wellness comparison below.
wellness.head()

Unnamed: 0,Date,PlayerID,Fatigue,Soreness,Desire,Irritability,BedTime,WakeTime,SleepHours,SleepQuality,...,NutritionAdjustment,USGMeasurement,USG,TrainingReadiness,Fatigue_Z,Soreness_Z,Irritability_Z,Desire_Z,SleepQuality_Z,TrainingReadiness_Z
0,20180721,1,3,3,2,3,23:00:00,07:00:00,8.0,2,...,Yes,No,,0,0.318219,-0.304322,-0.987002,-0.415362,-1.551148,-0.562599
1,20180721,2,4,3,4,4,23:00:00,07:00:00,8.0,4,...,,Yes,1.01,0,0.207884,-0.230312,-0.186841,-0.121453,-0.10039,-0.410237
2,20180721,3,3,3,5,4,22:30:00,06:30:00,8.0,4,...,,Yes,1.016,100,-0.578512,-0.279482,0.143069,2.144929,0.255789,0.558418
3,20180721,4,2,3,5,4,00:30:00,07:00:00,6.5,1,...,Yes,Yes,1.025,95,-1.59178,-0.605094,0.463336,1.575278,-1.447874,0.621488
4,20180721,5,5,3,4,4,23:45:00,07:00:00,7.25,4,...,Yes,Yes,1.022,100,1.808898,-0.983282,0.054038,-0.403411,0.286967,0.891845


Put the average wellness data into the rpe dataframe, where each wellness value comes from the day after the corresponding rpe entry.

In [None]:
def previous_day(df1, df2, )