In [None]:
from datetime import datetime
import json
import os
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import dateutil.parser

import seaborn as sns
sns.set(style="whitegrid")


In [11]:
# Load the notebook configuration; DATA_DIRECTORY is the folder whose
# sub-folders are scanned below.
with open('configs.json', 'r') as f:
    configs = json.load(f)

path = configs['DATA_DIRECTORY']

# os.scandir yields entries in arbitrary, OS-dependent order. Participant ids
# are derived from the position in this list further down, so sort the folders
# to keep the id assignment stable between runs and machines. The context
# manager closes the directory handle as soon as the listing is done.
with os.scandir(path) as entries:
    list_subfolders_with_paths = sorted(entry.path for entry in entries if entry.is_dir())


In [27]:
def _to_naive_utc(raw_timestamp):
    """Parse a timestamp string into a naive UTC datetime, truncated to whole seconds.

    NOTE(review): strings without a UTC offset are interpreted in the machine's
    local time zone by `datetime.timestamp()` — confirm the CSVs carry offsets.
    """
    return datetime.utcfromtimestamp(int(dateutil.parser.parse(raw_timestamp).timestamp()))


def _read_pmsys_table(csv_file, pid, time_column):
    """Read one CSV, tag every row with the participant id and sort by time.

    Args:
        csv_file: Path of the CSV file to read.
        pid: Participant id stored in the new `pid` column.
        time_column: Name of the column holding timestamp strings; it is
            converted with `_to_naive_utc` and used as the sort key.

    Returns:
        The loaded DataFrame, sorted by `time_column`.
    """
    table = pd.read_csv(csv_file)
    table['pid'] = pid
    table[time_column] = table[time_column].apply(_to_naive_utc)
    return table.sort_values(time_column)


def _combine(frames):
    """Concatenate per-participant frames once; an empty list gives an empty DataFrame."""
    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()


# Per-participant frames are collected in lists and concatenated once at the end;
# calling pd.concat inside the loop re-copies all earlier rows on every iteration.
srpe_frames = []
wellness_frames = []
injury_frames = []

for i, participant_dir in enumerate(list_subfolders_with_paths):
    # Participant ids are 1-based positions in the folder list.
    pid = i + 1

    # read srpe data
    # os.path.join keeps the paths valid on every OS instead of hard-coding '\\'.
    srpe_file = os.path.join(participant_dir, 'pmsys', 'srpe.csv')
    if os.path.exists(srpe_file):
        srpe_frames.append(_read_pmsys_table(srpe_file, pid, 'end_date_time'))
    else:
        print(f"File {srpe_file} doesn't exist!")

    # read wellness data
    wellness_file = os.path.join(participant_dir, 'pmsys', 'wellness.csv')
    if os.path.exists(wellness_file):
        wellness_frames.append(_read_pmsys_table(wellness_file, pid, 'effective_time_frame'))
    else:
        print(f"File {wellness_file} doesn't exist!")

    # read injury data
    injury_file = os.path.join(participant_dir, 'pmsys', 'injury.csv')
    if os.path.exists(injury_file):
        injury_frames.append(_read_pmsys_table(injury_file, pid, 'effective_time_frame'))
    else:
        print(f"File '{injury_file}' doesn't exist!")

srpe = _combine(srpe_frames)
wellness = _combine(wellness_frames)
injury = _combine(injury_frames)
       
    


File 'D:\Pet\Datasets\pmdata\p08\pmsys\injury.csv' doesn't exist!


In [None]:
# Preview the first rows of the combined sRPE table.
srpe.head()

In [None]:
# Report the size of the sRPE table and, per column, how many values are missing.
print("****************** SRPE ******************")
print("Total numbers of records:", len(srpe))
print("Number of entries equal to null:")
for column in ('end_date_time', 'perceived_exertion', 'duration_min'):
    missing = int(srpe[column].isnull().sum())
    print(f"   {column}: ", missing)

In [None]:
# One panel per participant, three panels per row, showing the session load
# (perceived exertion x duration) against the row index of `srpe`.
participant_ids = sorted(srpe['pid'].unique())
n_columns = 3
n_rows = max(1, int(np.ceil(len(participant_ids) / n_columns)))

# squeeze=False keeps `axs` two-dimensional even when there is a single row,
# so the [row, column] indexing below always works.
fig, axs = plt.subplots(n_rows, n_columns, figsize=(12, 10), squeeze=False)

# Panels are placed by position rather than by `pid - 1`: a participant without
# an sRPE file leaves a gap in the ids, which would index past the grid.
for position, pid in enumerate(participant_ids):
    ax = axs[position // n_columns, position % n_columns]
    y = srpe.loc[srpe['pid'] == pid, ['perceived_exertion', 'duration_min']]
    ax.plot(y['perceived_exertion'] * y['duration_min'])
    ax.set_title(f'Participant {pid}')

for ax in axs.flat:
    ax.set(ylabel='SRPE')
    # Hide x labels and tick labels for top plots and y ticks for right plots.
    ax.label_outer()

In [None]:
# Keep only the calendar day of each session's end timestamp.
srpe['date'] = srpe['end_date_time'].map(lambda stamp: stamp.date())

srpe.head()

In [None]:
# Rows that repeat an earlier (date, pid) pair, i.e. further sessions logged by
# the same participant on the same day.
is_repeat = srpe.duplicated(subset=['date', 'pid'])
duplicate_rows = srpe.loc[is_repeat]
print(duplicate_rows)

In [None]:
# Collapse multiple sessions per participant and day into one row of daily totals.
# NOTE(review): the ratings themselves are summed here, and a later cell multiplies
# the summed rating by the summed duration; for days with several sessions that
# differs from the sum of per-session loads — confirm this is intended.
daily_totals = {'perceived_exertion': 'sum', 'duration_min': 'sum'}
sessions_by_day = srpe.groupby(['date', 'pid'])
srpe_aggregated = sessions_by_day.agg(daily_totals).reset_index()

srpe_aggregated.head()

In [None]:
# Summary statistics of the daily totals.
print('Statistics: ')
srpe_aggregated[['perceived_exertion', 'duration_min']].describe()

In [None]:
# Day names indexed by date.weekday() (0 = Monday ... 6 = Sunday).
week_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
srpe_aggregated['weekday'] = [week_days[day.weekday()] for day in srpe_aggregated['date']]

In [None]:
# `weekday` is categorical, so count the rows per day and draw the bars in calendar
# order. plt.hist would spread the category positions over its default 10 bins and
# order the days by first appearance in the data.
weekday_counts = srpe_aggregated['weekday'].value_counts().reindex(week_days, fill_value=0)

fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(weekday_counts.index, weekday_counts.values)
ax.set(xlabel='Weekday', ylabel='Participant-days with a session', title='Training days by weekday')
plt.show()


In [None]:
# Preview the first rows of the combined wellness table.
wellness.head()

In [None]:
def _parse_soreness_area(value):
    """Decode one raw `soreness_area` cell.

    Strings are parsed as JSON after swapping single quotes for double quotes;
    the literal '[]' becomes None. Anything that is not a string (missing values,
    or values already decoded by an earlier run of this cell) is returned
    unchanged, so re-running the cell no longer fails on `.replace`.

    NOTE(review): the quote swap breaks on entries that contain an apostrophe —
    confirm the column never holds such text.
    """
    if not isinstance(value, str):
        return value
    if value == '[]':
        return None
    return json.loads(value.replace("'", '"'))


wellness['soreness_area'] = wellness['soreness_area'].map(_parse_soreness_area)

In [None]:
# Report the size of the wellness table and, per column, how many values are missing.
print("***************** Wellness *****************")
print("Total numbers of records:", len(wellness))
print("Number of entries equal to null:")
wellness_columns = (
    'effective_time_frame',
    'fatigue',
    'mood',
    'readiness',
    'sleep_duration_h',
    'sleep_quality',
    'soreness',
    'soreness_area',
    'stress',
)
for column in wellness_columns:
    missing = int(wellness[column].isnull().sum())
    print(f"   {column}: ", missing)

In [None]:
# Pairwise scatter plots (with per-variable distributions) of the wellness scores.
wellness_scores = ['mood', 'stress', 'soreness', 'fatigue', 'sleep_duration_h', 'sleep_quality', 'readiness']
sns.pairplot(wellness[wellness_scores])

In [None]:
# Keep only the calendar day of each wellness report's timestamp.
wellness['date'] = wellness['effective_time_frame'].map(lambda stamp: stamp.date())

wellness.head()

In [None]:
# Preview the first rows of the combined injury table.
injury.head()

In [None]:
def _parse_injuries(value):
    """Decode one raw `injuries` cell.

    Strings are parsed as JSON after swapping single quotes for double quotes;
    the literal '{}' becomes None. Anything that is not a string (missing values,
    or values already decoded by an earlier run of this cell) is returned
    unchanged, so re-running the cell no longer fails on `.replace`.

    NOTE(review): the quote swap breaks on entries that contain an apostrophe —
    confirm the column never holds such text.
    """
    if not isinstance(value, str):
        return value
    if value == '{}':
        return None
    return json.loads(value.replace("'", '"'))


injury['injuries'] = injury['injuries'].map(_parse_injuries)

In [None]:
# Report the size of the injury table and, per column, how many values are missing.
print("****************** Injury *******************")
print("Total numbers of records:", len(injury))
print("Number of entries equal to null:")
for column in ('effective_time_frame', 'injuries'):
    missing = int(injury[column].isnull().sum())
    print(f"   {column}: ", missing)


In [None]:
# Keep only the calendar day of each injury report's timestamp.
injury['date'] = injury['effective_time_frame'].map(lambda stamp: stamp.date())

injury.head()

In [None]:
# Scoring: each 'minor' injury counts 1, each 'major' injury counts 2.
def severity(data):
    """Sum the severity points of every injury listed in `data['injuries']`.

    Each entry of the column is read as a mapping whose values are severity
    labels. Labels other than 'minor' and 'major' add nothing to the score and
    print a warning line.
    """
    labels = [
        label
        for reported in data['injuries'].values
        for label in reported.values()
    ]

    total = 0
    for label in labels:
        if label == 'major':
            total += 2
        elif label == 'minor':
            total += 1
        else:
            print('Different type of severity!')
    return total


# Build one row per participant and day that has a logged injury, holding the
# summed severity score for that day.
injured_entries = injury.loc[injury["injuries"].notnull()]

# Per-participant frames are collected and concatenated once: growing a DataFrame
# with pd.concat inside the loop re-copies all earlier rows on every iteration.
severity_frames = []
for pid, injury_group in injured_entries.groupby('pid'):
    daily_scores = [(date, severity(group)) for date, group in injury_group.groupby('date')]
    severity_temp = pd.DataFrame(daily_scores, columns=['date', 'injury_severity'])
    severity_temp['pid'] = pid
    severity_frames.append(severity_temp.sort_values('date'))

if severity_frames:
    injury_severity = pd.concat(severity_frames, ignore_index=True)
else:
    # No injuries at all: keep the expected columns (with integer pid/score) so the
    # later `injury_severity['pid']` lookup and the merge on ['pid', 'date'] still work.
    injury_severity = pd.DataFrame(columns=['date', 'injury_severity', 'pid']).astype(
        {'injury_severity': 'int64', 'pid': 'int64'}
    )

In [None]:
# Preview the first rows of the per-day injury severity table.
injury_severity.head()

In [None]:
# `injury_severity` holds one row per participant and injured day, so counting
# rows per pid gives the number of injured days for each participant.
print('The number of days with injury by participant id: ')
injury_severity['pid'].value_counts()

In [None]:
# Start from the wellness reports and attach, per participant and day, the injury
# severity and the daily training totals. Left joins keep every wellness row;
# days without a match get NaN in the attached columns.
join_keys = ['pid', 'date']
wellness_with_injury = pd.merge(wellness, injury_severity, how='left', on=join_keys)

srpe_daily = srpe_aggregated[['date', 'pid', 'perceived_exertion', 'duration_min']]
df = pd.merge(wellness_with_injury, srpe_daily, how='left', on=join_keys)

In [None]:
# Days without an injury or training record carry NaN after the left joins;
# treat them as zero severity / zero load.
for column in ('injury_severity', 'perceived_exertion', 'duration_min'):
    df[column] = df[column].fillna(0)

In [None]:
# Remove columns that are not used in the analysis below. errors='ignore' lets the
# cell be re-run: an in-place drop raises KeyError once the columns are already gone.
df = df.drop(columns=['effective_time_frame', 'soreness_area'], errors='ignore')

In [None]:
# Preview the first rows of the merged table.
df.head()

In [None]:
# Number of rows in the merged table.
len(df)

In [None]:
# Number of rows per participant in the merged table.
df['pid'].value_counts()

In [None]:
# Count the non-missing values in every column of the merged table.
print('Non-NA cells for each column:')
df.count()

In [None]:
# Daily load: perceived exertion multiplied by duration.
# NOTE(review): both factors are per-day sums produced upstream, so on days with
# several sessions this is (sum of ratings) * (sum of minutes) rather than the
# sum of the per-session loads — confirm this is intended.
df['srpe'] = df['perceived_exertion'] * df['duration_min']

# assumes each participant has exactly one row per consecutive day, in date order,
# so that a 7-row window corresponds to one week — TODO confirm
srpe_by_participant = df.groupby('pid')['srpe']

# transform() returns values aligned with df's own index. groupby().apply() adds the
# group key as an extra index level in recent pandas versions, and that result
# cannot be assigned back to a column.
weekly_mean_over_std = srpe_by_participant.transform(
    lambda load: load.rolling(7).mean() / load.rolling(7).std()
)
weekly_load = srpe_by_participant.transform(lambda load: load.rolling(7).sum())

# Reflection of training variation across the week
# Monotony = Mean sRPE/Standard Deviation
# A week of identical non-zero loads has zero standard deviation and yields inf;
# store it as NaN so the per-participant means and plots are not swamped by it.
df['monotony'] = weekly_mean_over_std.replace([np.inf, -np.inf], np.nan)

# Reflection of overall training stress from the week
# Strain = Weekly sRPE*Monotony
df['strain'] = weekly_load * df['monotony']

In [None]:
# Per-participant mean and standard deviation of the daily load, monotony and strain.
# DataFrame.append was removed in pandas 2.0, so each table is built with a single
# groupby aggregation instead; the result is indexed by `pid` with columns
# 'mean' and 'std'.
srpe_statistics = df.groupby('pid')['srpe'].agg(['mean', 'std'])
monotony_statistics = df.groupby('pid')['monotony'].agg(['mean', 'std'])
strain_statistics = df.groupby('pid')['strain'].agg(['mean', 'std'])

# Keep integer participant ids on the index so the bar plots label them as 1, 2, ...
srpe_statistics.index = srpe_statistics.index.astype(int)
monotony_statistics.index = monotony_statistics.index.astype(int)
strain_statistics.index = strain_statistics.index.astype(int)
    

In [None]:
# One figure per metric, with separate bar panels for the 'mean' and 'std' columns.
titled_tables = (
    (srpe_statistics, 'SRPE Statistics'),
    (monotony_statistics, 'Monotony Statistics'),
    (strain_statistics, 'Strain Statistics'),
)
for table, figure_title in titled_tables:
    table.plot.bar(rot=0, subplots=True, figsize=(8, 6), title=figure_title)
plt.show()

In [None]:
# One panel per participant, three panels per row, showing monotony against the
# row index of `df`.
participant_ids = sorted(df['pid'].unique())
n_columns = 3
n_rows = max(1, int(np.ceil(len(participant_ids) / n_columns)))

# squeeze=False keeps `axs` two-dimensional even when there is a single row,
# so the [row, column] indexing below always works.
fig, axs = plt.subplots(n_rows, n_columns, figsize=(12, 10), squeeze=False)

# Panels are placed by position rather than by `pid - 1`: a gap in the participant
# ids would otherwise index past the grid.
for position, pid in enumerate(participant_ids):
    ax = axs[position // n_columns, position % n_columns]
    y = df.loc[df['pid'] == pid, ['monotony']]
    ax.plot(y)
    ax.set_title(f'Participant {pid}')

for ax in axs.flat:
    ax.set(ylabel='Monotony')
    # Hide x labels and tick labels for top plots and y ticks for right plots.
    ax.label_outer()

In [None]:
# One panel per participant, three panels per row, showing strain against the
# row index of `df`.
participant_ids = sorted(df['pid'].unique())
n_columns = 3
n_rows = max(1, int(np.ceil(len(participant_ids) / n_columns)))

# squeeze=False keeps `axs` two-dimensional even when there is a single row,
# so the [row, column] indexing below always works.
fig, axs = plt.subplots(n_rows, n_columns, figsize=(12, 10), squeeze=False)

# Panels are placed by position rather than by `pid - 1`: a gap in the participant
# ids would otherwise index past the grid.
for position, pid in enumerate(participant_ids):
    ax = axs[position // n_columns, position % n_columns]
    y = df.loc[df['pid'] == pid, ['strain']]
    ax.plot(y)
    ax.set_title(f'Participant {pid}')

for ax in axs.flat:
    ax.set(ylabel='Strain')
    # Hide x labels and tick labels for top plots and y ticks for right plots.
    ax.label_outer()

In [None]:
# The original cell referenced `raw_data`, `train_df_copy` and `columns`, none of
# which are defined in this notebook, so it raised NameError on a fresh kernel.
# NOTE(review): the merged table `df` is assumed to be the intended input — confirm.
# Only numeric columns can be correlated; `pid` is an identifier, not a
# measurement, so it is left out.
numeric_features = df.select_dtypes(include='number').drop(columns=['pid'], errors='ignore')

# Overview: pairwise correlations, colour scale capped at 0.8.
corrmat = numeric_features.corr()
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(corrmat, vmax=.8, square=True, ax=ax)

# Annotated version computed on rows without any missing value.
corr = numeric_features.dropna().corr()
plt.figure(figsize = (16,16))
# Passing the style explicitly keeps the whitegrid theme chosen at the top of the
# notebook; a bare sns.set() call would switch back to seaborn's default style.
sns.set(style="whitegrid", font_scale=1)
sns.heatmap(corr, cbar = True, annot=True, square = True, fmt = '.2f', xticklabels=True, yticklabels=True)
plt.show()

In [None]:
"Let's look"

In [None]:
# 1. https://www.researchgate.net/publication/289128300_How_to_monitor_training_load_and_mode_using_sRPE
# "if the same total training load was instead equally divided into several consecutive ‘medium’ training days, the score for monotony would be high and the athlete’s risk of illness, overtraining and under-performance would increase."