# FitnessMetric

## Setup and Data Preparation

In [None]:
import pandas as pd
import numpy as np
import os
from data import *

In [None]:
# Load the trainings table from disk.
df_trainings = pd.read_csv('trainings.csv')

In [None]:
# Load the additional feature table from disk.
new_features = pd.read_csv('new_features.csv')

In [None]:
# Left join on training_id: every row of new_features is kept and the
# matching columns of df_trainings are attached to it.
features = new_features.merge(df_trainings, on='training_id', how='left')

In [None]:
# NOTE(review): this rebinds `features` to the contents of merged.csv, so the
# merge result from the previous cell is discarded when both cells are run in
# order. Presumably merged.csv is a saved copy of that merge - confirm.
features = pd.read_csv('merged.csv')

In [None]:
# Parse the start_date column into pandas datetimes.
features['start_date'] = pd.to_datetime(features['start_date'])

In [None]:
# Default date, user id and training id.
# NOTE(review): none of these are referenced in the visible cells; the value
# after the `#` on the default_user line is a commented-out alternative id.
default_date = '2020-01-01 00:00:00'
default_user = 'b9c3a54f8d5837e88a86bdb47eee432218aa2530012afaca527b29e662e20cb6'#'0638ef6493b8192ade0e0d8429306b7685fe1191cc6e1337fab13521922cf226'
default_training = 'f53cad390efa69cae1d05ed39f60c82f65861dfadb8f41867f0a96fa806a26be'

In [None]:
# Add the day of the year of each training ('day') and its distance to day 365
# ('days_discount'), then order the rows by user_id and days_discount, both
# descending, so that within a user the rows run from the smallest day to the
# largest.
features = (
    features
    .assign(
        day=lambda frame: pd.to_datetime(frame['start_date']).dt.dayofyear,
        days_discount=lambda frame: 365 - frame['day'],
    )
    .sort_values(by=['user_id', 'days_discount'], ascending=False)
)

## Metric Calculation

In [None]:
#
# this function calculates the Cardiovascular Efficiency
#
def calculate_CVE(df_features, training_id):
    """Return the Cardiovascular Efficiency (CVE) of a single training.

    The score is the user's heart-rate reserve during the run (highest
    ``hr_max`` over all of the user's rows minus this run's ``hr_mean``)
    divided by the run's split average and by a fixed scale of 5.

    Args:
        df_features: DataFrame with the columns ``training_id``, ``user_id``,
            ``hr_max``, ``hr_mean`` and ``speed_mean_30s``.
        training_id: Identifier of the training to score. If several rows
            share the id, the first one is used.

    Returns:
        The CVE value as a float.

    Raises:
        IndexError: If no row of ``df_features`` has the given training_id.
    """
    run = df_features[df_features['training_id'] == training_id]
    if run.empty:
        # Same exception type the bare `.values[0]` lookup would raise, but
        # with a message that names the offending id.
        raise IndexError(f"training_id {training_id!r} not found in df_features")
    user_id = run['user_id'].values[0]
    # Highest heart rate ever recorded for that person, across all their runs.
    max_hr = df_features.loc[df_features['user_id'] == user_id, 'hr_max'].max()
    mean_hr = run['hr_mean'].values[0]
    hr_dist = max_hr - mean_hr
    # 1/speed scaled by 50/3 (= 1000/60); presumably converts a speed in m/s
    # to a pace in min/km - TODO confirm the unit of speed_mean_30s.
    split_avg = 1/run['speed_mean_30s'].values[0] * 50 / 3
    return hr_dist/split_avg/5
# calculate the CVE for all the runs
# The column starts as float: calculate_CVE returns floats, and writing them
# into an integer-initialised column relies on implicit upcasting, which newer
# pandas versions warn about or reject.
features['CVE'] = 0.0
for training_id in features['training_id'].unique():
    CVE = calculate_CVE(features, training_id)
    features.loc[features['training_id'] == training_id, 'CVE'] = CVE
#
# this calculates the Training Load
#
# duration divided by 3600 (assumes `duration` is in seconds - TODO confirm)
features['duration_h'] = features['duration'] / 60  / 60
#
# this calculates the Discounted Values after every Training
#
# discount factor based on the days since the run (Research Value)
factor = 0.995
# Per-row weight factor**days_discount, shared by every discounting step below.
discount_weight = factor**features['days_discount']
# Fill missing values with the mean of the whole column (all users).
# NOTE(review): an earlier comment said "mean of user", but the fill value has
# always been the overall mean - confirm which one is intended.
features['CVE'] = features['CVE'].fillna(features['CVE'].mean())
features['duration_h'] = features['duration_h'].fillna(features['duration_h'].mean())
# Within each user, move every value down by one row so that a row carries the
# value of the user's previous row; the first row of each user becomes NaN.
features['CVE'] = features.groupby('user_id')['CVE'].shift(1)
features['duration_h'] = features.groupby('user_id')['duration_h'].shift(1)
# Replace the remaining NaN of each user with 67 times that user's median.
# NOTE(review): an earlier comment said "25% percentile"; the code uses
# quantile(0.5) scaled by 67 - confirm the intended seed value.
# transform() keeps the result aligned with the original row index, which
# groupby.apply() no longer guarantees on pandas >= 2.0.
features['CVE'] = features.groupby('user_id')['CVE'].transform(lambda x: x.fillna(x.quantile(0.5)*67))
features['duration_h'] = features.groupby('user_id')['duration_h'].transform(lambda x: x.fillna(x.quantile(0.5)*67))
# calculate the training_score for all the runs
# Discount each value, accumulate per user in the current row order, then
# divide by the row's own weight so the running total is expressed relative to
# that row. The cumsum relies on the rows already being ordered per user.
features['CVE_discounted'] = features['CVE'] * discount_weight
features['CVE_discounted_cumsum'] = features.groupby('user_id')['CVE_discounted'].cumsum()
features['Total_CVE'] = features['CVE_discounted_cumsum'] / discount_weight
features['duration_h_discounted'] = features['duration_h'] * discount_weight
features['duration_h_discounted_cumsum'] = features.groupby('user_id')['duration_h_discounted'].cumsum()
features['Training_Load'] = features['duration_h_discounted_cumsum'] / discount_weight
# Personal Training Effectiveness (PTE)
features['PTE'] = features['Total_CVE'] - features['Training_Load']
# Keep only the identifying columns and the three metrics (PTE, Training_Load,
# Total_CVE) for plotting.
plot_features = features[['start_date','training_id', 'user_id', 'type', 'PTE', 'Training_Load', 'Total_CVE']]

## Plot the fitness of selected users

In [None]:
# First ten distinct user ids in row order (despite the name, this is not a
# random sample).
random_users = plot_features['user_id'].unique()[:10]

In [None]:
import matplotlib.pyplot as plt

# Output folder for the per-user figures. Created once up front;
# exist_ok=True makes the call a no-op when the folder is already there.
directory = "fitness_plots"
os.makedirs(directory, exist_ok=True)

# One figure per user in random_users, saved as <directory>/<user>.png.
for user in random_users:
    # Select this user's rows once and reuse them for both axes.
    user_rows = plot_features[plot_features['user_id'] == user]

    fig, ax1 = plt.subplots(figsize=(20, 10))

    # Left y-axis: Training_Load and Total_CVE over start_date.
    user_rows.plot(x='start_date', y=['Training_Load', 'Total_CVE'], ax=ax1)
    ax1.set_xlabel('Start Date')
    ax1.set_ylabel('Training Load and Total CVE')
    ax1.tick_params(axis='y')

    # Right y-axis (sharing the x-axis): PTE.
    ax2 = ax1.twinx()
    user_rows.plot(x='start_date', y='PTE', ax=ax2, color='tab:green')
    ax2.set_ylabel('PTE')
    ax2.tick_params(axis='y')

    # Labels and title are set on the axes objects only. The pyplot-level
    # xlabel/ylabel calls used previously act on the current axes and replaced
    # the labels assigned above.
    ax1.set_title(f'Fitness vs. Day User_ID: {user}')

    # save in directory
    fig.savefig(os.path.join(directory, f'{user}.png'))

    plt.show()
    # Release the figure so figures do not pile up across iterations.
    plt.close(fig)