In [1]:
#basic imports
import numpy as np
import pandas as pd

Todo
- Add in Fitbit sleep analysis and heart rate scores
- Max reps, min reps
- Whether workout was done in order or not
- Exercise with the highest weight
- Total time resting vs. total time working out
- % of time in heart rate zones
- Number of steps, distance, calories, elevation, floors
- Time slept the day before the workout
- Time slept the day of the workout
- Time slept the day after the workout
- General sleep metrics
- Days since last workout
- Days since last categorical workout
- Number of categorical workouts initiated
- My age
- Last known weight
- Day of week
- Duration of workout
- Start time of workout
- End time of workout
- Columnar intraday data for heart rate over time

In [1]:
def format_columns(df):
    '''Normalise column labels: strip, lowercase, drop the " (lbs)" unit suffix, snake_case.

    The labels are rewritten on the frame that is passed in, and that same
    frame is returned so the function can be used with DataFrame.pipe.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are all strings.

    Returns
    -------
    pandas.DataFrame
        The same object, with cleaned column labels.
    '''
    df.columns = (df.columns
                  .str.strip()
                  .str.lower()
                  #remove the (lbs) unit suffix from weight; labels are already lowercase here.
                  #regex=False makes this a literal match on every pandas version
                  #(the default for `regex` changed in pandas 2.0)
                  .str.replace(' (lbs)', '', regex=False)
                  #remaining spaces become underscores
                  .str.replace(' ', '_', regex=False)
                 )
    return df

def impute_workout_features(df, routines):
    '''Derive per-set workout features and label every set with its routine.

    The work is done on a copy, so the frame passed in is left untouched and
    the function can be re-run on the same input.

    Parameters
    ----------
    df : pandas.DataFrame
        Workout log. The columns read are category, date, weight, reps,
        distance and time.
    routines : pandas.DataFrame
        One row per routine. The columns read are category, routine,
        start_date and end_date.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` indexed by date, with ``time`` converted to seconds and
        the columns volume, one_rep_max, speed, workout_day, set_completed and
        routine added.
    '''
    #never mutate the caller's frame
    df = df.copy()

    #merging together some musclegroup categories
    condensed_categories = {
        'Triceps': 'Chest',
        'Biceps': 'Back'
    }
    df['category'] = df['category'].replace(condensed_categories)

    #parse dates so they can be compared with the routine windows and used as index
    df['date'] = pd.to_datetime(df['date'])

    #calculate total volume
    df['volume'] = df['weight'] * df['reps']
    #Epley-formulated 1-rep max: weight * (1 + reps / 30)
    df['one_rep_max'] = df['weight'] + df['weight'] * df['reps'] / 30

    #duration in seconds; the hour component is included so sessions of an hour or more are not truncated
    clock = pd.to_datetime(df['time'])
    df['time'] = clock.dt.hour * 3600 + clock.dt.minute * 60 + clock.dt.second
    #speed for cardio, in distance units per second
    df['speed'] = df['distance'] / df['time']

    def _most_common(categories):
        '''Most frequent category of a day, NaN when the day has none recorded.'''
        counts = categories.value_counts()
        return counts.index[0] if len(counts) else np.nan

    #impute workout day by the max number of lifts in a category
    df['workout_day'] = df.groupby('date')['category'].transform(_most_common)

    #a set counts as completed when any weight was moved
    df['set_completed'] = np.where(df['weight'] > 0, 1, 0)

    #start with no routine so the column exists even when `routines` is empty;
    #object dtype lets it hold the routine names next to the NaNs
    if 'routine' not in df.columns:
        df['routine'] = pd.Series(np.nan, index = df.index, dtype = object)

    #assign the routine each workout belongs to
    for row in routines.itertuples():
        #only rows of this category whose date lies inside the routine window (bounds inclusive)
        #are labelled; everything else keeps its value, so several routines for one category
        #no longer wipe each other out
        in_window = df['date'].between(pd.to_datetime(row.start_date),
                                       pd.to_datetime(row.end_date))
        df.loc[(df['category'] == row.category) & in_window, 'routine'] = row.routine

    #set index as date for resampling later
    df = df.set_index('date')

    return df

def compute_gains_matrix(df):
    '''Create matrix of aggregated key performance indicators for workouts.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-set data indexed by date. The columns read are category, volume,
        reps, weight, one_rep_max, workout_day, set_completed and routine.

    Returns
    -------
    pandas.DataFrame
        One row per date (Cardio sets excluded) with total_volume, mean_reps,
        max_weight, one_rep_max, workout_day, completion_rate and routine.
        Dates without a routine carry the placeholder 'n/a'.
    '''
    #remove cardio workouts
    lifting = df.loc[df['category'] != 'Cardio']
    #fill missing routines so the max aggfunc can work on the routine;
    #assign() builds a new frame instead of writing into a slice of the caller's data
    lifting = lifting.assign(routine = lifting['routine'].fillna('n/a'))

    #how each column is collapsed to one value per date
    aggregations = {'volume': 'sum',
                    'reps': 'mean',
                    'weight': 'max',
                    'one_rep_max': 'max',
                    'workout_day': 'max',
                    'set_completed': 'mean',
                    'routine': 'max'}
    #get key performance indicators for the workout
    gains = lifting.pivot_table(index = 'date',
                                values = list(aggregations),
                                aggfunc = aggregations)
    #renaming the set completed to completion rate because all sets comprise a workout
    gains = gains.rename(columns = {'set_completed': 'completion_rate',
                                    'reps': 'mean_reps',
                                    'weight': 'max_weight',
                                    'volume': 'total_volume'})
    return gains

In [5]:
#load the raw FitNotes export and normalise its column names
fitnotes = pd.read_csv('fitnotes/FitNotes_Export.csv').pipe(format_columns)
#the routine tracker is an Excel workbook, so it is read with read_excel;
#parse_dates (not `dates`) is the keyword that converts the two date columns,
#and it uses the original headers because format_columns runs afterwards
routines = pd.read_excel('routines/routine_tracker.xlsx',
                         parse_dates = ['Start Date', 'End Date']).pipe(format_columns)

#per-set features, written out for later use
lifts = impute_workout_features(fitnotes, routines)
lifts.to_csv('lifts.csv')

#per-day key performance indicators
gains = compute_gains_matrix(lifts)
gains.to_csv('gains.csv')