# Fitnotes Processing Notebook
This notebook processes workout data manually collected from the Fitnotes workout Android application, creating four tables:
- Sets, showing set-level data during the workout.
- Exercises, showing aggregate set, rep, and volume metrics for each exercise.
- Workouts, showing aggregate workout metadata.
- Intensity, a reference table relating each training response to its intensity (as a fraction of the one-rep max), repetitions per set, and volume ranges.

In [1]:
#basic imports
import numpy as np
import pandas as pd

import glob
from datetime import date

In [2]:
def format_columns(df):
    '''
    Normalize the column names of df.

    Each name is stripped of surrounding whitespace, lowercased, has the
    " (lbs)" unit suffix removed, and has its remaining spaces replaced with
    underscores. The columns are renamed in place and the same frame is
    returned, so the function can be used with DataFrame.pipe.
    '''
    df.columns = (df.columns
                  .str.strip()
                  .str.lower()
                  # match the suffix literally: with regex = False the parentheses
                  # need no escaping and the result does not depend on the pandas
                  # default for `regex`
                  .str.replace(' (lbs)', '', regex = False)
                  .str.replace(' ', '_', regex = False)
                 )
    return df

def impute_workout_features(df, routines):
    '''
    Derive set-level features from the workout log.

    Parameters
    ----------
    df : DataFrame with one row per logged set and the columns date, exercise,
        category, weight, reps, distance_unit, distance, time and comment.
    routines : DataFrame with one row per exercise of a routine and the columns
        exercise, start_date, end_date, routine, superset and sub_superset.

    Returns
    -------
    A new DataFrame, sorted by date, superset, exercise_set and sub_superset,
    holding the columns listed in `col_order` below. The input frame is left
    unmodified.
    '''

    # work on a copy: the caller's frame stays untouched, so the function can be
    # called again on the same input (e.g. when a notebook cell is re-run)
    df = df.copy()

    # format dates
    df['date'] = pd.to_datetime(df.date)

    # convert the cardio duration to seconds, including the hour component so that
    # sessions of an hour or more are not truncated, then calculate speed in
    # distance units per second
    # NOTE(review): assumes `time` holds clock-style strings such as H:MM:SS -- confirm
    duration = pd.to_datetime(df['time'])
    df['time'] = duration.dt.hour * 3600 + duration.dt.minute * 60 + duration.dt.second
    df['speed'] = df['distance'] / df['time']

    # merge together some muscle groups
    condensed_categories = {
        'Triceps': 'Chest',
        'Biceps': 'Back'
    }
    df['category'] = df.category.replace(condensed_categories)

    # calculate total volume and Epley-formulated 1-rep max
    df['volume'] = df['weight'] * df['reps']
    df['one_rep_max'] = df['weight'] + df['weight'] * df['reps'] / 30

    # a set counts as completed when weight was moved, or, for cardio, distance was covered
    df['set_completed'] = 0
    df.loc[(df.category == 'Cardio') & (df.distance > 0), 'set_completed'] = 1
    df.loc[df.weight > 0, 'set_completed'] = 1

    # workout id (starting at one) and the running number of each set within its
    # exercise; sets that were not completed get no exercise_set number
    df['id'] = df.groupby('date').ngroup() + 1
    df['exercise_set'] = df.groupby(['date', 'exercise', 'set_completed'], as_index = False).cumcount() + 1
    df['exercise_set'] = df.exercise_set.mask(df.set_completed == 0, np.nan)

    # assign routine, superset, and order within each superset
    cols = ['routine', 'superset', 'sub_superset']
    for col in cols:
        df[col] = np.nan

    # each row of routines is one exercise of a routine; rows are applied in order,
    # so a later matching row overwrites an earlier one
    for _, row in routines.iterrows():

        # performed sets of this exercise that fall within the routine's dates
        # (computed once per routine row and reused for every column)
        exercise_within_dates = (df.exercise == row.exercise) & df.date.between(row.start_date, row.end_date)

        for col in cols:
            df[col] = df[col].mask(exercise_within_dates, row[col])

    # sort exercises into the order they were performed during the workout
    df = df.sort_values(['date', 'superset', 'exercise_set', 'sub_superset'])

    # define workout_set after sorting; grouping by set completion keeps
    # uncompleted sets out of the count of completed ones
    df['workout_set'] = df.groupby(['date', 'set_completed'], as_index = False).cumcount() + 1

    # set column order
    col_order = ['id', 'date', 'exercise', 'routine', 'category', 'workout_set', 'exercise_set',
                 'superset', 'sub_superset', 'weight', 'reps', 'volume', 'one_rep_max',
                 'distance_unit', 'distance', 'time',  'speed', 'set_completed', 'comment']

    df = df[col_order]

    return df

def calculate_age(x, born = date(1995, 1, 1)):
    '''
    Calculate how old I am on a particular date.

    x: the date to compute the age on (anything with year, month and day attributes).
    born: the birth date; defaults to date(1995, 1, 1).

    Returns the number of whole years between born and x.
    '''
    # source: https://stackoverflow.com/questions/2217488/age-from-birthdate-in-python/2259711
    # The age is the difference in calendar years, minus one when x falls before
    # the birthday within its year (the comparison yields True == 1 in that case).
    age = x.year - born.year - ((x.month, x.day) < (born.month, born.day))
    return age

def rank_muscle_volume(s):
    '''
    Assign each muscle group to a column based on rank.

    s: Series whose index holds the muscle groups and whose values are their
       ranks, counted from 1.

    Returns a Series keyed 'muscle_<rank>' whose values are the muscle groups.
    A rank that no muscle group holds (e.g. a gap left by tied ranks) is
    skipped; when several muscle groups share a rank, the first one in index
    order is used. An empty or all-NaN input gives an empty Series.
    '''
    ranked = {}

    # without a highest rank there is nothing to spread into columns
    highest = s.max()
    if pd.isna(highest):
        return pd.Series(ranked, dtype = object)

    # iterate through ranks and pick the index value (the muscle group) of the
    # series row corresponding to the rank
    for i in np.arange(1, highest + 1):
        holders = s[s == i].index
        if len(holders) == 0:
            continue
        ranked['muscle_{}'.format(i)] = holders[0]

    return pd.Series(ranked)

def compute_gains_matrix(df):
    '''
    Create matrix of aggregated key performance indicators for workouts.

    df: set-level DataFrame with the columns id, date, exercise, category,
        weight, reps, volume and set_completed.

    Returns one row per (id, date) with the columns id, date, weekday,
    category, primary_lift, volume, mean_reps, completion_rate,
    days_since_workout, days_since_category and age.
    '''

    # get key workout performance indicators
    agg = {
        'volume': 'sum',
        'reps': 'mean',
        'set_completed': 'mean',
    }
    gains = df.groupby(['id', 'date'], as_index = False).agg(agg)

    # rename set_completed to completion_rate because all sets comprise the workout
    gains = gains.rename(columns = {'set_completed': 'completion_rate',
                                    'reps': 'mean_reps'})

    def label_at_daily_max(label_col, value_col):
        # For every workout date, take label_col from the first row that reaches the
        # day's maximum of value_col. Rows without a value are ignored, so a date
        # with no values at all has no entry and falls back to 'Cardio'.
        valued = df.dropna(subset = [value_col])
        daily_max = valued.groupby('date')[value_col].transform('max')
        winners = valued[valued[value_col] == daily_max].drop_duplicates('date')
        return gains.date.map(winners.set_index('date')[label_col]).fillna('Cardio')

    gains['weekday'] = gains.date.dt.day_name()
    gains['primary_lift'] = label_at_daily_max('exercise', 'weight')
    gains['category'] = label_at_daily_max('category', 'volume')

    # the first workout overall, and the first of each category, have no predecessor
    # and are reported as 0 days
    gains['days_since_workout'] = gains.date.diff().fillna(pd.Timedelta('0 days')).dt.days
    gains['days_since_category'] = gains.groupby('category').date.diff().fillna(pd.Timedelta('0 days')).dt.days

    # calculate my age
    gains['age'] = gains['date'].dt.date.apply(calculate_age)

    gains = gains[['id', 'date', 'weekday', 'category', 'primary_lift', 'volume', 'mean_reps', 'completion_rate', 'days_since_workout', 'days_since_category', 'age']]
    return gains

def Intensity():
    '''
    Build the workout intensity reference table.

    Returns a three-row DataFrame, one row per training response, with interval
    columns for intensity, reps per set, and low / moderate / high volume.
    '''
    def left_closed(breaks):
        # consecutive intervals that include their lower bound only
        return pd.IntervalIndex.from_breaks(breaks, closed = 'left')

    table = pd.DataFrame()
    table['response'] = ['strength', 'strength and hypertrophy', 'hypertrophy']

    bands = {
        # built from the lowest band up, then reversed so the highest band
        # lands on the first row
        'intensity': left_closed([.6, .75, .85, 1])[::-1],
        'reps_per_set': pd.IntervalIndex.from_breaks([1,3,8,12], closed = 'both'),
        'low_volume': left_closed([1,5,15,25]),
        'moderate_volume': left_closed([5, 15, 25, 50]),
        'high_volume': left_closed([15, 25, 50, np.inf]),
    }
    for column, intervals in bands.items():
        table[column] = intervals

    return table

def Exercises(df):
    '''
    Aggregate statistics for exercises within a workout.

    df: set-level DataFrame with the columns id, date, exercise, reps,
        workout_set, volume, category, routine and one_rep_max.

    Returns one row per (id, date, exercise) whose summed volume is above zero,
    with the columns id, date, routine, category, exercise, sets, reps, volume
    and one_rep_max. reps and one_rep_max are integers.
    '''
    summarized = ['reps', 'workout_set', 'volume', 'category', 'routine', 'one_rep_max']

    # aggregations are given by name: handing the builtins max / sum to agg is
    # deprecated in recent pandas releases
    how = {'category': 'max',
           'routine': 'max',
           'reps': 'sum',
           'volume': 'sum',
           'workout_set': 'count',
           'one_rep_max': 'max'}

    exercises = (df
     .groupby(['id', 'date', 'exercise'], as_index = False)
     [summarized]
     .agg(how)
     .rename(columns = {'workout_set': 'sets'})
     # exercises without any volume (e.g. cardio or failed sets) are left out
     .query('volume > 0')
     # round one rep maxes down to the nearest whole number; assign returns a
     # new frame instead of writing into the filtered result
     .assign(one_rep_max = lambda t: np.floor(t.one_rep_max).astype(int),
             reps = lambda t: t.reps.astype(int)))

    exercises = exercises[['id', 'date', 'routine', 'category', 'exercise', 'sets', 'reps', 'volume', 'one_rep_max']]

    return exercises

In [3]:
# glob returns paths in arbitrary order; sort them so that the last element is
# deterministic (the lexicographically greatest file name).
# NOTE(review): presumably the export names embed a sortable timestamp, making the
# last element the newest export -- confirm against the files in exports/
saved_workouts = sorted(glob.glob('exports/Fitnotes*.csv'))

In [5]:
# read the routine definitions, then the last export in the list with its
# column names normalized
routines = pd.read_csv('routines.csv')
fitnotes = format_columns(pd.read_csv(saved_workouts[-1]))

In [6]:
# build the four tables: intensity is a static reference table, the other
# three derive from the set-level data
intensity = Intensity()
sets = impute_workout_features(fitnotes, routines)
exercises = Exercises(sets)
workouts = compute_gains_matrix(sets)

In [7]:
# write every table to its own CSV file, leaving out the row index
for path, table in (('sets.csv', sets),
                    ('exercises.csv', exercises),
                    ('intensity.csv', intensity),
                    ('workouts.csv', workouts)):
    table.to_csv(path, index = False)