# Fitnotes Processing Notebook
This notebook processes workout data manually collected from the Fitnotes workout Android application, creating four tables:
- Sets, showing set-level data during the workout.
- Exercises, showing aggregate set, rep, and volume metrics for each exercise.
- Workouts, showing aggregate workout metadata.
- Intensity, creating a reference table for repetition ranges as a percentage of one-rep maxes.

In [1]:
#basic imports
import numpy as np
import pandas as pd

import glob
from datetime import date
from sqlalchemy import create_engine
engine = create_engine('sqlite:///../gains.db')

In [2]:
def format_columns(df):
    '''set columns to lowercase, strip, and replace spaces with underscores'''
    df.columns = (df.columns
                  .str.strip()
                  .str.lower()
                  .str.replace(' \(lbs\)', '') #remove the (lbs) subtitle from weight column
                  .str.replace(' ', '_') 
                 )
    return df

def Sets(df, routines, weight):
    '''
    Impute workout features
    '''
    
    # format dates
    df.date = pd.to_datetime(df.date)
    
    # calculate speed for cardio in units per second
    df.time = pd.to_datetime(df.time)
    df.time = df.time.dt.minute * 60 + df.time.dt.second
    # df['speed'] = df.eval('distance / time')
    
    ### add bodyweight to exercises where I have to lift my whole body ###
    
    # merge dataframe with bodyweight within 60 days of measurement
    df = pd.merge_asof(df, weight[['date', 'bodyweight']], on = 'date', direction = 'backward', tolerance = pd.Timedelta('60 days'))
    
    # identify bodyweight exercises and add my bodyweight to the weight lifted
    bodyweight_exercises = ['Pull Up', 'Chin Up', 'Close Grip Dip']
    is_bodyweight = df.exercise.isin(bodyweight_exercises)
    df.loc[is_bodyweight, 'weight'] += df.loc[is_bodyweight, 'bodyweight']
    
    # drop the bodyweight column
    df = df.drop(['bodyweight'], axis = 1)

    # calculate total volume and Epley-formulated one rep max
    df['volume'] = df.eval('weight * reps')
    df['one_rep_max'] = df.eval('weight * (1 + reps / 30)')
    
    # calculate last known max on a rolling basis; then, calculate relative volume
    df['last_max'] = df.groupby('exercise').one_rep_max.transform(lambda s: s.rolling(30).max())
    df.last_max = df.last_max.mask(df.last_max.isnull(), df.one_rep_max)
    df.one_rep_max = df.last_max
    df = df.drop('last_max', axis = 1)
    
    df['intensity'] = df.eval('weight / one_rep_max')
    df['relative_volume'] = df.eval('weight * reps * intensity')
    
    # determine completed sets for weights and cardio
    df['set_completed'] = 0
    df.loc[(df.category == 'Cardio') & (df.distance > 0), 'set_completed'] = 1
    df.loc[df.weight > 0, 'set_completed'] = 1
    
    # make null set_completed before I began officially tracking that metric
    df.loc[df.date < '5-18-18', 'set_completed'] = np.nan

    # assign routine, superset, and order within each superset
    
    # for every dimension of the routines table...
    cols = ['routine'] # , 'superset', 'sub_superset']
    
    # ...instantiate dimenstion to nan, then...
    for col in cols:
        df[col] = np.nan
        
        # for every exercise in a routine... (each row is an exercise)
        for idx, row in routines.iterrows():

            # if a performed exercise occurs within the dates of that routine's exercise
            exercise_within_dates = (df.exercise == row.exercise) & df.date.between(row.start_date, row.end_date)

            # ...assign the corresponding column
            df[col] = df[col].mask(exercise_within_dates, row[col])
            
    # determine exercise set within each workout, the set the exercise set of exercises I failed to do to null
    # df['exercise_set'] = df.groupby(['date', 'exercise', 'set_completed'], as_index = False).cumcount() + 1
    # df.exercise_set = df.exercise_set.mask(df.set_completed == 0, np.nan)
    
    # sort exercises into the order they were performed during the workout
    # df = df.sort_values(['date', 'superset', 'exercise_set', 'sub_superset'])
    
    # define workout_set after sorting
    # df['workout_set'] = df.groupby(['date', 'set_completed'], as_index = False).cumcount() + 1 # group by set completion to not count uncompleted sets
    
    ### prepare for export ###
    
    # set column order
    col_order = ['date', 'routine', 'category', 'exercise', 
                 'weight', 'reps', 'intensity', 'one_rep_max', 'volume', 'relative_volume',
                 'distance_unit', 'distance', 'time', 'set_completed', 'comment']
    
    df = df[col_order]
    
    return df

def calculate_age(x):
    '''
    Calculate how old I am on a particular date
    '''
    # source: https://stackoverflow.com/questions/2217488/age-from-birthdate-in-python/2259711
    # My age is today's year minus the year of my birth.
    born = date(1995, 1, 1)
    age = x.year - born.year - ((x.month, x.day) < (born.month, born.day))
    return age

def rank_muscle_volume(s):
    '''
    Assign each muscle group to a column based on rank
    '''
    
    # iterate through ranks and return index value (the muscle group) of the series
    # row corresponding to the rank
    ranked = {}
    for i in np.arange(1, s.max() + 1):
        ranked['muscle_{}'.format(i)] = s[s == i].index[0]
    ranked = pd.Series(ranked)
    return ranked

def Workouts(df):
    '''
    Create matrix of aggregated key performance indicators for workouts
    '''

    # get key workout performance indicators
    agg = {
        'volume': 'sum', 
        'set_completed': 'mean',
        'id': 'count',
    }

    gains = (df
             .groupby('date', as_index = False)
             .agg(agg)
             .rename(columns = {'set_completed': 'completion_rate',  # rename set_completed to completion_rate because all sets comprise the workout
                                'id': 'sets'
                               })
            )
    
    gains['weekday'] = gains.date.dt.weekday_name
    gains['primary_lift'] = df.groupby('date', as_index = False).apply(lambda g: g.set_index('exercise').weight.idxmax()).fillna('Cardio')
    gains['category'] = df.groupby('date', as_index = False).apply(lambda g: g.set_index('category').volume.idxmax()).fillna('Cardio')
    gains['days_since_workout'] = gains.date.diff().fillna(pd.Timedelta('0 days')).dt.days
    gains['days_since_category'] = gains.groupby('category').date.diff().fillna(pd.Timedelta('0 days')).dt.days
    
    gains = pd.merge_ordered(gains, df[['date', 'category', 'routine']], 
                             on = ['date', 'category'],
                             how = 'left',
                            ).drop_duplicates()

    
    # calculate my age
    # gains['age'] = gains['date'].dt.date.apply(calculate_age)
    
    gains = gains[['date', 'weekday', 'routine', 'category', 'primary_lift', 'sets', 'volume', 'completion_rate', 'days_since_workout', 'days_since_category']]
    
    return gains
def Intensity():
    '''
    Produce workout intensity table.
    '''
    intensity = pd.DataFrame()

    intensity['response'] = ['strength', 'strength and hypertrophy', 'hypertrophy']

    # set 1rm max range and then reverse to fit the table
    intensity['intensity'] = pd.IntervalIndex.from_breaks([.6, .75, .85, 1], closed = 'left')[::-1]
    intensity['reps_per_set'] = pd.IntervalIndex.from_breaks([1,3,8,12], closed = 'both')
    intensity['low_volume'] = pd.IntervalIndex.from_breaks([1,5,15,25], closed = 'left')
    intensity['moderate_volume'] = pd.IntervalIndex.from_breaks([5, 15, 25, 50], closed = 'left')
    intensity['high_volume'] = pd.IntervalIndex.from_breaks([15, 25, 50, np.inf], closed = 'left')
    
    return intensity

def Volume(df):
    '''
    Aggregate statistics for exercises within a workout
    '''
    df['id'] = df.index 
    agg = {'category': max,
           'routine': max,
           'reps': sum, 
           'volume': sum, 
           'relative_volume': sum,
           'intensity': 'mean',
           'id': 'count'}
    
    exercises = (df
                 .groupby(['date', 'exercise'], as_index = False)[list(agg.keys())]
                 .agg(agg)
                 .rename(columns = {'id': 'sets', 'intensity': 'avg_intensity'})
                 .query('volume > 0'))
    
    # convert reps to int
    exercises.reps = exercises.reps.astype(int)
    
    exercises = exercises[['date', 'routine', 'category', 'exercise', 'sets', 'reps', 'avg_intensity', 'volume', 'relative_volume']]
    
    return exercises

In [3]:
saved_workouts = glob.glob('exports/Fitnotes*.csv')

In [4]:
routines = pd.read_csv('routines.csv', parse_dates = ['start_date', 'end_date'])
fitnotes = pd.read_csv(saved_workouts[-1]).pipe(format_columns)
weight = pd.read_sql('weight', con = engine, parse_dates = 'date').rename(columns = {'weight':'bodyweight'})

In [5]:
sets = Sets(fitnotes, routines, weight)

In [6]:
volume = Volume(sets)
# workouts = Workouts(sets)

In [7]:
sets.to_sql('sets', con = engine, if_exists = 'replace', index = False)
volume.to_sql('volume', con = engine, if_exists = 'replace', index = False)
workouts.to_sql('workouts', con = engine, if_exists = 'replace', index = False)
one_rep_max.to_sql('one_rep_max', con = engine, if_exists = 'replace', index = False)

NameError: name 'workouts' is not defined