In [1]:
#basic imports
import numpy as np
import pandas as pd

import glob
import json
from datetime import date

Todo
- Add in fitbit sleep analysis and heartrate scores
- Total time resting vs. total time working out
- % of time in heart rate zones
- Number of steps, distance, calories, elevation, floors
- time slept previous day
- time slept the day of the workout
- time slept the next day
- general sleep metrics
- Number of categorical workouts initiated
- Duration of workout
- Start time of workout
- End time of workout
- Columnar intraday data for heart rate over time

In [2]:
def format_columns(df):
    '''set columns to lowercase, strip, and replace spaces with underscores'''
    df.columns = (df.columns
                  .str.strip()
                  .str.lower()
                  .str.replace(' \(lbs\)', '')
                  .str.replace(' ', '_') #remove the (lbs) subtitle on from weight
                 )
    return df

def impute_workout_features(df, routines):
    '''impute workout features'''
    #merging together some musclegroup categories
    condensed_categories = {
        'Triceps': 'Chest',
        'Biceps': 'Back'
    }
    df['category'] = df.category.replace(condensed_categories)
    
    #set up datetime index
    df['date'] = pd.to_datetime(df.date)
    
    #calculate total volume
    df['volume'] = df.eval('weight * reps')
    #determine Epley-formulated 1-rep max based
    df['one_rep_max'] = df.eval('weight + weight * reps / 30')
    
    #calculate speed for cardio
    df['time'] = pd.to_datetime(df.time)
    df['time'] = df.time.dt.minute * 60 + df.time.dt.second
    df['speed'] = df.eval('distance / time')
    
    #impute workout day by the max number of lifts in a category
    df['workout_day'] = df.groupby('date').category.transform(lambda s: s.value_counts(ascending = False).index[0])
    
    #determine completed sets
    df['set_completed'] = np.where(df.weight > 0, 1, 0)

    #Assign the routine each workout belongs to
    #iterate through routines database
    for row in routines.itertuples():
        #determine whether the dates of a workout category from fitnotes are within that categories start
        #and end dates from the routines database
        is_between_dates = df[df.category == row.category].date.between(row.start_date, row.end_date)
        #if true, assign the corresponding routine; else, NaN
        df.loc[df.category == row.category, 'routine'] = np.where(is_between_dates, row.routine, np.NaN)
    #replacing string nans with numpy nans, which propagate from np where for some reason, return to fix
    df.routine = df.routine.replace('nan', np.NaN)
    
    #set index as date for resampling later
    df = df.set_index('date')

    return df

def compute_gains_matrix(df):
    '''Create matrix of aggregated key performance indicators for workouts'''
    #remove cardio workouts
    df = df.loc[df.category != 'Cardio']
    #filling nans so the max aggfunc can work on the routine
    df.routine = df.routine.replace(np.NaN, 'n/a')
    #get key performance indicators for the workout
    gains = df.pivot_table(index = 'date', 
                           values = ['volume', 'reps', 'weight', 'workout_day', 'set_completed', 'one_rep_max', 'routine'],
                           aggfunc = {'volume': 'sum', 
                                            'reps': 'mean', 
                                            'weight': 'max', 
                                            'one_rep_max': 'max',
                                            'workout_day': 'max',
                                            'set_completed': 'mean',
                                            'routine': 'max'})

    #renaming the set completed to completion rate because all sets comprise a workout
    gains = gains.rename(columns = {'set_completed': 'completion_rate',
                                    'reps': 'mean_reps', 
                                    'weight': 'max_weight',
                                    'volume': 'total_volume'})
    workout_dates = df.index.unique()
    gains['primary_lift'] = df.groupby('date').apply(lambda g: g.set_index('exercise').weight.idxmax())
    gains['days_since_last_workout'] = gains.reset_index().date.diff().fillna(0).values
    
    gains = gains.reset_index().sort_values(['workout_day', 'date'])
    gains['days_since_last_workout_day'] = gains.groupby('workout_day').apply(lambda g: g.reset_index().date.diff().fillna(0)).values
    gains['weekday'] = gains.date.dt.weekday_name
    born = date(1995, 1, 1)
    gains['age'] = gains['date'].dt.date.sub(born).dt.days.div(365).apply(np.floor).astype(int)
    gains = gains.sort_values('date')
    gains = gains.merge(weight[['date', 'weight']], on = 'date', how = 'left')
    gains['weight'] = gains.weight.interpolate()
    return gains

In [3]:
fitnotes = pd.read_csv('fitnotes/FitNotes_Export.csv').pipe(format_columns)
routines = pd.read_csv('routines.csv', 
                         parse_dates = ['Start Date', 'End Date']).pipe(format_columns)

In [4]:
weight = pd.read_csv('fitbit/weight.csv', parse_dates = ['date'])
lifts = impute_workout_features(fitnotes, routines)

In [5]:
lifts.to_csv('lifts.csv')

In [6]:
gains = compute_gains_matrix(lifts)
gains.to_csv('gains.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [7]:
gains.head()

Unnamed: 0,date,one_rep_max,mean_reps,routine,completion_rate,total_volume,max_weight,workout_day,primary_lift,days_since_last_workout,days_since_last_workout_day,weekday,age,weight
0,2014-06-07,200.0,8.222222,,1.0,8114.0,150.0,Back,Lat Pulldown,0 days,0 days,Saturday,19,
1,2014-06-08,73.333333,10.0,,0.75,1650.0,55.0,Chest,Flat Dumbbell Bench Press,1 days,0 days,Sunday,19,
2,2014-06-10,300.0,10.0,,1.0,12900.0,225.0,Legs,Leg Press,2 days,0 days,Tuesday,19,
3,2014-06-17,73.333333,8.2,,1.0,1995.0,55.0,Chest,Flat Dumbbell Bench Press,7 days,9 days,Tuesday,19,
4,2014-06-19,320.0,8.153846,,1.0,10780.0,240.0,Legs,Leg Press,2 days,9 days,Thursday,19,
