In [1]:
#basic imports
import pandas as pd
import numpy as np

#fitbit packages 
import fitbit
from python_fitbit import gather_keys_oauth2 as Oauth2

#time libraries
from datetime import datetime, timedelta
import pause

#data importing libraries
import ast
import glob
import json
import os

In [2]:
#set globals
#date I joined Fitbit
date_joined_fitbit = '2016-05-21'
today = datetime.now().date()
#every calendar day from the join date through today
days = pd.date_range(start = date_joined_fitbit, end = today, freq = 'D')

#import lifting data
#glob returns paths in arbitrary order, so sort them to make "the last export"
#deterministic
#NOTE(review): assumes export file names sort chronologically - confirm
saved_workouts = sorted(glob.glob('fitnotes/Fitnotes*.csv'))
if not saved_workouts:
    raise FileNotFoundError("No export matching 'fitnotes/Fitnotes*.csv' was found")
fitnotes = pd.read_csv(saved_workouts[-1], index_col = 'Date')
#get unique workout dates as datetime index as of the first date I joined fitbit
lifting_days = pd.to_datetime(fitnotes.index).intersection(days).unique()
#for these dates, I did workouts without fitbit activity data; the earliest record
#I have is 2016-05-29
activities_skip_dates = pd.to_datetime(pd.Index(['2016-05-23', '2016-05-24', '2016-05-25', '2016-05-26']))

In [3]:
def sleep_until_api_refresh():
    '''Block until five minutes past the next top of the hour.'''
    #truncate "now" to the start of the current hour, then step forward one
    #hour; the extra five minutes are slack in case fitbit is slow to update
    start_of_hour = datetime.now().replace(minute = 0, second = 0, microsecond = 0)
    wake_up_at = start_of_hour + timedelta(hours = 1, minutes = 5)
    print(f'Sleeping until {wake_up_at.time()}')
    #hand the wake-up time to the pause library, which blocks until then
    pause.until(wake_up_at)
    
def activate_fitbit():
    '''
    Authorize the script to access my Fitbit data
    and return fitbit python object.

    Credentials are read from fitbit/fitbit_credentials.txt, which holds a
    Python dict literal with the keys client_id, client_secret, access_token
    and refresh_token.

    Returns the fitbit.Fitbit client built from those credentials. The client
    is given a refresh callback that writes refreshed access/refresh tokens
    back to the credentials file.
    '''
    #import fitbit application credentials as a dictionary
    filepath = 'fitbit/fitbit_credentials.txt'
    with open(filepath, mode = 'r') as file:
        credentials = ast.literal_eval(file.read())

    def save_refreshed_token(token):
        '''Store the tokens from a refreshed token dict in the credentials file.'''
        for key in ('access_token', 'refresh_token'):
            if key in token:
                credentials[key] = token[key]
        #repr() of the dict is a literal that ast.literal_eval can read back
        with open(filepath, mode = 'w') as file:
            file.write(repr(credentials))

    #instantiate fitbit object
    #refresh_cb has to be a callable: the library calls it with the new token
    #after a refresh, so passing True would fail at that point
    client = fitbit.Fitbit(credentials['client_id'],
                           credentials['client_secret'],
                           oauth2=True,
                           refresh_cb = save_refreshed_token,
                           access_token=credentials['access_token'],
                           refresh_token=credentials['refresh_token'])
    return client

def get_weight():
    '''
    Get historical weight data from Fitbit and save it to fitbit/weight.csv.
    Note: I log my weight using the MyFitnessPal app, which Fitbit
    downloads from. Because MyFitnessPal does not have a public API, I would
    have to download my weight statistics manually. This script allows me to
    programmatically access that data through Fitbit's connection to MyFitnessPal
    I also use this instead of the time series fitbit method because time series
    method imputes days where no weight was entered as the last known date. This way,
    I can manually interpolate the interim days' weight myself.

    Reads the module-level client, today and date_joined_fitbit.
    A failed request sleeps until the next API refresh and is retried once;
    a second failure is raised so that a partial file is never written.
    The saved file keeps one row per date (the first one encountered).
    '''
    weight_path = 'fitbit/weight.csv'

    def fetch_month(month):
        '''Request the one-month window based at `month` as a DataFrame.'''
        try:
            logs = client.get_bodyweight(base_date = month, period = '1m')
        except Exception as first_error:
            print(f'Weight request for {month} failed: {first_error!r}')
            sleep_until_api_refresh()
            logs = client.get_bodyweight(base_date = month, period = '1m')
        df = pd.DataFrame(logs['weight'])
        if 'date' in df.columns:
            #match the dtype of the dates parsed from the saved file so that
            #duplicate dates can actually be detected
            df['date'] = pd.to_datetime(df['date'])
        return df

    if os.path.isfile(weight_path):
        #update with latest weight if weight file exists
        saved = pd.read_csv(weight_path, parse_dates = ['date'])
        first_month = pd.Index(saved.date).snap('MS').max().date()
        frames = [saved]
    else:
        #if there is no weight file, backfill weight data from scratch
        first_month = date_joined_fitbit
        frames = []

    #offset by a month since data collection looks backward a month; this lets
    #me get current data from this month as well
    months = pd.date_range(start = first_month, end = today, freq = 'MS') + pd.DateOffset(months=1)
    for month in months:
        df = fetch_month(month)
        if not df.empty:
            frames.append(df)

    if len(frames) == 0:
        print('No weight data available')
        return

    weight = pd.concat(frames, ignore_index = True, sort = False)
    #dropping duplicates in case I'm re-adding older data
    weight = weight.drop_duplicates(subset = 'date')
    weight.to_csv(weight_path, index = False)
    print('Weight update complete')

def get_intraday_data(data):
    '''
    Download intraday Fitbit data for every lifting day not yet saved.

    data: name of the activity resource, e.g. "heart" for heart rate or
        "calories" for calories. "heart" is requested at 1sec detail, every
        other resource at 1min.

    Each day is written to fitbit/{data}/{data}_{day}.json. Reads the
    module-level client and lifting_days.
    A failed request sleeps until the next API refresh and is retried once;
    if the retry also fails the day is skipped, so its file stays missing and
    it is requested again on the next run.
    '''
    #identify days for which no data is downloaded
    #if a download folder doesn't exist, create it
    os.makedirs(f"fitbit/{data}/", exist_ok = True)

    #get list of files already downloaded for this resource
    current_files = glob.glob(f'fitbit/{data}/*.json')

    #if there are no files in the folder, set the dates to download as
    #the lifting days
    if len(current_files) == 0:
        dates_to_download = lifting_days

    #if there are some files downloaded, identify days for which there is no data
    else:
        #pull the YYYY-MM-DD stamp out of each file name; an explicit pattern
        #avoids depending on the regex default of str.replace
        file_names = pd.Index([os.path.basename(path) for path in current_files])
        date_stamps = file_names.str.extract(r'(\d{4}-\d{2}-\d{2})', expand = False).dropna()
        #convert to datetime index
        downloaded_days = pd.to_datetime(date_stamps)

        #get dates in lifting days but not saved in a folder
        dates_to_download = lifting_days.difference(downloaded_days)

    #level of granularity to pull for each data type
    interval = '1sec' if data == 'heart' else '1min'

    if len(dates_to_download) == 0:
        print(f'{data} already up to date')
        return

    def request_day(day):
        '''Fetch one day, waiting out the rate limit once if the request fails.'''
        try:
            return client.intraday_time_series(f'activities/{data}',
                                               base_date = day,
                                               detail_level = interval)
        except Exception as first_error:
            #Fitbit's api has a rate limit of 150 requests per hour
            #which resets at the top of each hour, not necessarily an hour
            #after reaching the limit
            print(f'{data} request for {day} failed: {first_error!r}')
            sleep_until_api_refresh()
            return client.intraday_time_series(f'activities/{data}',
                                               base_date = day,
                                               detail_level = interval)

    #download intraday data for all undownloaded days
    skipped_days = []
    for day in dates_to_download:
        #convert date from timestamp to string
        day = str(day.date())
        try:
            series = request_day(day)
        except Exception as retry_error:
            print(f'Skipping {data} for {day}: {retry_error!r}')
            skipped_days.append(day)
            continue
        #save to json
        with open(f'fitbit/{data}/{data}_{day}.json', 'w') as outfile:
            json.dump(series, outfile)

    if skipped_days:
        print(f'{data} days not downloaded: {skipped_days}')
    print(f'{data} update complete')

def get_activities_report():
    '''
    Get list of activities logged in fitbit tracker.

    Batches of up to 20 activities are requested from the activity log and
    each batch is saved to fitbit/activities/activities_{date}.csv, where the
    date is the latest startTime date in that batch. Reads the module-level
    client, lifting_days and activities_skip_dates.
    A failed request sleeps until the next API refresh and is retried once;
    if the retry also fails that date is skipped.
    '''
    #the batch files are written here, so make sure the folder exists
    os.makedirs('fitbit/activities', exist_ok = True)
    #make finite loop
    files = glob.glob('fitbit/activities/*.csv')

    if len(files) == 0:
        dates_to_download = lifting_days

    else:
        #import downloaded activities
        activities = pd.concat([pd.read_csv(file, parse_dates = ['startTime'], usecols = ['startTime']) for file in files])
        #asfreq('D') fills the index with every day between the first and
        #last downloaded activity, so that whole span counts as downloaded
        activities = (activities
                      .assign(startTime = pd.to_datetime(activities.startTime.dt.date))
                      .drop_duplicates(subset = 'startTime')
                      .set_index('startTime')
                      .asfreq('D')
                     )
        #convert to datetime index
        downloaded_days = activities.index.append(activities_skip_dates)
        #get dates in lifting days but not saved in a folder
        dates_to_download = lifting_days.difference(downloaded_days)

    if len(dates_to_download) == 0:
        print('Activities already up to date')
        return

    def request_batch(after_date):
        '''Fetch the next 20 activities, waiting out the rate limit once.'''
        #the query string has to start with "?"; "&" only separates parameters
        parameters = f'afterDate={after_date}&offset=0&limit=20&sort=asc'
        url = f'https://api.fitbit.com/1/user/-/activities/list.json?{parameters}'
        try:
            return client.make_request(url)
        except Exception as first_error:
            print(f'Activities request for {after_date} failed: {first_error!r}')
            sleep_until_api_refresh()
            return client.make_request(url)

    afterDate = dates_to_download[0].date()
    for i in range(len(dates_to_download)):
        if i + 1 < len(dates_to_download):
            #the previous batch already reached past the next date, so this
            #date is covered; compare date with date, not date with Timestamp
            if afterDate > dates_to_download[i + 1].date():
                continue
        else:
            print('End of date range reached: Proceeding to last date range.')
        afterDate = dates_to_download[i].date()
        try:
            #ping url for the next 20 activities >= afterDate
            activities = request_batch(afterDate)
        except Exception as retry_error:
            print(f'Skipping activities for {afterDate}: {retry_error!r}')
            continue
        df = pd.DataFrame(activities['activities'])
        if df.empty:
            #nothing logged for this request, so there is no batch to save
            print(f'No activities returned for {afterDate}')
            continue
        #set the afterdate as the latest startTime date; there will be
        #overlap, but keeping overlap is only way to ensure some
        #activities are fully captured since the 20 activity limit
        #can hit before reaching all activities done on a particular
        #date. Note that getting the date value sometimes rounds
        #the date up, so there's some error in the nomenclature
        #but the data is unaffected since afterDate is pulled from
        #columns without modifying them
        afterDate = df.startTime.apply(pd.to_datetime).dt.date.max()
        #download file as of 20 activities at or after afterDate
        df.to_csv(f'fitbit/activities/activities_{afterDate}.csv', index = False)
    print('Activity log update complete')
    return

def get_sleep():
    '''
    Get all fitbit sleep records.
    This function gets all sleep records instead of filtering down by workout day
    because fitbit allows larger spans of dates per function call, saving api calls
    for much more data

    Each request window is saved to fitbit/sleep/sleep_{start}_{end}.json.
    Reads the module-level client, today and date_joined_fitbit.
    A failed request sleeps until the next API refresh and is retried once;
    if the retry also fails the update stops there, so the saved files stay
    contiguous and the next run resumes from the latest saved end date.
    '''
    #the json files are written here, so make sure the folder exists
    os.makedirs('fitbit/sleep', exist_ok = True)
    #get list of sleep files
    files = glob.glob('fitbit/sleep/*.json')

    #if there are no files in the folder, request month-long windows
    #starting from the month ends since I joined fitbit
    if len(files) == 0:
        #offset object instead of the '1M' alias, which newer pandas deprecates
        date_span = pd.date_range(date_joined_fitbit, today, freq = pd.offsets.MonthEnd()).date

    else:
        #strip all non-digit characters from each file name and keep the last
        #8 characters, representing the end date
        file_names = pd.Index([os.path.basename(path) for path in files])
        downloaded_days = file_names.str.replace(r'\D+', '', regex = True).str[-8:]
        #convert to datetime index
        downloaded_days = pd.to_datetime(downloaded_days, format = '%Y%m%d')
        latest_date = downloaded_days.max()
        #one window per day from the latest saved end date up to today
        date_span = pd.date_range(latest_date, today, freq = '1D').date

    update_complete = True
    for counter in range(len(date_span)):
        if counter + 1 >= len(date_span):
            print('End of date range reached: Finishing sleep update')
            continue
        #each window runs from one span date to the day before the next one
        start_date = date_span[counter]
        end_date = date_span[counter + 1] - timedelta(days = 1)
        url = f'https://api.fitbit.com/1.2/user/-/sleep/date/{start_date}/{end_date}.json'
        try:
            sleep = client.make_request(url)
        except Exception as first_error:
            print(f'Sleep request for {start_date} to {end_date} failed: {first_error!r}')
            sleep_until_api_refresh()
            try:
                sleep = client.make_request(url)
            except Exception as retry_error:
                print(f'Stopping sleep update at {start_date}: {retry_error!r}')
                update_complete = False
                break
        #name files by the start and end date of the window
        with open(f'fitbit/sleep/sleep_{start_date}_{end_date}.json', 'w') as file:
            json.dump(sleep, file)

    if update_complete:
        print('Sleep update complete')

In [7]:
#authenticate once; the download functions read this module-level client
client = activate_fitbit()

#intraday resources to refresh, one download folder per indicator
activity_indicators = [
    'heart',
    'calories',
    'steps',
    'distance',
    'floors',
    'elevation',
]
for indicator in activity_indicators:
    get_intraday_data(indicator)

#the remaining reports work out their own date ranges
get_activities_report()
get_sleep()
get_weight()

heart already up to date
calories already up to date
steps already up to date
distance already up to date
floors already up to date
elevation already up to date
Activities already up to date
End of date range reached: Finishing sleep update
Sleep update complete
Weight update complete


Todo: 
- Handle updates on days that have already been pulled but not all data was collected
    - Find a way to determine whether a day's data is complete, for example by checking the timestamp of the last record in the file. Until then, refreshing once a day seems the better choice, even if it is inflexible
    - Consider capturing data only up to the previous day so that incomplete data from the current day is never saved