In [1]:
#basic imports
import pandas as pd
import numpy as np

#fitbit packages 
import fitbit
from python_fitbit import gather_keys_oauth2 as Oauth2

#time libraries
from datetime import datetime, timedelta
import pause

#data importing libraries
import ast
import glob
import json
import os

In [2]:
#notebook-wide settings shared by every function below
#first day of my Fitbit history
date_joined_fitbit = '2016-05-21'
today = datetime.now().date()
#one entry per calendar day from the join date through today
days = pd.date_range(date_joined_fitbit, today, freq = 'D')

#FitNotes lifting log, indexed by its Date column
lifts = pd.read_csv('fitnotes/Fitnotes_Export.csv', index_col = 'Date')
#distinct workout dates that fall inside my Fitbit history, as a datetime index
lifting_days = pd.to_datetime(lifts.index).intersection(days).unique()

In [87]:
def sleep_until_api_refresh():
    '''Block until five minutes past the next top of the hour.'''
    #the hourly quota resets on the hour; the extra five minutes is slack
    #in case fitbit is slow to update
    now = datetime.now()
    top_of_this_hour = now.replace(minute = 0, second = 0, microsecond = 0)
    wake_at = top_of_this_hour + timedelta(hours = 1, minutes = 5)
    print(f'Sleeping until {str(wake_at.time())}')
    #hand over to the pause library, which blocks until the given datetime
    pause.until(wake_at)
    
def activate_fitbit():
    '''
    Authorize the script to access my Fitbit data
    and return fitbit python object.

    The application credentials are read from fitbit/fitbit_credentials.txt,
    which holds a Python dict literal with the keys client_id, client_secret,
    access_token and refresh_token.

    Returns
    -------
    fitbit.Fitbit
        OAuth2 client whose refresh callback writes renewed tokens back to
        the credentials file.
    '''
    #import fitbit application credentials as a dictionary
    filepath = 'fitbit/fitbit_credentials.txt'
    with open(filepath, mode = 'r') as file:
        credentials = ast.literal_eval(file.read())

    def save_refreshed_token(token):
        '''Store the tokens handed over after a refresh in the credentials file.'''
        for key in ('access_token', 'refresh_token'):
            if key in token:
                credentials[key] = token[key]
        #repr() of the dict is a literal that ast.literal_eval reads back
        with open(filepath, mode = 'w') as file:
            file.write(repr(credentials))

    #instantiate fitbit object
    #refresh_cb has to be a callable taking the new token dict; the previous
    #value True is not callable, so a token refresh could not complete
    client = fitbit.Fitbit(credentials['client_id'],
                           credentials['client_secret'],
                           oauth2=True,
                           refresh_cb = save_refreshed_token,
                           access_token=credentials['access_token'],
                           refresh_token=credentials['refresh_token'])
    return client

def get_sleep(days):
    '''
    Download the sleep log of every day in `days`, save the combined
    table to sleep.csv and return it.

    Parameters
    ----------
    days : iterable
        dates handed one at a time to client.get_sleep

    Returns
    -------
    pandas.DataFrame
        one block of rows per requested day, holding the entries of the
        response's 'sleep' list next to its 'summary' values; an empty
        dataframe (and no file written) when `days` is empty.
    '''
    frames = []
    for day in days:
        #named `response` so the imported json module is not shadowed
        response = client.get_sleep(day)
        stats = pd.DataFrame(response['sleep'])
        summary = pd.DataFrame(response['summary'], index = [0])
        frames.append(pd.concat([stats, summary], axis = 1))

    #pd.concat raises on an empty list, so answer the empty case directly
    #and leave any existing sleep.csv untouched
    if not frames:
        return pd.DataFrame()

    sleep = pd.concat(frames, ignore_index = True, sort = False)
    sleep.to_csv('sleep.csv', index = False)
    return sleep

def get_weight():
    '''
    Get historical weight data from Fitbit and save it to fitbit/weight.csv.

    Note: I log my weight using the MyFitnessPal app, which Fitbit
    downloads from. Because MyFitnessPal does not have a public API, I would
    have to download my weight statistics manually. This script allows me to
    programmatically access that data through Fitbit's connection to MyFitnessPal.
    I also use this instead of the time series fitbit method because the time series
    method imputes days where no weight was entered as the last known date. This way,
    I can manually interpolate the interim days' weight myself.

    If the weight file exists, only the months from the latest saved log
    onward are requested again; otherwise everything since
    date_joined_fitbit is downloaded. Rows are de-duplicated on 'date'
    before saving. Nothing is written when there is no data at all.
    '''
    filepath = 'fitbit/weight.csv'

    #load what has already been saved, if anything
    if os.path.isfile(filepath):
        cached = pd.read_csv(filepath, parse_dates = ['date'])
    else:
        cached = None

    #start from the month of the latest saved log, or from the month I joined
    latest = pd.NaT
    if cached is not None and len(cached) > 0:
        latest = cached['date'].max()
    if pd.isna(latest):
        latest = pd.Timestamp(date_joined_fitbit)
    #floor to the first of the month: snapping to the nearest month start
    #(or letting date_range pick the next one) skips the rest of that month
    first_month = pd.Timestamp(latest).to_period('M').to_timestamp()

    #offset by a month since data collection looks backward a month;
    #this lets me get current data from this month as well
    months = pd.date_range(start = first_month, end = today, freq = 'MS') + pd.DateOffset(months=1)

    frames = []
    if cached is not None and not cached.empty:
        frames.append(cached)
    for month in months:
        #weight logs for the month leading up to the first of `month`
        df = pd.DataFrame(client.get_bodyweight(base_date = month, period = '1m')['weight'])
        if df.empty:
            continue
        #the saved file is parsed to datetimes, so parse the fetched dates
        #too; otherwise duplicates are never recognised
        df['date'] = pd.to_datetime(df['date'])
        frames.append(df)

    #no saved and no fetched rows: leave the disk untouched
    if not frames:
        return

    weight = pd.concat(frames, ignore_index = True, sort = False)
    #consecutive requests overlap and updates re-fetch saved days
    weight = weight.drop_duplicates(subset = 'date')
    weight.to_csv(filepath, index = False)

def get_intraday_data(data):
    '''
    Get intraday data (e.g. heart rate at sub-minute granularity) on workout
    days, updating for days I'm missing.

    Parameters
    ----------
    data : str
        name of the activity resource, "heart" for heart rate,
        "calories" for calories, etc. Each day is saved as
        fitbit/{data}/{data}_{YYYY-MM-DD}.json

    A failed request is reported, the function sleeps until the api quota
    refreshes and the same day is then tried once more; if that fails too
    the day is left for the next run.
    '''
    #identify days for which no data is downloaded
    #if a download folder doesn't exist, create it
    os.makedirs(f"fitbit/{data}/", exist_ok = True)

    #get list of files already downloaded
    current_files = glob.glob(f'fitbit/{data}/*.json')

    #if there are no files in the folder, set the dates to download as
    #the lifting days
    if len(current_files) == 0:
        dates_to_download = lifting_days

    #if there are some files downloaded, identify days for which there is no data
    else:
        #strip all non-digit characters from the file names, leaving only the date;
        #regex is requested explicitly because newer pandas treats the pattern
        #as a literal string by default
        names = pd.Index([os.path.basename(path) for path in current_files])
        downloaded_days = pd.to_datetime(names.str.replace(r'\D+', '', regex = True))

        #get dates in lifting days but not saved in a folder
        dates_to_download = lifting_days.difference(downloaded_days)

    #level of granularity to pull for each data type
    interval = '1sec' if data == 'heart' else '1min'

    if len(dates_to_download) == 0:
        print(f'{data} already up to date')
        return

    #download intraday data for all undownloaded days
    for day in dates_to_download:
        #convert date from timestamp to string
        day = str(day.date())
        for attempt in (1, 2):
            try:
                series = client.intraday_time_series(f'activities/{data}',
                                                     base_date = day,
                                                     detail_level = interval)
            except Exception as error:
                #most likely the fitbit rate limit: 150 requests per hour,
                #which resets at the top of each hour, not necessarily an hour
                #after reaching the limit
                print(f'{data} {day} request failed: {error!r}')
                if attempt == 1:
                    sleep_until_api_refresh()
                else:
                    print(f'{data} {day} skipped after retry')
                continue
            #save to json
            with open(f'fitbit/{data}/{data}_{day}.json', 'w') as outfile:
                json.dump(series, outfile)
            break
    print(f'{data} update complete')

def get_activities_report():
    '''
    Get list of activities logged in fitbit tracker.

    Activities are requested 20 at a time in ascending order and each page
    is saved as fitbit/activities/activities_{earliest date in page}.csv.
    Paging starts at date_joined_fitbit when nothing is saved yet, otherwise
    at the earliest lifting day without a matching file, and stops once the
    last lifting day is reached, a page comes back empty, or a page brings
    no later date than the one asked for.

    A failed request is reported and retried after the api quota refreshes;
    two failures in a row end the download.
    '''
    #nothing to page towards without any lifting day
    if len(lifting_days) == 0:
        return

    #to_csv does not create missing folders
    os.makedirs('fitbit/activities', exist_ok = True)
    files = glob.glob('fitbit/activities/*.csv')

    if len(files) == 0:
        afterDate = pd.to_datetime(date_joined_fitbit).date()

    else:
        #strip all non-digit characters from the file names, leaving only the date;
        #regex is requested explicitly because newer pandas treats the pattern
        #as a literal string by default
        names = pd.Index([os.path.basename(path) for path in files])
        downloaded_days = pd.to_datetime(names.str.replace(r'\D+', '', regex = True))
        #get dates in lifting days but not saved in a folder
        dates_to_download = lifting_days.difference(downloaded_days)
        if len(dates_to_download) == 0:
            print('activities already up to date')
            return
        afterDate = dates_to_download.min().date()

    last_day = lifting_days.max().date()
    failures = 0
    while afterDate < last_day:
        parameters = f'afterDate={afterDate}&offset=0&limit=20&sort=asc'
        #the query string is introduced by "?", not "&"
        url = f'https://api.fitbit.com/1/user/-/activities/list.json?{parameters}'
        try:
            activities = client.make_request(url)
        except Exception as error:
            failures += 1
            print(f'activities request failed: {error!r}')
            #a second failure in a row is not a rate limit that a pause cures
            if failures >= 2:
                print('activities download stopped after repeated failures')
                break
            sleep_until_api_refresh()
            continue
        failures = 0

        df = pd.DataFrame(activities['activities'])
        #an empty page means there is nothing later to fetch
        if df.empty:
            break

        #convert each start time on its own, so a page mixing utc offsets
        #still yields plain local dates
        df['originalStartTime'] = df['originalStartTime'].map(
            lambda stamp: pd.Timestamp(stamp).date())
        latest_date = df.originalStartTime.max()
        earliest_date = str(df.originalStartTime.min())
        df.to_csv(f'fitbit/activities/activities_{earliest_date}.csv', index = False)

        #asking for the same date again would loop forever
        if latest_date <= afterDate:
            break
        afterDate = latest_date

In [5]:
#authorize once; the download functions read this module-level client
client = activate_fitbit()

#intraday resources to keep in sync, one folder per resource
activity_indicators = [
    'heart',
    'calories',
    'steps',
    'distance',
    'floors',
    'elevation',
]
for indicator in activity_indicators:
    get_intraday_data(indicator)

heart update complete
calories update complete
steps update complete
distance update complete
floors update complete
elevation update complete


In [None]:
#download the fitbit activity log into fitbit/activities/
get_activities_report()

In [88]:
#bring fitbit/weight.csv up to date with the latest weight logs
get_weight()

Todo: 
- Handle updates on days that have already been pulled but not all data was collected
    - Find a way to determine whether a day's data is complete. Inspecting the timestamps within the data might work. Until then, refreshing once a day seems the better choice, even if it is inflexible
    - Maybe capture up until the previous day so I don't get incomplete data from the current day
- Handle the exception raised when an improper grant is given because the tokens have expired