# Full Data ETL

## Setup

In [1]:
import shutil
import datetime
import time
import numpy as np
import pandas as pd
import configparser
import fitbit
import myfitnesspal # was using '1.13.4' before upgraded to '1.16.1'
from nokia import NokiaApi, NokiaCredentials  # Withings
from sqlalchemy import create_engine
import json

pd.set_option('display.max_rows', 500)

### Files

In [7]:
# weight forecaster db
server_dir = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/weightforecaster/server/'
db_dir = server_dir + 'db/'
backups_dir = db_dir + 'backups/'
db_name = 'weightforecaster'
db_ext = '.db'
db_file_name = db_dir + db_name + db_ext
db_seed_name = db_dir + 'SEED_weightforecaster_2013-01-01_to_2016-12-25' + db_ext

# config file
cfg_file = server_dir + 'config/api_params.cfg'


# weight
logger_file = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/weight/pre_withings/loggr_weight.csv'
askmeevery_file = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/weight/pre_withings/askmeevery_weight.csv'
withings_file = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/weight/withings/data_JAM_1611609119/weight.csv'
weight_full_file = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/weight/weight_full.csv'

# food
food_tot_file = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/food/food_tot.csv'
food_meals_tot_file = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/food/food_meals_tot.csv'
food_diary_json_file = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/food/food_diary.json'
food_diary_file = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/food/food_diary.csv'

# fitbit
steps_daily_file = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/fitbit/steps_daily.csv'
steps_activity_dir = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/fitbit/activities/'
steps_detail_dir = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/fitbit/detailed_steps/'
hr_detail_dir = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/fitbit/detailed_heart_rate/'
sleep_detail_dir = '/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/fitbit/detailed_sleep/'

### Functions

In [8]:
def persist_fitbit_refresh_token(token_dict, cfg_file):
    parser = configparser.ConfigParser()
    parser.read(cfg_file)
    parser.set('fitbit', 'access_token', token_dict['access_token'])
    parser.set('fitbit', 'refresh_token', token_dict['refresh_token'])
    parser.set('fitbit', 'expires_at', "{:.6f}".format(token_dict['expires_at']))
    with open(cfg_file, 'w') as configfile:
        parser.write(configfile)


def ts():
    return int((
        datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1)
    ).total_seconds())


def persist_nokia_refresh_token(token_dict, cfg_file):
    exp_time = str(ts()+int(token_dict['expires_in']))
    parser = configparser.ConfigParser()
    parser.read(cfg_file)
    parser.set('nokia', 'access_token', token_dict['access_token'])
    parser.set('nokia', 'refresh_token', token_dict['refresh_token'])
    parser.set('nokia', 'token_type', token_dict['token_type'])
    parser.set('nokia', 'token_expiry', exp_time)
    if 'user_id' in token_dict:
        parser.set('nokia', 'user_id', token_dict['userid'])
    with open(cfg_file, 'w') as configfile:
        parser.write(configfile)

### Load DB

In [9]:
engine = create_engine('sqlite:///'+db_file_name)
with engine.connect() as conn, conn.begin():
    db_df = pd.read_sql_table('fitness', conn, index_col='date', parse_dates=['date'])
    
engine = create_engine('sqlite:///'+db_seed_name)
with engine.connect() as conn, conn.begin():
    db_seed_df = pd.read_sql_table('fitness', conn, index_col='date', parse_dates=['date'])

In [10]:
db_df.tail(10)

Unnamed: 0_level_0,weight,calories,steps,weight_imputed,w_7day_avg,c_7day_avg,s_7day_avg,w_7day_avg_last_week,c_7day_avg_last_week,s_7day_avg_last_week,w_7day_avg_weekly_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-01-25,152.4,2265.0,11149.0,,152.814286,2053.571429,15047.285714,155.257143,2167.714286,14029.714286,-2.442857
2021-01-26,153.8,2143.0,11689.0,,152.985714,2121.714286,13668.285714,154.8,2065.714286,14642.142857,-1.814286
2021-01-27,153.8,2297.0,11362.0,,153.157143,2165.714286,12886.857143,154.4,2002.0,15322.428571,-1.242857
2021-01-28,153.4,3401.0,13443.0,,153.228571,2338.428571,12399.285714,153.857143,1913.0,16166.714286,-0.628571
2021-01-29,153.7,1160.0,10981.0,,153.342857,2217.714286,11833.142857,153.257143,1878.285714,16550.571429,0.085714
2021-01-30,154.2,,2760.0,,,,,,,,
2021-01-31,155.5,,8849.0,,,,,,,,
2021-02-01,157.3,,11742.0,,,,,,,,
2021-02-02,155.3,,13013.0,,,,,,,,
2021-02-03,155.2,,9018.0,,,,,,,,


## Initialize API configs (one-time eval)

### Nokia / Withings

See the following:
- https://github.com/orcasgit/python-nokia
- https://github.com/orcasgit/python-nokia/blob/master/nokia/__init__.py

In [None]:
parser = configparser.ConfigParser()
parser.read(cfg_file)
CLIENT_ID = parser.get('withings', 'client_id')
CLIENT_SECRET = parser.get('withings', 'client_secret')
REDIRECT_URI = parser.get('withings', 'redirect_uri')

auth = NokiaAuth(CLIENT_ID, CLIENT_SECRET, callback_uri=REDIRECT_URI)
authorize_url = auth.get_authorize_url()

# open the following in a browser, click allow, it redirects. Copy the 'code' parameter from that url.
print(authorize_url)

In [None]:
# set this to the alphanumeric string value appearing for the 'code' parameter in the url
code = 'xxx'

In [None]:
res = requests.post(url = 'https://account.withings.com/oauth2/token', 
              data = {
                  'grant_type':'authorization_code',
                  'client_id':CLIENT_ID,
                  'client_secret':CLIENT_SECRET,
                  'redirect_uri':REDIRECT_URI,
                  'code':code
              })   
token_dict = json.loads(res.content)
token_dict

In [None]:
etl.persist_nokia_refresh_token(token_dict, cfg_file)

### Fitbit

See the following:

- https://python-fitbit.readthedocs.io/en/latest/

In a terminal, cd to location of python-fitbit repo (clone it from github: https://github.com/orcasgit/python-fitbit).

Then run the following:

```
python gather_keys_oauth2.py <client_key> <client_secret>
```

This script has a callback function for persisting the refresh token. Make sure the path to the api_params.cfg file is properly set (probably good to test it out…)

### MyFitnessPal

See the following:

- https://github.com/coddingtonbear/python-myfitnesspal

In a terminal, run the following command to set up authentication (locally storing your user credentials):

```
myfitnesspal store-password my_username
```

## Weight

### Pre-Withings data

In [16]:
ame = pd.read_csv(askmeevery_file).rename({'original':'wgt_ame'}, axis=1).drop({'answer', 'unit'}, axis=1)
ame.date = pd.to_datetime([x.date() for x in pd.to_datetime(ame.date)])
ame = ame.sort_values('date').copy().reset_index(drop=True)

logger=pd.read_csv(logger_file).rename({'weight':'wgt_logger'}, axis=1)
logger.date = pd.to_datetime([x.date() for x in pd.to_datetime(logger.date)])
logger = logger.sort_values('date').copy().reset_index(drop=True)

### Withings data

In [17]:
withings=pd.read_csv(withings_file).rename({'Weight (lb)': 'wgt_withings'}, axis=1)
withings['date'] = pd.to_datetime([x.date() for x in pd.to_datetime(withings.Date)])
withings['timestamp'] = pd.to_datetime(withings.Date)
withings.drop(['Date', 'Comments'], axis=1, inplace=True)
withings = withings.sort_values('timestamp')[['date', 'wgt_withings', 'timestamp', 'Fat mass (lb)', 'Bone mass (lb)', 'Muscle mass (lb)', 'Hydration (lb)']].copy().reset_index(drop=True)

# eliminate multiple measurements in a day by taking last measured value on that day
withings = withings.groupby('date').last().reset_index()

In [18]:
withings.dtypes

date                datetime64[ns]
wgt_withings               float64
timestamp           datetime64[ns]
Fat mass (lb)              float64
Bone mass (lb)             float64
Muscle mass (lb)           float64
Hydration (lb)             float64
dtype: object

In [19]:
withings.tail()

Unnamed: 0,date,wgt_withings,timestamp,Fat mass (lb),Bone mass (lb),Muscle mass (lb),Hydration (lb)
1971,2021-01-21,152.9,2021-01-21 07:07:59,23.1,6.5,123.3,88.7
1972,2021-01-22,153.0,2021-01-22 07:10:54,23.5,6.5,123.0,88.3
1973,2021-01-23,153.6,2021-01-23 07:29:43,23.8,6.5,123.3,88.6
1974,2021-01-24,152.7,2021-01-24 07:44:02,22.9,6.5,123.3,88.8
1975,2021-01-25,152.4,2021-01-25 07:28:44,23.5,6.5,122.5,87.8


#### API

**NOTE**

It was easier for the purposes of an infrequent data dump, to just download the data from the Withings web site (which is self explanatory). But I include the code here for the API if needed.

In the code below, meastype=1 refers only to weight. I couldn't find a way to grab all measure types in one call. The list of meastype codes are on the API ref page on the web.

In [7]:
# parser = configparser.ConfigParser()
# parser.read(cfg_file)

# client_id = parser.get('nokia', 'client_id')
# client_secret = parser.get('nokia', 'client_secret')
# access_token = parser.get('nokia', 'access_token')
# token_expiry = parser.get('nokia', 'token_expiry')
# token_type = parser.get('nokia', 'token_type')
# refresh_token = parser.get('nokia', 'refresh_token')
# user_id = parser.get('nokia', 'user_id')

# creds = NokiaCredentials(access_token=access_token,
#                          token_expiry=token_expiry,
#                          token_type=token_type,
#                          refresh_token=refresh_token,
#                          user_id=user_id,
#                          client_id=client_id,
#                          consumer_secret=client_secret)

# client = NokiaApi(creds, refresh_cb=(lambda x: persist_nokia_refresh_token(x, cfg_file)))

In [24]:
# measures = client.get_measures(meastype=1)
# len(measures)

In [23]:
# weight_json = [{'weight':(float("{:.1f}".format(x.weight*2.20462))), 'date':x.date.strftime('%Y-%m-%d')} for x in measures]

### Merging datasets

In [20]:
weight_full = pd.merge(ame, logger, how='outer', on='date')
weight_full = pd.merge(weight_full, withings, how='outer', on='date')
weight_full = weight_full.sort_values('date').copy().reset_index(drop=True)

In [21]:
dates_df = pd.DataFrame({'date':pd.date_range('2013-01-01', '2021-01-24', freq='d')})
print(len(dates_df))

weight_full = pd.merge(dates_df, weight_full, on='date', how='left')
print(len(weight_full))

2946
2946


In [22]:
def assign_wgt(x):
    wgt = np.nan
    if x['wgt_withings'] > 0:
        wgt = x['wgt_withings']
    elif x['wgt_logger'] > 0:
        wgt = x['wgt_logger']
    elif x['wgt_ame'] > 0:
        wgt = x['wgt_ame']
    return wgt

weight_full['weight'] = weight_full.apply(assign_wgt, axis=1)
weight_full = weight_full[[
    'date',
    'weight',
    'wgt_ame',
    'wgt_logger',
    'wgt_withings',
    'timestamp',
    'Fat mass (lb)',
    'Bone mass (lb)',
    'Muscle mass (lb)',
    'Hydration (lb)'
]]

In [280]:
# sanity check: compare to previously aggregated data for differences; source of diff should be understood
foo = weight_full.copy()
foo = pd.merge(foo, db_df.reset_index()[['date','weight']].rename({'weight':'wgt_db'}, axis=1), on='date', how='left')[['date', 'weight', 'wgt_db']]
foo['wgt_diff'] = abs(foo.weight-foo.wgt_db)

In [281]:
# most of these diffs stem from having multiple measurements on a day, and taking one versus the other
# however, on 2016-08-11: there is only one entry, and the latest pull from Withings shows 165.6, not 166.7. I'm ok with that.
foo[foo.wgt_diff>0.3]

Unnamed: 0,date,weight,wgt_db,wgt_diff
710,2014-12-12,178.5,178.0,0.5
824,2015-04-05,174.3,175.5,1.2
1317,2016-08-10,166.7,166.1,0.6
1318,2016-08-11,165.6,166.7,1.1


### Export data to file

In [23]:
weight_full['date'] = [x.date() for x in weight_full['date']]
weight_full.to_csv(weight_full_file, index=False)

## Food

In [11]:
client = myfitnesspal.Client('jamieinfinity')

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [24]:
start_date = datetime.datetime.strptime('2015-09-16', '%Y-%m-%d').date()
end_date = datetime.datetime.strptime('2021-01-24', '%Y-%m-%d').date()
num_days = (end_date - start_date).days+1

dates=[]
daily_totals = []
daily_meal_totals = []
daily_diary = []
for d in range(num_days):
    dq = end_date - datetime.timedelta(days=d)
    print((round(d/num_days, 2), dq))
    data = client.get_date(dq)

    dates.append(dq)
    daily_totals.append(data.totals)
    daily_meal_totals.append([x.totals for x in data.meals])
    daily_diary.append(data.get_as_dict())

(0.0, datetime.date(2021, 1, 24))
(0.0, datetime.date(2021, 1, 23))
(0.0, datetime.date(2021, 1, 22))
(0.0, datetime.date(2021, 1, 21))
(0.0, datetime.date(2021, 1, 20))
(0.0, datetime.date(2021, 1, 19))
(0.0, datetime.date(2021, 1, 18))
(0.0, datetime.date(2021, 1, 17))
(0.0, datetime.date(2021, 1, 16))
(0.0, datetime.date(2021, 1, 15))
(0.01, datetime.date(2021, 1, 14))
(0.01, datetime.date(2021, 1, 13))
(0.01, datetime.date(2021, 1, 12))
(0.01, datetime.date(2021, 1, 11))
(0.01, datetime.date(2021, 1, 10))
(0.01, datetime.date(2021, 1, 9))
(0.01, datetime.date(2021, 1, 8))
(0.01, datetime.date(2021, 1, 7))
(0.01, datetime.date(2021, 1, 6))
(0.01, datetime.date(2021, 1, 5))
(0.01, datetime.date(2021, 1, 4))
(0.01, datetime.date(2021, 1, 3))
(0.01, datetime.date(2021, 1, 2))
(0.01, datetime.date(2021, 1, 1))
(0.01, datetime.date(2020, 12, 31))
(0.01, datetime.date(2020, 12, 30))
(0.01, datetime.date(2020, 12, 29))
(0.01, datetime.date(2020, 12, 28))
(0.01, datetime.date(2020, 12, 27))

In [25]:
# save daily dairy data in raw json format

# add the date to each entry
[daily_diary[i].update({'date':dates[i].strftime('%Y-%m-%d')}) for i in range(len(dates))]
with open(food_diary_json_file, 'w', encoding='utf-8') as f:
    json.dump(daily_diary, f, ensure_ascii=False, indent=4)    

In [26]:
# save daily totals

daily_totals_df = pd.DataFrame(daily_totals)
daily_totals_df['date'] = dates
daily_totals_df.sort_values('date', inplace=True)
daily_totals_df.reset_index(inplace=True, drop=True)
daily_totals_df = daily_totals_df[['date', 'calories', 'carbohydrates', 'fat', 'protein', 'sodium', 'sugar']]
daily_totals_df.to_csv(food_tot_file, index=False)

In [27]:
# save daily meal totals

def add_date_meal(x, d):
    x[0]['date']=d
    x[0]['meal']='breakfast'
    x[0]['meal_ind']=0
    x[1]['date']=d
    x[1]['meal']='lunch'
    x[1]['meal_ind']=1
    x[2]['date']=d
    x[2]['meal']='dinner'
    x[2]['meal_ind']=2
    x[3]['date']=d
    x[3]['meal']='snack'
    x[3]['meal_ind']=3
    return x

daily_meal_totals_df = [add_date_meal(daily_meal_totals[i], dates[i]) for i in range(len(dates))]
daily_meal_totals_df = [item for sublist in daily_meal_totals_df for item in sublist]
daily_meal_totals_df = pd.DataFrame(daily_meal_totals_df).fillna(0)
daily_meal_totals_df.sort_values(['date', 'meal_ind'], inplace=True)
daily_meal_totals_df.reset_index(inplace=True, drop=True)
daily_meal_totals_df = daily_meal_totals_df[['date', 'meal', 'calories', 'carbohydrates', 'fat', 'protein', 'sodium', 'sugar']]
daily_meal_totals_df.to_csv(food_meals_tot_file, index=False)

In [28]:
# save daily diary entries

# first, open daily diary json file
with open(food_diary_json_file) as data_file:
    daily_diary_saved = json.load(data_file)

def add_date_meal_to_diary(day_dict):
    day_date = day_dict['date']
    diary = day_dict['breakfast']
    [x.update({'date':day_date, 'meal':'breakfast', 'meal_ind':0}) for x in diary]
    [x.update(x['nutrition_information']) for x in diary]
    [x.pop('nutrition_information', None) for x in diary]
    diary = day_dict['lunch']
    [x.update({'date':day_date, 'meal':'lunch', 'meal_ind':1}) for x in diary]
    [x.update(x['nutrition_information']) for x in diary]
    [x.pop('nutrition_information', None) for x in diary]
    diary = day_dict['dinner']
    [x.update({'date':day_date, 'meal':'dinner', 'meal_ind':2}) for x in diary]
    [x.update(x['nutrition_information']) for x in diary]
    [x.pop('nutrition_information', None) for x in diary]
    diary = day_dict['snacks']
    [x.update({'date':day_date, 'meal':'snacks', 'meal_ind':3}) for x in diary]
    [x.update(x['nutrition_information']) for x in diary]
    [x.pop('nutrition_information', None) for x in diary]

    day_dict.pop('date', None)
    res = [day_dict[key] for key in day_dict.keys()]
    res = [item for sublist in res for item in sublist]
    return res

daily_diary_df = [add_date_meal_to_diary(d) for d in daily_diary_saved]
daily_diary_df = [item for sublist in daily_diary_df for item in sublist]
daily_diary_df = pd.DataFrame(daily_diary_df).fillna(0)

daily_diary_df.sort_values(['date', 'meal_ind'], inplace=True)
daily_diary_df.reset_index(inplace=True, drop=True)
daily_diary_df = daily_diary_df[['date', 'meal', 'name', 'calories', 'carbohydrates', 'fat', 'protein', 'sodium', 'sugar']]
daily_diary_df.to_csv(food_diary_file, index=False)

## Fitbit

NOTES:

The Fitbit API is rate limited at **150 requests / hour / user**.

Some additional data to possibly get in the future, e.g. each lives in its own file, multiple rows per date:
```
'distances': [
    {'activity': 'total', 'distance': 4.27},
    {'activity': 'tracker', 'distance': 4.27},
    {'activity': 'loggedActivities', 'distance': 0},
    {'activity': 'veryActive', 'distance': 1.41},
    {'activity': 'moderatelyActive', 'distance': 0.43},
    {'activity': 'lightlyActive', 'distance': 2.43},
    {'activity': 'sedentaryActive', 'distance': 0}
]

'heartRateZones': [
    {'caloriesOut': 1839.97352,'max': 103, 'min': 30, 'minutes': 1210, 'name': 'Out of Range'},
    {'caloriesOut': 806.60118, 'max': 125, 'min': 103, 'minutes': 158, 'name': 'Fat Burn'},
    {'caloriesOut': 0, 'max': 154, 'min': 125, 'minutes': 0, 'name': 'Cardio'},
    {'caloriesOut': 0, 'max': 220, 'min': 154, 'minutes': 0, 'name': 'Peak'}
]
```

In [6]:
parser = configparser.ConfigParser()
parser.read(cfg_file)
consumer_key = parser.get('fitbit', 'consumer_key')
consumer_secret = parser.get('fitbit', 'consumer_secret')
access_token = parser.get('fitbit', 'access_token')
refresh_token = parser.get('fitbit', 'refresh_token')
expires_at = parser.get('fitbit', 'expires_at')

auth_client = fitbit.Fitbit(consumer_key, consumer_secret,
                            access_token=access_token,
                            refresh_token=refresh_token,
                            expires_at=float(expires_at),
                            refresh_cb=(lambda x: persist_fitbit_refresh_token(x, cfg_file)),
                            api_version=1 # use 1.2 for sleep, otherwise 1
                            )

### Daily steps

In [30]:
start_date = datetime.datetime.strptime('2013-01-14', '%Y-%m-%d').date()
end_date = datetime.datetime.strptime('2015-12-31', '%Y-%m-%d').date()
# num_days = (end_date - start_date).days+1
steps1 = auth_client.time_series('activities/steps', base_date=start_date, end_date=end_date)
steps1_df = pd.DataFrame(steps1['activities-steps']).rename({'dateTime':'date', 'value':'steps'}, axis=1)

start_date = datetime.datetime.strptime('2016-01-01', '%Y-%m-%d').date()
end_date = datetime.datetime.strptime('2018-06-30', '%Y-%m-%d').date()
# num_days = (end_date - start_date).days+1
steps2 = auth_client.time_series('activities/steps', base_date=start_date, end_date=end_date)
steps2_df = pd.DataFrame(steps2['activities-steps']).rename({'dateTime':'date', 'value':'steps'}, axis=1)

start_date = datetime.datetime.strptime('2018-07-01', '%Y-%m-%d').date()
end_date = datetime.datetime.strptime('2021-01-24', '%Y-%m-%d').date()
# num_days = (end_date - start_date).days+1
steps3 = auth_client.time_series('activities/steps', base_date=start_date, end_date=end_date)
steps3_df = pd.DataFrame(steps3['activities-steps']).rename({'dateTime':'date', 'value':'steps'}, axis=1)

In [31]:
steps_df = pd.concat([steps1_df, steps2_df, steps3_df])
steps_df.date = pd.to_datetime(steps_df.date)
steps_df.steps = steps_df.steps.astype('int')
dates_df = pd.DataFrame({'date':pd.date_range('2013-01-01', '2021-01-24', freq='d')})
steps_df = pd.merge(dates_df, steps_df, on='date', how='left')
steps_df.loc[steps_df.steps<100, 'steps'] = np.nan

steps_df.to_csv(steps_daily_file, index=False)

### Daily activities raw data

In [33]:
start_date = '2020-11-04'
end_date = '2021-01-24'
date_start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
date_end = datetime.datetime.strptime(end_date, '%Y-%m-%d')
date_query = date_start
date_diff = date_end - date_query
days = date_diff.days+1
days

82

In [34]:
my_activities = []
dates = []

# API calls
for i in range(days):
    print(date_query.strftime('%Y-%m-%d'))    
    activity = auth_client.activities(date=date_query)
    dates.append(date_query.strftime('%Y-%m-%d'))
    my_activities.append(activity)
    date_query = date_query + datetime.timedelta(days=1)

2020-11-04
2020-11-05
2020-11-06
2020-11-07
2020-11-08
2020-11-09
2020-11-10
2020-11-11
2020-11-12
2020-11-13
2020-11-14
2020-11-15
2020-11-16
2020-11-17
2020-11-18
2020-11-19
2020-11-20
2020-11-21
2020-11-22
2020-11-23
2020-11-24
2020-11-25
2020-11-26
2020-11-27
2020-11-28
2020-11-29
2020-11-30
2020-12-01
2020-12-02
2020-12-03
2020-12-04
2020-12-05
2020-12-06
2020-12-07
2020-12-08
2020-12-09
2020-12-10
2020-12-11
2020-12-12
2020-12-13
2020-12-14
2020-12-15
2020-12-16
2020-12-17
2020-12-18
2020-12-19
2020-12-20
2020-12-21
2020-12-22
2020-12-23
2020-12-24
2020-12-25
2020-12-26
2020-12-27
2020-12-28
2020-12-29
2020-12-30
2020-12-31
2021-01-01
2021-01-02
2021-01-03
2021-01-04
2021-01-05
2021-01-06
2021-01-07
2021-01-08
2021-01-09
2021-01-10
2021-01-11
2021-01-12
2021-01-13
2021-01-14
2021-01-15
2021-01-16
2021-01-17
2021-01-18
2021-01-19
2021-01-20
2021-01-21
2021-01-22
2021-01-23
2021-01-24


In [35]:
# add date field
[my_activities[i].update({'date':dates[i]}) for i in range(len(dates))]

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [36]:
# save raw json to file
with open(steps_activity_dir+'fitbit_activities_20201104_to_20210124.json', 'w') as outfile:
    json.dump(my_activities, outfile, indent=3)

### Individual activity data

In [40]:
# read saved raw data
with open(steps_activity_dir+'fitbit_activities_20150117_to_20201103.json') as data_file:
    fitbit_activities = json.load(data_file)
    
with open(steps_activity_dir+'fitbit_activities_20201104_to_20210124.json') as data_file:
    temp = json.load(data_file)
    
fitbit_activities = fitbit_activities + temp

In [43]:
def get_activity_types(entry):
    act = entry['activities']
    if len(act)==0:
        return []
    return [x['name'] for x in act]

In [44]:
foo = [get_activity_types(x) for x in fitbit_activities]
foo = [j for i in foo for j in i]
foo = pd.DataFrame({'name':foo})

In [45]:
foo.name.value_counts()

Walk               2678
Run                 435
Sport               102
Outdoor Bike          6
Aerobic Workout       1
Elliptical            1
Name: name, dtype: int64

In [46]:
def get_single_activity(activity_data):
    act_type = activity_data['name']
    if act_type in ['Outdoor Bike', 'Aerobic Workout', 'Elliptical']:
        return {}
    date = activity_data['startDate']
    tod = activity_data['startTime']
    timestamp = date + ' ' + tod
    duration_mins = round(activity_data['duration']/1000/60, 2)
    calories = activity_data['calories']
    steps = activity_data['steps']
    return {
        'date': date,
        'timestamp': timestamp,
        'duration_mins':duration_mins,
        'activity_type': act_type,
        'calories':calories,
        'steps':steps
    }
    
def get_activity_for_day(day_data):
    acts = day_data['activities']
    if acts==[]:
        return acts
    res = [get_single_activity(act) for act in acts]
    return res

In [47]:
act_df = [get_activity_for_day(x) for x in fitbit_activities]
act_df = [item for sublist in act_df for item in sublist]
act_df = pd.DataFrame(act_df)
act_df = act_df[~act_df.date.isna()].reset_index(drop=True)

In [48]:
act_df.to_csv(steps_activity_dir+'activities.csv', index=False)

### Daily summary of activity data

In [49]:
def get_activity_steps(acts, date, steps):
    steps_run = 0
    steps_walk = 0
    steps_sport = 0
    calories_run = 0
    calories_walk = 0
    calories_sport = 0
    acts_filtered = []
    for act in acts:
        if act['name'] in ['Run', 'Walk', 'Sport']:
            acts_filtered.append(act)
    if len(acts_filtered)>0:
        act_steps = pd.DataFrame([get_single_activity(act) for act in acts])
        act_steps = act_steps.groupby('activity_type').sum().reset_index()
        steps_run = act_steps[act_steps['activity_type']=='Run'].steps.sum()
        steps_walk = act_steps[act_steps['activity_type']=='Walk'].steps.sum()
        steps_sport = act_steps[act_steps['activity_type']=='Sport'].steps.sum()
        calories_run = act_steps[act_steps['activity_type']=='Run'].calories.sum()
        calories_walk = act_steps[act_steps['activity_type']=='Walk'].calories.sum()
        calories_sport = act_steps[act_steps['activity_type']=='Sport'].calories.sum()
    steps_other = round(steps - steps_run - steps_walk - steps_sport)
    return {
        'date':date,
        'steps':steps,
        'steps_run':steps_run,
        'steps_walk':steps_walk,
        'steps_sport':steps_sport,
        'steps_other':steps_other,
        'calories_run':calories_run,
        'calories_walk':calories_walk,
        'calories_sport':calories_sport
    }
        
def get_daily_act_summary(day_data):
    date = day_data['date']
    acts = day_data['activities']
    summary = day_data['summary']
    res = get_activity_steps(acts, date, summary['steps'])
    return res

In [50]:
act_steps_df = pd.DataFrame([get_daily_act_summary(x) for x in fitbit_activities])
act_steps_df.date = pd.to_datetime(act_steps_df.date)
act_steps_df = act_steps_df[act_steps_df.date>'2015-11-23'].reset_index(drop=True)
act_steps_df.to_csv(steps_activity_dir+'daily_activity_steps_cals.csv', index=False)

In [51]:
def get_daily_summary(day_data):
    date = day_data['date']
    summary = day_data['summary']
    return {
        'date':date,
        'steps':summary['steps'],
        'floors':summary['floors'],
        'heart_rate_resting':summary['restingHeartRate'],
        
        'calories_out':summary['caloriesOut'],
        'calories_active':summary['activityCalories'],
        'calories_bmr':summary['caloriesBMR'],
        
        'minutes_sedentary':summary['sedentaryMinutes'],
        'minutes_lightly_active':summary['lightlyActiveMinutes'],
        'minutes_fairly_active':summary['fairlyActiveMinutes'],
        'minutes_very_active':summary['veryActiveMinutes']        
    }

act_summary_df = pd.DataFrame([get_daily_summary(x) for x in fitbit_activities])
act_summary_df.to_csv(steps_activity_dir+'daily_summary.csv', index=False)

### Detailed steps

In [52]:
start_date = '2021-01-11'
end_date = '2021-01-24'
date_start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
date_end = datetime.datetime.strptime(end_date, '%Y-%m-%d')
date_query = date_start
date_diff = date_end - date_query
days = date_diff.days+1
days

14

In [54]:
steps_detail_dump = []
startdate = datetime.datetime(2021, 1, 11)
date = startdate
for i in range(14): 
    print((i, ' - ', date.strftime('%Y-%m-%d')))
    datarequest = auth_client.intraday_time_series('activities/steps', base_date=date, detail_level='1min')
    time.sleep(0.35)
    steps_detail_dump.append(datarequest)
    date += datetime.timedelta(days=1)

(0, ' - ', '2021-01-11')
(1, ' - ', '2021-01-12')
(2, ' - ', '2021-01-13')
(3, ' - ', '2021-01-14')
(4, ' - ', '2021-01-15')
(5, ' - ', '2021-01-16')
(6, ' - ', '2021-01-17')
(7, ' - ', '2021-01-18')
(8, ' - ', '2021-01-19')
(9, ' - ', '2021-01-20')
(10, ' - ', '2021-01-21')
(11, ' - ', '2021-01-22')
(12, ' - ', '2021-01-23')
(13, ' - ', '2021-01-24')


In [55]:
filename = steps_detail_dir + 'steps_detail_'+startdate.strftime('%Y-%m-%d')+'_to_'+steps_detail_dump[-1]['activities-steps'][0]['dateTime']+'.json'
filename

'/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/fitbit/detailed_steps/steps_detail_2021-01-11_to_2021-01-24.json'

In [56]:
filename = steps_detail_dir + 'steps_detail_'+startdate.strftime('%Y-%m-%d')+'_to_'+steps_detail_dump[-1]['activities-steps'][0]['dateTime']+'.json'

with open(filename, 'w') as outfile:
    json.dump(steps_detail_dump, outfile, indent=3)

### Detailed heart rate

In [57]:
start_date = '2021-01-11'
end_date = '2021-01-24'
date_start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
date_end = datetime.datetime.strptime(end_date, '%Y-%m-%d')
date_query = date_start
date_diff = date_end - date_query
days = date_diff.days+1
days

14

In [59]:
heart_detail_dump = []
startdate = datetime.datetime(2021, 1, 11)
date = startdate
for i in range(14): 
    print((i, ' - ', date.strftime('%Y-%m-%d')))
    datarequest=auth_client.intraday_time_series('activities/heart', base_date=date, detail_level='1sec')
    time.sleep(0.35)
    heart_detail_dump.append(datarequest)
    date += datetime.timedelta(days=1)

(0, ' - ', '2021-01-11')
(1, ' - ', '2021-01-12')
(2, ' - ', '2021-01-13')
(3, ' - ', '2021-01-14')
(4, ' - ', '2021-01-15')
(5, ' - ', '2021-01-16')
(6, ' - ', '2021-01-17')
(7, ' - ', '2021-01-18')
(8, ' - ', '2021-01-19')
(9, ' - ', '2021-01-20')
(10, ' - ', '2021-01-21')
(11, ' - ', '2021-01-22')
(12, ' - ', '2021-01-23')
(13, ' - ', '2021-01-24')


In [60]:
filename = hr_detail_dir + 'heart_detail_'+startdate.strftime('%Y-%m-%d')+'_to_'+heart_detail_dump[-1]['activities-heart'][0]['dateTime']+'.json'
filename

'/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/fitbit/detailed_heart_rate/heart_detail_2021-01-11_to_2021-01-24.json'

In [61]:
filename = hr_detail_dir + 'heart_detail_'+startdate.strftime('%Y-%m-%d')+'_to_'+heart_detail_dump[-1]['activities-heart'][0]['dateTime']+'.json'

with open(filename, 'w') as outfile:
    json.dump(heart_detail_dump, outfile)

### Detailed sleep

In [7]:
start_date = '2021-01-11'
end_date = '2021-01-24'
date_start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
date_end = datetime.datetime.strptime(end_date, '%Y-%m-%d')
date_query = date_start
date_diff = date_end - date_query
days = date_diff.days+1
days

14

In [9]:
sleep_detail_dump = []
startdate = datetime.datetime(2021, 1, 11)
date = startdate
for i in range(14): 
    print((i, ' - ', date.strftime('%Y-%m-%d')))
    datarequest = auth_client.get_sleep(date)
    time.sleep(0.5)
    sleep_detail_dump.append(datarequest)
    date += datetime.timedelta(days=1)

(0, ' - ', '2021-01-11')
(1, ' - ', '2021-01-12')
(2, ' - ', '2021-01-13')
(3, ' - ', '2021-01-14')
(4, ' - ', '2021-01-15')
(5, ' - ', '2021-01-16')
(6, ' - ', '2021-01-17')
(7, ' - ', '2021-01-18')
(8, ' - ', '2021-01-19')
(9, ' - ', '2021-01-20')
(10, ' - ', '2021-01-21')
(11, ' - ', '2021-01-22')
(12, ' - ', '2021-01-23')
(13, ' - ', '2021-01-24')


In [10]:
filename = sleep_detail_dir + 'sleep_detail_'+startdate.strftime('%Y-%m-%d')+'_to_'+sleep_detail_dump[-1]['sleep'][-1]['dateOfSleep']+'.json'
filename

'/Users/jamieinfinity/Dropbox/Projects/WeightForecaster/_data_dump/fitbit/detailed_sleep/sleep_detail_2021-01-11_to_2021-01-24.json'

In [11]:
filename = sleep_detail_dir + 'sleep_detail_'+startdate.strftime('%Y-%m-%d')+'_to_'+sleep_detail_dump[-1]['sleep'][-1]['dateOfSleep']+'.json'

with open(filename, 'w') as outfile:
    json.dump(sleep_detail_dump, outfile, indent=3)