In [1]:
#load in packages
import json
import os
from datetime import datetime, timedelta
from dateutil import tz

In [2]:
def dtParser(dtString, dtStringFormat):
    """Re-format a timestamp string as 'YYYY-MM-DD HH:MM:SS'.

    dtString is parsed with the strptime pattern dtStringFormat; the
    ValueError raised by strptime propagates when the string does not match.
    """
    parsed = datetime.strptime(dtString, dtStringFormat)
    return parsed.strftime('%Y-%m-%d %H:%M:%S')

def convertUTCtoLATimeZone(ts):
    """Convert a UTC timestamp string to America/Los_Angeles local time.

    Args:
        ts: timestamp formatted as '%m/%d/%y %H:%M:%S' (two-digit year),
            interpreted as UTC.

    Returns:
        The same instant expressed in the 'America/Los_Angeles' zone,
        formatted as '%Y-%m-%d %H:%M:%S'.

    Raises:
        ValueError: if ts does not match '%m/%d/%y %H:%M:%S'.
    """
    fromTz = tz.gettz('UTC')
    toTz = tz.gettz('America/Los_Angeles')

    # Only one input format is supported, so a mismatch surfaces directly as
    # ValueError instead of being retried with the identical pattern.
    utcObj = datetime.strptime(ts, '%m/%d/%y %H:%M:%S')

    # Tag the naive datetime as UTC, then shift it into the target zone.
    laObj = utcObj.replace(tzinfo = fromTz).astimezone(toTz)
    return laObj.strftime('%Y-%m-%d %H:%M:%S')

def loadData(dataPrefix, startDate, endDate,
             dataRoot='/Users/jyang02/OneDrive - Amgen/git/fitvis/data-repo'):
    """Load and concatenate the JSON records for one data type and date range.

    Walks dataRoot recursively and reads every file whose name starts with
    dataPrefix and whose last 10 characters before the first '.' parse as a
    '%Y-%m-%d' date between startDate and endDate (both inclusive).

    Args:
        dataPrefix: file-name prefix to select, e.g. 'steps'.
        startDate: first date to include, as a '%Y-%m-%d' string.
        endDate: last date to include, as a '%Y-%m-%d' string.
        dataRoot: directory to search. Defaults to the location that used to
            be hard-coded, so existing three-argument calls are unaffected.

    Returns:
        A single list built by extending it with the parsed JSON content of
        each matching file. Files are visited in sorted name order within
        each directory.

    Raises:
        ValueError: if startDate or endDate is not in '%Y-%m-%d' format.
    """
    import warnings  # local import: only needed for the skip notice below

    startDate = datetime.strptime(startDate, '%Y-%m-%d')
    endDate = datetime.strptime(endDate, '%Y-%m-%d')

    dataContainer = []
    for root, _dirs, files in os.walk(dataRoot):
        for file in sorted(files):
            # Cheap prefix test first; most files belong to other data types.
            if not file.startswith(dataPrefix):
                continue

            # Files without a trailing date in their name are not data files.
            try:
                dataDate = datetime.strptime(file.split('.')[0][-10:], '%Y-%m-%d')
            except ValueError:
                continue
            if not (startDate <= dataDate <= endDate):
                continue

            filePath = os.path.join(root, file)
            try:
                with open(filePath, 'r') as f:
                    records = json.load(f)
                dataContainer.extend(records)
            except (OSError, ValueError, TypeError) as err:
                # Stay best-effort (one bad file must not abort the load), but
                # report the skip instead of hiding it. ValueError covers
                # invalid JSON and decoding errors; TypeError covers a JSON
                # value that cannot be used to extend a list.
                warnings.warn('skipping unreadable data file %s: %s' % (filePath, err))
    return dataContainer

In [3]:
# Build one empty per-day container for every date in the reporting window.
# (1st day wearing Fitbit Charge 2 HR: 2018-02-07)
startDate = '2020-01-01'
endDate = '2020-02-29'

dateIterator = datetime.strptime(startDate, '%Y-%m-%d')
endDateObj = datetime.strptime(endDate, '%Y-%m-%d')

# Step forward one day at a time, recording each date (end date inclusive).
dateList = []
while not dateIterator > endDateObj:
    dateList += [dateIterator.strftime('%Y-%m-%d')]
    dateIterator += timedelta(days = 1)

# Each date gets its own fresh dict; later cells fill it in.
data = {day: {} for day in dateList}

In [4]:
# Load the raw step records for the configured date range.
stepsRawData = loadData('steps', startDate, endDate)

# Sum the step counts into 10-minute buckets keyed by the bucket start time.
stepAgg = {}

for record in stepsRawData:
    # Store the Los Angeles local timestamp on the record itself.
    record['dt-la'] = convertUTCtoLATimeZone(record['dateTime'])
    # 'YYYY-MM-DD HH:M' + '0:00' floors the timestamp to its 10-minute slot.
    bucketKey = record['dt-la'][0:15] + '0:00'
    stepAgg[bucketKey] = stepAgg.get(bucketKey, 0) + int(record['value'])

# Group the buckets by calendar day (the first 10 characters of the key).
steps = {}

for bucketKey, total in stepAgg.items():
    steps.setdefault(bucketKey[:10], []).append({"dt-la": bucketKey, 'value': total})

# Attach each day's buckets to data; days that are not keys of data are ignored.
for day, buckets in steps.items():
    if day in data:
        data[day]['steps'] = buckets

In [5]:
# Load the raw sleep records for the configured date range.
sleepRawData = loadData('sleep', startDate, endDate)

# Re-format each record's start/end timestamps as 'YYYY-MM-DD HH:MM:SS'.
# TODO: de-duplicate sleep records by logID (not implemented yet).
# NOTE(review): startTime/endTime are only re-formatted here, not converted
# between time zones -- presumably they are already local time; confirm.
sleepParsed = []
for e in sleepRawData:
    sleepParsed.append({
        'dt-la-start': dtParser(e['startTime'], '%Y-%m-%dT%H:%M:%S.%f'),
        'dt-la-end': dtParser(e['endTime'], '%Y-%m-%dT%H:%M:%S.%f'),
    })

# Sort sleeps into days. A sleep whose start and end fall on different dates
# is split at 00:00:00 of the end date: the first part is filed under the
# start date, the second part under the end date.
sleep = {}

for e in sleepParsed:
    startDay = e['dt-la-start'][:10]
    endDay = e['dt-la-end'][:10]
    if startDay != endDay:
        midnight = endDay + ' 00:00:00'
        split1 = {'dt-la-start': e['dt-la-start'], 'dt-la-end': midnight}
        split2 = {'dt-la-start': midnight, 'dt-la-end': e['dt-la-end']}
        # setdefault creates the day's list on first use, so each half is
        # always appended to its own day and never replaces existing entries.
        sleep.setdefault(startDay, []).append(split1)
        sleep.setdefault(endDay, []).append(split2)
    else:
        sleep.setdefault(startDay, []).append(e)

# Attach each day's sleeps to data; days that are not keys of data are ignored.
for day in sleep:
    if day in data:
        data[day]['sleep'] = sleep[day]

In [None]:
# Load the raw heart-rate records for the configured date range.
hrRawData = loadData('heart_rate', startDate, endDate)

# Down-sample to one reading per 10-minute slot: every record writes its bpm
# under its slot key, so a later record in the same slot replaces an earlier one.
hrSample = {}

for reading in hrRawData:
    # Convert the record's time from UTC to Los Angeles local time.
    reading['dt-la'] = convertUTCtoLATimeZone(reading['dateTime'])
    # 'YYYY-MM-DD HH:M' + '0:00' floors the timestamp to its 10-minute slot.
    slotKey = reading['dt-la'][:15] + '0:00'
    hrSample[slotKey] = reading['value']['bpm']

# Group the samples by calendar day (the first 10 characters of the key).
hr = {}

for slotKey, bpm in hrSample.items():
    hr.setdefault(slotKey[:10], []).append({'dt-la': slotKey, 'value': bpm})

# Attach each day's samples to data; days that are not keys of data are ignored.
for day, samples in hr.items():
    if day in data:
        data[day]['heart-rate'] = samples

In [None]:
# Write the assembled per-day dictionary to disk as indented JSON.
exportPath = '/Users/jyang02/OneDrive - Amgen/git/fitvis/data/data.json'
with open(exportPath, 'w') as outFile:
    json.dump(data, outFile, indent = 4)