In [1]:
#set date range
startDate = '2021-01-15'
endDate = '2021-02-15'

In [2]:
#load in packages
import json
import os
from datetime import datetime, timedelta
from dateutil import tz
import pandas as pd

In [41]:
#define global functions
def dtParser(dtString, dtStringFormat):
    dt = datetime.strptime(dtString, dtStringFormat)
    return datetime.strftime(dt,'%Y-%m-%d %H:%M:%S')

def convertUTCtoLATimeZone(ts):
    fromTz = tz.gettz('UTC')
    toTz = tz.gettz('America/Los_Angeles')
    
    try:
        utcObj = datetime.strptime(ts, '%m/%d/%y %H:%M:%S')
    except:
        utcObj = datetime.strptime(ts, '%Y-%m-%d %H:%M:%S')
    laObj = utcObj.replace(tzinfo = fromTz).astimezone(toTz)
    laTs = datetime.strftime(laObj,'%Y-%m-%d %H:%M:%S')
    
    return laTs

def loadData(dataPrefix, startDate, endDate):
    startDate = datetime.strptime(startDate, '%Y-%m-%d')
    endDate = datetime.strptime(endDate, '%Y-%m-%d')
    
    dataContainer = []
    for root, dirs, files in os.walk('/Users/jyang02/OneDrive - Amgen/git/fitvis/data-repo'):
        for file in sorted(files):
            try:
                dataDate = datetime.strptime(file.split('.')[0][-10:], '%Y-%m-%d')
                if file.startswith(dataPrefix) and dataDate >= startDate + timedelta(days = -15) and dataDate <= endDate:##possibly load more data than needed
                    #print(dataDate)
                    with open(root + '/' + file, 'r') as f:
                        dataContainer.extend(json.load(f))
            except:
                pass
    return dataContainer

In [6]:
#generate a list of dates as containers
#1st day wearing Fitbit Charge 2 HR: 2018-02-07
dateList = []

dateIterator = datetime.strptime(startDate, '%Y-%m-%d')
endDateObj = datetime.strptime(endDate, '%Y-%m-%d')

while dateIterator <= endDateObj:
    dateList.append(dateIterator.strftime('%Y-%m-%d'))
    dateIterator = dateIterator + timedelta(days = 1)

data = {}
for e in dateList:
    data[e] = {}
data

{'2021-01-15': {},
 '2021-01-16': {},
 '2021-01-17': {},
 '2021-01-18': {},
 '2021-01-19': {},
 '2021-01-20': {},
 '2021-01-21': {},
 '2021-01-22': {},
 '2021-01-23': {},
 '2021-01-24': {},
 '2021-01-25': {},
 '2021-01-26': {},
 '2021-01-27': {},
 '2021-01-28': {},
 '2021-01-29': {},
 '2021-01-30': {},
 '2021-01-31': {},
 '2021-02-01': {},
 '2021-02-02': {},
 '2021-02-03': {},
 '2021-02-04': {},
 '2021-02-05': {},
 '2021-02-06': {},
 '2021-02-07': {},
 '2021-02-08': {},
 '2021-02-09': {},
 '2021-02-10': {},
 '2021-02-11': {},
 '2021-02-12': {},
 '2021-02-13': {},
 '2021-02-14': {},
 '2021-02-15': {}}

In [8]:
#load steps data
stepsRawData = loadData('steps', startDate, endDate)

#aggregate steps by 10mins interval
stepAgg = {}

for e in stepsRawData[:]:
    #convert time from UTC to Los Angeles local time
    e['dt-la'] = convertUTCtoLATimeZone(e['dateTime'])
    ts10Mins = e['dt-la'][0:15] + '0:00'#substring YY/MM/DD HH/mm
    if ts10Mins in stepAgg:
        stepAgg[ts10Mins] += int(e['value'])
    else:
        stepAgg[ts10Mins] = int(e['value'])

#sort steps into days
steps = {}

for k, v in stepAgg.items():
    thisDate = k[:10]
    if thisDate in steps:
        steps[thisDate].append({"dt-la": k, 'value': v})
    else:
        steps[thisDate] = [{"dt-la": k, 'value': v}]

# print(steps)

#insert step data into data
for e in steps:
    if e in data:
        data[e]['steps'] = steps[e]

In [63]:
#load sleep data
sleepRawData = loadData('sleep', startDate, endDate)

#parse sleep data
sleepParsed = []
##Need to de-duplicate sleep records by logID
for e in sleepRawData:
    obj = {}
    obj['dt-la-start'] = dtParser(e['startTime'], '%Y-%m-%dT%H:%M:%S.%f')
    obj['dt-la-end'] = dtParser(e['endTime'], '%Y-%m-%dT%H:%M:%S.%f')
    sleepParsed.append(obj)

#split cross-day sleeps
#sort sleep data into days
sleep = {}

for e in sleepParsed:
    if e['dt-la-start'][:10] != e['dt-la-end'][:10]:
        split2 = {'dt-la-start': e['dt-la-end'][:10] + ' 00:00:00', 'dt-la-end': e['dt-la-end']}
        split1 = {'dt-la-start': e['dt-la-start'], 'dt-la-end': e['dt-la-end'][:10] + ' 00:00:00'}
        if split1['dt-la-start'][:10] in sleep:
            sleep[split1['dt-la-start'][:10]].append(split1)
        else:
            sleep[split1['dt-la-start'][:10]] = [split1]
        if split2['dt-la-start'][:10] in sleep:
            sleep[split2['dt-la-start'][:10]].append(split2)
        else:
            sleep[split1['dt-la-start'][:10]] = [split1]
#         print(e)
#         print(split1)
#         print(split2)
#         print(' ')
    else:
        if e['dt-la-start'][:10] in sleep:
            sleep[e['dt-la-start'][:10]].append(e)
        else:
            sleep[e['dt-la-start'][:10]] = [e]

#append sleep data into data
for e in sleep:
    if e in data:
        data[e]['sleep'] = sleep[e]

In [54]:
#load heart rate data
hrRawData = loadData('heart_rate', startDate, endDate)

#create a hr dict
hr = {}

#METHOD 1
# #sample heart rate by minute
# hrSample = {}

# for e in hrRawData:
#     #covert time from UTC to Los Angeles local time
#     e['dt-la'] = convertUTCtoLATimeZone(e['dateTime'])
#     ts10Mins = e['dt-la'][:15] #sample by 10 minutes
#     hrSample[ts10Mins + '0:00'] = e['value']['bpm'] #sample by 10 minutes
# #     ts10Mins = e['dt-la'][:15] #sample by minute
# #     hrSample[ts10Mins + ':00'] = e['value']['bpm']#sample by minute

# #sort hr into days
# for k, v in hrSample.items():
#     thisDate = k[:10]
#     if thisDate in hr:
#         hr[thisDate].append({'dt-la': k, 'value': v})
#     else:
#         hr[thisDate] = [{'dt-la': k, 'value': v}]
        


#METHOD 2

#parse json to df
df = pd.json_normalize(hrRawData)

#convert dateTime col into datetime obj
df['dateTime'] = pd.to_datetime(df['dateTime'])

#drop confidence col
del df['value.confidence']

#summerize bpm by time interval
df = df.groupby(pd.Grouper(key='dateTime', freq='60min')).mean().dropna().round(1)

#rename col
df.reset_index(level=0, inplace=True)
df.columns = ['dt-la', 'value']
df

#convert df to json
hr_json_list = json.loads(df.to_json(orient='records'))


In [53]:
df

Unnamed: 0,dt-la,value
0,2020-12-31 08:00:00,55.2
1,2020-12-31 09:00:00,51.5
2,2020-12-31 10:00:00,52.6
3,2020-12-31 11:00:00,51.2
4,2020-12-31 12:00:00,46.9
...,...,...
1122,2021-02-16 03:00:00,69.7
1123,2021-02-16 04:00:00,67.4
1124,2021-02-16 05:00:00,63.0
1125,2021-02-16 06:00:00,54.3


In [56]:
#change to json to universial format
for e in hr_json_list[:]:
    #convert dt-la to true LA local time from UTC
    e['dt-la'] = convertUTCtoLATimeZone(datetime.strftime(datetime.fromtimestamp(e['dt-la']/1000),'%Y-%m-%d %H:%M:%S'))
    thisDate = e['dt-la'][:10]
    print(thisDate)
    if thisDate in hr:
        hr[thisDate].append(e)
    else:
        hr[thisDate] = [e]



1609401600000
2020-12-30
1609405200000
2020-12-30
1609408800000
2020-12-30
1609412400000
2020-12-30
1609416000000
2020-12-30
1609419600000
2020-12-30
1609423200000
2020-12-30
1609426800000
2020-12-30
1609430400000
2020-12-31
1609434000000
2020-12-31
1609437600000
2020-12-31
1609441200000
2020-12-31
1609444800000
2020-12-31
1609448400000
2020-12-31
1609452000000
2020-12-31
1609455600000
2020-12-31
1609459200000
2020-12-31
1609462800000
2020-12-31
1609466400000
2020-12-31
1609470000000
2020-12-31
1609473600000
2020-12-31
1609477200000
2020-12-31
1609480800000
2020-12-31
1609484400000
2020-12-31
1609488000000
2020-12-31
1609491600000
2020-12-31
1609495200000
2020-12-31
1609498800000
2020-12-31
1609502400000
2020-12-31
1609506000000
2020-12-31
1609509600000
2020-12-31
1609513200000
2020-12-31
1609516800000
2021-01-01
1609520400000
2021-01-01
1609524000000
2021-01-01
1609527600000
2021-01-01
1609531200000
2021-01-01
1609534800000
2021-01-01
1609538400000
2021-01-01
1609542000000
2021-01-01


1612760400000
2021-02-07
1612764000000
2021-02-07
1612767600000
2021-02-07
1612771200000
2021-02-07
1612774800000
2021-02-07
1612778400000
2021-02-07
1612782000000
2021-02-07
1612785600000
2021-02-07
1612789200000
2021-02-07
1612792800000
2021-02-07
1612796400000
2021-02-07
1612800000000
2021-02-08
1612803600000
2021-02-08
1612807200000
2021-02-08
1612810800000
2021-02-08
1612814400000
2021-02-08
1612818000000
2021-02-08
1612821600000
2021-02-08
1612825200000
2021-02-08
1612828800000
2021-02-08
1612832400000
2021-02-08
1612836000000
2021-02-08
1612839600000
2021-02-08
1612843200000
2021-02-08
1612846800000
2021-02-08
1612850400000
2021-02-08
1612854000000
2021-02-08
1612857600000
2021-02-08
1612861200000
2021-02-08
1612864800000
2021-02-08
1612868400000
2021-02-08
1612872000000
2021-02-08
1612875600000
2021-02-08
1612879200000
2021-02-08
1612882800000
2021-02-08
1612886400000
2021-02-09
1612890000000
2021-02-09
1612893600000
2021-02-09
1612897200000
2021-02-09
1612900800000
2021-02-09


In [61]:


#insert hr data into data
for e in hr:
    if e in data:
        data[e]['heart-rate'] = hr[e]

print(data)

{'2021-01-15': {'steps': [{'dt-la': '2021-01-15 00:00:00', 'value': 0}, {'dt-la': '2021-01-15 00:50:00', 'value': 0}, {'dt-la': '2021-01-15 01:00:00', 'value': 0}, {'dt-la': '2021-01-15 01:10:00', 'value': 25}, {'dt-la': '2021-01-15 01:20:00', 'value': 36}, {'dt-la': '2021-01-15 01:30:00', 'value': 0}, {'dt-la': '2021-01-15 01:50:00', 'value': 0}, {'dt-la': '2021-01-15 02:10:00', 'value': 0}, {'dt-la': '2021-01-15 02:40:00', 'value': 0}, {'dt-la': '2021-01-15 03:00:00', 'value': 0}, {'dt-la': '2021-01-15 03:10:00', 'value': 0}, {'dt-la': '2021-01-15 03:20:00', 'value': 0}, {'dt-la': '2021-01-15 04:10:00', 'value': 0}, {'dt-la': '2021-01-15 04:20:00', 'value': 0}, {'dt-la': '2021-01-15 04:30:00', 'value': 0}, {'dt-la': '2021-01-15 04:40:00', 'value': 0}, {'dt-la': '2021-01-15 04:50:00', 'value': 0}, {'dt-la': '2021-01-15 05:20:00', 'value': 0}, {'dt-la': '2021-01-15 05:30:00', 'value': 0}, {'dt-la': '2021-01-15 05:40:00', 'value': 0}, {'dt-la': '2021-01-15 05:50:00', 'value': 0}, {'dt-l

In [37]:
# hr_json_list[:10]
df

Unnamed: 0,dt-la,value
0,2020-12-31 08:00:00,55.2
1,2020-12-31 09:00:00,51.5
2,2020-12-31 10:00:00,52.6
3,2020-12-31 11:00:00,51.2
4,2020-12-31 12:00:00,46.9
...,...,...
1122,2021-02-16 03:00:00,69.7
1123,2021-02-16 04:00:00,67.4
1124,2021-02-16 05:00:00,63.0
1125,2021-02-16 06:00:00,54.3


In [64]:
#export data
with open('/Users/jyang02/OneDrive - Amgen/git/fitvis/data/data-test.json', 'w') as f:
    json.dump(data, f, indent = 4)