In [None]:
import pandas as pd
import numpy as np
import json
import os
from datetime import datetime, timedelta, time
from garminconnect import Garmin

In [None]:
# Number of daily API requests each fetch loop makes, walking back from START_DATE.
LOOKBACK_WINDOW = 90
# Most recent day of the window: the current local calendar date.
START_DATE = datetime.now().date()

In [None]:
# Garmin Connect credentials come from the environment; a variable that is not
# set yields None.
email = os.environ.get('garmin_email')
password = os.environ.get('garmin_pwd')

In [None]:
# Build the Garmin client from the credentials read from the environment and
# log in; every fetch cell below issues its requests through `api`.
api = Garmin(email, password)
api.login()

In [None]:
def datetime_range(start, end, delta):
    """Yield values from `start` (inclusive) up to `end` (exclusive), stepping by `delta`.

    Nothing is yielded when `start` is not less than `end`.
    """
    cursor = start
    while True:
        if not (cursor < end):
            return
        yield cursor
        cursor += delta

# One timestamp per minute, from midnight LOOKBACK_WINDOW days before START_DATE
# up to (but excluding) midnight of the day after START_DATE.
grid_start = datetime.combine(START_DATE - timedelta(LOOKBACK_WINDOW), time.min)
grid_end = datetime.combine(START_DATE + timedelta(1), time.min)

dts = []
for minute in datetime_range(grid_start, grid_end, timedelta(minutes=1)):
    dts.append(pd.to_datetime(minute.strftime('%Y-%m-%dT%H:%M:%S')))

# Minute-level frame that the later cells left-join their data onto.
val = {'timestamp': dts}
main_df = pd.DataFrame(val)

In [None]:
# steps data
# Fields kept from every entry returned by get_steps_data.
steps_vals = {
    'startGMT': [],
    'endGMT': [],
    'steps': [],
    'primaryActivityLevel': [],
    'activityLevelConstant': []
}
for i in range(LOOKBACK_WINDOW):
    # `or []` tolerates a day for which the API returns nothing.
    steps_l = api.get_steps_data((START_DATE - timedelta(days=i)).isoformat()) or []
    for entry in steps_l:
        # Read only the tracked fields: an extra field in the payload no longer
        # raises KeyError, and a missing one is stored as None so every list
        # keeps the same length.
        for key, values in steps_vals.items():
            values.append(entry.get(key))
# steps df has info on the number of steps, activity level in 15 min interval
steps_df = pd.DataFrame(steps_vals)
steps_df['startGMT'] = pd.to_datetime(steps_df['startGMT'])
steps_df['endGMT'] = pd.to_datetime(steps_df['endGMT'])

# per-column count of missing values (shown as the cell output)
steps_df.isnull().sum()

In [None]:
# Left-join the steps intervals onto the minute grid: only the minute equal to
# an interval's startGMT receives values, every other minute stays empty.
main_df = main_df.merge(
    steps_df,
    how='left',
    left_on='timestamp',
    right_on='startGMT',
)

In [None]:
# heart rate data
hr_vals = {
    'recordGMT': [],
    'heartRate': []
}

for i in range(LOOKBACK_WINDOW):
    day_payload = api.get_heart_rates((START_DATE - timedelta(days=i)).isoformat())
    # 'heartRateValues' may be absent or None for a day; treat that as no samples.
    heart_l = day_payload.get('heartRateValues') or []
    for entry in heart_l:
        # entry[0] is taken to be epoch milliseconds (the original kept its
        # first 10 digits as seconds). It is converted as UTC so that it lines
        # up with the other *GMT columns; datetime.fromtimestamp would shift it
        # into the machine's local time zone.
        timestamp = pd.to_datetime(int(entry[0]) // 1000, unit='s')
        hr_vals['recordGMT'].append(timestamp)
        hr_vals['heartRate'].append(entry[1])

# heart rate data available at a 2 minute interval
hr_df = pd.DataFrame(hr_vals)

# there are missing values in the hr dataset (233 at the moment of taking a sample)
# NA values seem to indicate that the watch has been taken off (?),
# as they are followed by a variable period (more than 2 minutes) without records
# IMO those values should be dropped entirely
hr_df.isnull().sum()
# missing readings are replaced by the string placeholder 'NA'
hr_df = hr_df.fillna('NA')

In [None]:
# Left-join heart-rate samples onto the minute grid by their record time.
main_df = main_df.merge(
    hr_df,
    how='left',
    left_on='timestamp',
    right_on='recordGMT',
)

In [None]:
# daily resting heart data
rhr_vals = {
    'calendarDate': [],
    'restingHeartRate': []
}

for days_back in range(LOOKBACK_WINDOW):
    query_date = (START_DATE - timedelta(days=days_back)).isoformat()
    metrics_map = api.get_rhr_day(query_date)['allMetrics']['metricsMap']
    # only the first WELLNESS_RESTING_HEART_RATE entry of the day is used
    rhr_dict = metrics_map['WELLNESS_RESTING_HEART_RATE'][0]
    rhr_vals['calendarDate'].append(rhr_dict['calendarDate'])
    rhr_vals['restingHeartRate'].append(rhr_dict['value'])

# one row per requested day
rhr_df = pd.DataFrame(rhr_vals)
rhr_df['calendarDate'] = pd.to_datetime(rhr_df['calendarDate'])

# per-column null count (mid-cell expression, so the notebook does not display it)
rhr_df.isnull().sum()

# left-join the daily value onto the grid; it lands on the timestamp equal to
# the parsed calendarDate
main_df = main_df.merge(
    rhr_df,
    how='left',
    left_on='timestamp',
    right_on='calendarDate',
)

In [None]:
# sleep data

# daily sleep data
daily_sleep_vals = {
    'calendarDate': [],
    'sleepTimeSeconds': [],
    'napTimeSeconds': [],
    'sleepStartTimestampGMT': [],
    'sleepEndTimestampGMT': [],
    'deepSleepSeconds': [],
    'lightSleepSeconds': []
}

# sleep movement data - every minute
sleep_movement_vals = {
    'startGMT': [],
    'endGMT': [],
    'activityLevel': []
}

# sleep levels data - random timeframe
sleep_level_vals = {
    'startGMT': [],
    'endGMT': [],
    'activityLevel': []
}

# sleep respiration - every two minutes
sleep_resp_vals = {
    'startTimeGMT': [],
    'respirationValue': [],
}

# sleep stress - every three minutes
sleep_stress_vals = {
    'startGMT': [],
    'value': []
}


def _epoch_ms_to_utc(value):
    """Convert an epoch-milliseconds value to a naive UTC timestamp (second precision).

    Returns None when `value` is missing or cannot be read as an integer.
    UTC is used (rather than datetime.fromtimestamp, which yields local time)
    so the result matches the *GMT column it is stored in.
    """
    try:
        return pd.to_datetime(int(value) // 1000, unit='s')
    except (TypeError, ValueError):
        return None


for i in range(LOOKBACK_WINDOW):
    sleep_api = api.get_sleep_data((START_DATE - timedelta(days=i)).isoformat())

    # daily sleep: append one value per tracked key (None when the key is
    # missing) so all lists keep the same length; skip days without a summary
    daily_sleep_dict = sleep_api.get('dailySleepDTO') or {}
    if daily_sleep_dict:
        for key, values in daily_sleep_vals.items():
            value = daily_sleep_dict.get(key)
            if 'GMT' in key:
                value = _epoch_ms_to_utc(value)
            values.append(value)

    # sleep movement
    for entry in sleep_api.get('sleepMovement') or []:
        for key, values in sleep_movement_vals.items():
            values.append(entry.get(key))

    # sleep activity levels
    for entry in sleep_api.get('sleepLevels') or []:
        for key, values in sleep_level_vals.items():
            values.append(entry.get(key))

    # sleep respiration: the list may be absent for a day. The original wrapped
    # this in a bare `except: continue`, which also skipped the sleep-stress
    # parsing below for that day.
    for entry in sleep_api.get('wellnessEpochRespirationDataDTOList') or []:
        sleep_resp_vals['startTimeGMT'].append(_epoch_ms_to_utc(entry.get('startTimeGMT')))
        sleep_resp_vals['respirationValue'].append(entry.get('respirationValue'))

    # sleep stress
    for entry in sleep_api.get('sleepStress') or []:
        sleep_stress_vals['startGMT'].append(_epoch_ms_to_utc(entry.get('startGMT')))
        sleep_stress_vals['value'].append(entry.get('value'))

In [None]:
daily_sleep_df = pd.DataFrame(daily_sleep_vals)
daily_sleep_df['calendarDate'] = pd.to_datetime(daily_sleep_df['calendarDate'])

# there are missing values in daily sleep
# 23 at the time of the sample pull
daily_sleep_df.isnull().sum()
# Fill them with the 'NA' placeholder. The original stored the filled frame
# under a misspelled name (daily_slee_df), so the frame merged below never
# received the fill. The join key is left untouched so it keeps its datetime
# dtype for the merge.
value_cols = daily_sleep_df.columns.drop('calendarDate')
daily_sleep_df[value_cols] = daily_sleep_df[value_cols].fillna('NA')

# joining daily sleep
main_df = pd.merge(main_df,
                   daily_sleep_df,
                   left_on='timestamp',
                   right_on='calendarDate',
                   how='left')

In [None]:
# Build the sleep-movement frame: parse both interval bounds to datetimes and
# give the level column a name that cannot clash with the sleep-levels data.
sleep_movement_df = (
    pd.DataFrame(sleep_movement_vals)
    .assign(
        startGMT=lambda frame: pd.to_datetime(frame['startGMT']),
        endGMT=lambda frame: pd.to_datetime(frame['endGMT']),
    )
    .rename(columns={'activityLevel': 'sleepMovementActivityLevel'})
)
# per-column null count (mid-cell expression, so the notebook does not display it)
sleep_movement_df.isnull().sum()
# left-join sleep movement on the interval start
main_df = main_df.merge(
    sleep_movement_df,
    how='left',
    left_on='timestamp',
    right_on='startGMT',
)

In [None]:
sleep_activity_lvl_df = pd.DataFrame(sleep_level_vals)
# parse both interval bounds to datetimes
for bound in ('startGMT', 'endGMT'):
    sleep_activity_lvl_df[bound] = pd.to_datetime(sleep_activity_lvl_df[bound])
# rename so the column cannot clash with the sleep-movement level
sleep_activity_lvl_df = sleep_activity_lvl_df.rename(
    columns={'activityLevel': 'sleepActivityLevel'}
)
# per-column null count (mid-cell expression, so the notebook does not display it)
sleep_activity_lvl_df.isnull().sum()
# left-join sleep levels on the interval start
main_df = main_df.merge(
    sleep_activity_lvl_df,
    how='left',
    left_on='timestamp',
    right_on='startGMT',
)

In [None]:
sleep_resp_df = pd.DataFrame(sleep_resp_vals)
# per-column null count (mid-cell expression, so the notebook does not display it)
sleep_resp_df.isnull().sum()
# left-join sleep respiration readings on their start time
main_df = main_df.merge(
    sleep_resp_df,
    how='left',
    left_on='timestamp',
    right_on='startTimeGMT',
)

In [None]:
sleep_stress_df = pd.DataFrame(sleep_stress_vals)
# Rename the join key as well as the value column. By this point main_df
# already carries 'startGMT', 'startGMT_x' and 'startGMT_y' from the steps,
# sleep-movement and sleep-level joins, so merging one more 'startGMT' makes
# the default suffixes produce duplicate labels (a MergeError on pandas >= 2.0).
# The new key name still contains 'GMT', so the time-column drop further down
# removes it like the others.
sleep_stress_df = sleep_stress_df.rename(columns={'value': 'sleepStress',
                                                  'startGMT': 'sleepStressStartGMT'})
# per-column null count (mid-cell expression, so the notebook does not display it)
sleep_stress_df.isnull().sum()
# joining sleep stress
main_df = pd.merge(main_df,
                   sleep_stress_df,
                   left_on='timestamp',
                   right_on='sleepStressStartGMT',
                   how='left')

In [None]:
# stress data - every three minutes
# body battery - every three minutes
stress_vals = {
    'timestampGMT': [],
    'stressLevel': []
}

battery_vals = {
    'timestampGMT': [],
    'bodyBatteryStatus': [],
    'bodyBatteryLevel': [],
    'bodyBatteryVersion': []
}

for i in range(LOOKBACK_WINDOW):
    stress_api = api.get_stress_data((START_DATE - timedelta(days=i)).isoformat())
    # stress parsing; `or []` tolerates a day whose array is absent or None
    stress_l = stress_api.get('stressValuesArray') or []
    for entry in stress_l:
        # entry[0] is taken to be epoch milliseconds (the original kept its
        # first 10 digits as seconds). Convert as UTC to match the column name;
        # datetime.fromtimestamp would shift it into the machine's local time.
        timestamp = pd.to_datetime(int(entry[0]) // 1000, unit='s')
        stress_vals['timestampGMT'].append(timestamp)
        stress_vals['stressLevel'].append(entry[1])

    # body battery parsing: each entry is read positionally as
    # (timestamp, status, level, version)
    body_battery_l = stress_api.get('bodyBatteryValuesArray') or []
    for entry in body_battery_l:
        timestamp = pd.to_datetime(int(entry[0]) // 1000, unit='s')
        battery_vals['timestampGMT'].append(timestamp)
        battery_vals['bodyBatteryStatus'].append(entry[1])
        battery_vals['bodyBatteryLevel'].append(entry[2])
        battery_vals['bodyBatteryVersion'].append(entry[3])

In [None]:
stress_df = pd.DataFrame(stress_vals)
# per-column null count (mid-cell expression, so the notebook does not display it)
stress_df.isnull().sum()
# left-join stress readings on their timestamp
main_df = main_df.merge(
    stress_df,
    how='left',
    left_on='timestamp',
    right_on='timestampGMT',
)

In [None]:
body_battery_df = pd.DataFrame(battery_vals)
# this frame contains missing values - consider dropping them (?)
# (mid-cell expression, so the notebook does not display the count)
body_battery_df.isnull().sum()
# left-join body battery readings on their timestamp
main_df = main_df.merge(
    body_battery_df,
    how='left',
    left_on='timestamp',
    right_on='timestampGMT',
)

In [None]:
# Drop every column whose name contains 'GMT' or 'Date' (the join keys and raw
# timestamps brought in by the merges); 'timestamp' itself does not match.
time_columns = list(main_df.filter(regex='GMT|Date'))
main_df = main_df.drop(columns=time_columns)

In [None]:
#### cleaning part

In [None]:
# Each steps row was joined on the first minute (startGMT) of its 15-minute
# interval, so the remaining 14 minutes of that interval are forward filled.
# The original limit of 15 filled one minute too many, spilling into the first
# minute of a following interval whenever that interval had no data.
# NOTE: this cell is not idempotent - re-running it divides the steps again.
STEPS_INTERVAL_MINUTES = 15
main_df['steps'] = main_df['steps'].ffill(limit=STEPS_INTERVAL_MINUTES - 1)
# spread the interval total evenly over its minutes
main_df['steps'] = main_df['steps'] / STEPS_INTERVAL_MINUTES
main_df['steps'] = main_df['steps'].fillna('NA')

# remaining steps columns (everything after startGMT, endGMT, steps)
for column in steps_df.columns[3:]:
    try:
        main_df[column] = main_df[column].ffill(limit=STEPS_INTERVAL_MINUTES - 1)
        main_df[column] = main_df[column].fillna('NA')
    except KeyError:
        # column is not present in main_df
        continue

In [None]:
# Forward fill heart rate so the minutes between two samples carry the last
# recorded value.
# Note: missing readings in hr_df were replaced by the string 'NA' before the
# join, so they are not treated as missing here; the placeholder is carried
# forward like any other value.
main_df = main_df.assign(heartRate=main_df['heartRate'].ffill())

In [None]:
# The daily value is joined on the midnight timestamp of its calendarDate, so
# it has to be *forward* filled to cover the rest of that same day. A back fill
# assigns it to the minutes of the previous day and leaves the most recent day
# empty. The limit (minutes in a day minus the midnight row) keeps a value from
# spilling into a following day that has no reading of its own.
main_df['restingHeartRate'] = main_df['restingHeartRate'].ffill(limit=24 * 60 - 1)

In [None]:
# Back fill every daily-sleep column that is still present in main_df; columns
# that are no longer there (e.g. the dropped date / GMT ones) are skipped.
for column in daily_sleep_df.columns:
    if column not in main_df.columns:
        continue
    main_df[column] = main_df[column].bfill()

In [None]:
# Sleep movement is not back filled; minutes without a value get the 'NA'
# placeholder instead.
main_df = main_df.assign(
    sleepMovementActivityLevel=main_df['sleepMovementActivityLevel'].fillna('NA')
)

In [None]:
# TODO: decide later how to use this frame; for now just list the distinct
# sleep level values it contains.
pd.unique(sleep_activity_lvl_df['sleepActivityLevel'])

In [None]:
# Respiration: back fill at most two minutes before each reading so longer gaps
# are not bridged, then mark whatever is still empty with 'NA'.
main_df['respirationValue'] = (
    main_df['respirationValue']
    .bfill(limit=2)
    .fillna('NA')
)

In [None]:
# Sleep stress: back fill at most three minutes before each reading so longer
# gaps are not bridged, then mark whatever is still empty with 'NA'.
main_df['sleepStress'] = (
    main_df['sleepStress']
    .bfill(limit=3)
    .fillna('NA')
)

In [None]:
# backfilling stress level
# bfill with a limit of three (readings arrive every three minutes)
main_df['stressLevel'] = main_df['stressLevel'].bfill(limit=3)
# Keep readings >= 0 and replace everything else (negative values and minutes
# that still have no reading) with 'NA'. Series.where is used instead of
# np.where: np.where(numbers, ..., 'NA') returns a string array, which turned
# every valid reading into text such as '25.0'.
valid_stress = main_df['stressLevel'] >= 0
main_df['stressLevel'] = main_df['stressLevel'].astype(object).where(valid_stress, 'NA')

In [None]:
# Body battery: back fill at most three minutes before each reading, then mark
# the rest with 'NA'. Columns of body_battery_df that are no longer in main_df
# (e.g. the dropped timestamp) are skipped.
for column in body_battery_df.columns:
    if column not in main_df.columns:
        continue
    main_df[column] = main_df[column].bfill(limit=3).fillna('NA')

In [None]:
# Show the rows that have a heart-rate value.
main_df.loc[main_df['heartRate'].notna()]