# Fitbit Exploration

In [1]:
import os
import sys
sys.path.append('../')

from src.features import build_features
from src.visualization import visualize
from src.reports import make_report

import pandas as pd
import numpy as np

from datetime import datetime, timedelta
import json
import ast

# Data Import

## Functions
Below are a set of useful functions for handling the Fitbit Data

In [2]:
def get_device_df(info_df):
    '''
    Take dictionary-like entries for fitbit info dataframe for each row in a dataframe and makes a new dataframe
    
    Inputs:
    - info_df: the fitbit info dataframe with the dictionary-like entries
    
    Returns a dataframe for the device column
    '''
    
    overall_dict = {}
    for row in range(len(info_df)):
        Dict = ast.literal_eval(info_df['devices'][row])
        if type(Dict) == dict:
            Dict = Dict
        elif type(Dict) in [tuple,list] and len(Dict) > 1:
            Dict = Dict[0]
        else:
            continue

        for key in Dict.keys():
            overall_dict.setdefault(key, [])
            overall_dict[key].append(Dict[key])
        # adding in the date of recording
        overall_dict.setdefault('date', [])
        overall_dict['date'].append(info_df.index[row])
        
    df = pd.DataFrame(overall_dict)
    df['date'] = pd.to_datetime(df['date'],errors='coerce')
    return df.set_index('date')

In [5]:
def process_fitbit_intraday(raw_df,resample_rate=60):
    '''
    
    '''
    try:
        df = raw_df.resample(f'{resample_rate}T').mean()
    except TypeError:
        print(f"\t\tDataframe is most likely empty ({len(raw_df)})")
        return raw_df
    return df

## Single Participant
Starting with one participant to see how to work with the data

In [21]:
pt = "2xtqkfz1"
#pt_dir = f"/Volumes/HEF_Dissertation_Research/utx000/extension/data/fitbit/{pt}"
pt_dir = f"../data/raw/utx000/fitbit/{pt}"
single_info = pd.read_csv(f"{pt_dir}/fitbit/fitbit_info.csv")
single_daily = pd.read_csv(f"{pt_dir}/fitbit/fitbit_daily_records.csv")
single_intra = pd.read_csv(f"{pt_dir}/fitbit/fitbit_intraday_records.csv",index_col=0,parse_dates=True)

### Info CSV
Below is an output of the ```fitbit_info.csv```

In [7]:
single_info.head()

Unnamed: 0,date,devices,friends,friends_leaderboard
0,2020-06-19 01:00:31.116944,"[{'battery': 'High', 'batteryLevel': 89, 'devi...",[],"[{'id': '86SZWS', 'type': 'ranked-user', 'step..."
1,2020-06-20 01:00:30.972641,"[{'battery': 'High', 'batteryLevel': 89, 'devi...",[],"[{'id': '86SZWS', 'type': 'ranked-user', 'step..."
2,2020-06-21 01:00:30.793194,"[{'battery': 'High', 'batteryLevel': 89, 'devi...",[],"[{'id': '86SZWS', 'type': 'ranked-user', 'step..."
3,2020-06-22 01:00:30.964714,"[{'battery': 'High', 'batteryLevel': 89, 'devi...",[],"[{'id': '86SZWS', 'type': 'ranked-user', 'step..."
4,2020-06-23 01:00:31.202136,"[{'battery': 'High', 'batteryLevel': 89, 'devi...",[],"[{'id': '86SZWS', 'type': 'ranked-user', 'step..."


<div class="alert alert-block alert-danger"> 
    The get_device_df function does not work, but it doesn't give us any good information anyway so I will leave it unchanged

In [8]:
#devices = get_device_df(single_info)
#devices.head()

The ```fitbit_info.csv``` seems to have good information for debugging purposes and tracking battery levels, but overall won't be useful for any analysis purposes.

### Daily CSV

In [22]:
single_daily.head()

Unnamed: 0,date,activities_calories,activities_caloriesBMR,activities_steps,activities_distance,activities_minutesSedentary,activities_minutesLightlyActive,activities_minutesFairlyActive,activities_minutesVeryActive,activities_activityCalories,body_bmi,body_fat,body_weight,foods_log_caloriesIn,foods_log_water,activities_heart,sleep
0,2020-05-01 00:00:00,1320.0,1320.0,0,0.0,1440,0,0,0,0.0,19.993023,0.0,119.99,0.0,0.0,"{'customHeartRateZones': [], 'heartRateZones':...",
1,2020-05-02 00:00:00,1320.0,1320.0,0,0.0,1440,0,0,0,0.0,19.993023,0.0,119.99,0.0,0.0,"{'customHeartRateZones': [], 'heartRateZones':...",
2,2020-05-03 00:00:00,1320.0,1320.0,0,0.0,1440,0,0,0,0.0,19.993023,0.0,119.99,0.0,0.0,"{'customHeartRateZones': [], 'heartRateZones':...",
3,2020-05-04 00:00:00,1320.0,1320.0,0,0.0,1440,0,0,0,0.0,19.993023,0.0,119.99,0.0,0.0,"{'customHeartRateZones': [], 'heartRateZones':...",
4,2020-05-05 00:00:00,1320.0,1320.0,0,0.0,1440,0,0,0,0.0,19.993023,0.0,119.99,0.0,0.0,"{'customHeartRateZones': [], 'heartRateZones':...",


In [24]:
single_daily["sleep"].unique()

array([nan,
       "[{'dateOfSleep': '2020-07-10', 'duration': 19740000, 'efficiency': 91, 'endTime': '2020-07-10T09:11:00.000', 'infoCode': 0, 'isMainSleep': True, 'levels': {'data': [{'dateTime': '2020-07-10T03:42:00.000', 'level': 'wake', 'seconds': 720}, {'dateTime': '2020-07-10T03:54:00.000', 'level': 'light', 'seconds': 960}, {'dateTime': '2020-07-10T04:10:00.000', 'level': 'deep', 'seconds': 2550}, {'dateTime': '2020-07-10T04:52:30.000', 'level': 'light', 'seconds': 2880}, {'dateTime': '2020-07-10T05:40:30.000', 'level': 'wake', 'seconds': 240}, {'dateTime': '2020-07-10T05:44:30.000', 'level': 'light', 'seconds': 1860}, {'dateTime': '2020-07-10T06:15:30.000', 'level': 'deep', 'seconds': 600}, {'dateTime': '2020-07-10T06:25:30.000', 'level': 'light', 'seconds': 1170}, {'dateTime': '2020-07-10T06:45:00.000', 'level': 'wake', 'seconds': 1050}, {'dateTime': '2020-07-10T07:02:30.000', 'level': 'light', 'seconds': 30}, {'dateTime': '2020-07-10T07:03:00.000', 'level': 'rem', 'seconds':

### Intraday CSV

In [10]:
single_intra.head()

Unnamed: 0_level_0,activities_calories,activities_steps,activities_distance,activities_heart
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-05-22 09:02:00,0.85272,,,
2020-05-22 09:02:05,,,,70.0
2020-05-22 09:02:20,,,,70.0
2020-05-22 09:02:30,,,,72.0
2020-05-22 09:02:35,,,,76.0


## All Participants
Now that we have looked at the individual data files, now we can start to import all the data for all participants.

In [9]:
def import_fitbit(filename, pt_dir=f"/Volumes/HEF_Dissertation_Research/utx000/extension/data/fitbit/",verbose=False):
    '''
    
    '''
    print(f"\tReading from file {filename}")
    df = pd.DataFrame()
    for pt in os.listdir(pt_dir):
        if pt[0] != ".":
            if verbose:
                print(f"\t\tReading for participant {pt}")
            try:
                temp = pd.read_csv(f"{pt_dir}{pt}/fitbit/fitbit_{filename}.csv", index_col=0, parse_dates=True)
                if filename[:4] == "intr":
                    temp = process_fitbit_intraday(temp)
                
                temp["beiwe"] = pt
                df = df.append(temp)
            except FileNotFoundError:
                print(f"\t\tFile {filename} not found for participant {pt}")
                
    return df

In [11]:
#fitbit_info = import_fitbit("info")
#fitbit_intra = import_fitbit("intraday_records")

	Reading from file daily_records
		Reading for participant hfttkth7
		Reading for participant shi1ykro
		Reading for participant 5bg4j1pp
		Reading for participant r11k6uxz
		Reading for participant 8vpj3b9v
		Reading for participant hcpu5myv
		Reading for participant itn82o6p
		Reading for participant qpro16vm
		Reading for participant 34483b37
		Reading for participant nvtfpaor
		Reading for participant lkkjddam
		Reading for participant 6rxyg4rp
		Reading for participant 789gcb6u
		File daily_records not found for participant 789gcb6u
		Reading for participant 1adkek2h
		Reading for participant itmylz3g
		Reading for participant zdpffrox
		Reading for participant 2xtqkfz1
		Reading for participant fxw5xupi
		Reading for participant derjasj9
		Reading for participant 2vs5x6x9
		Reading for participant bjhpm88s
		Reading for participant vpy1a985
		Reading for participant fjor6fof
		Reading for participant 1a9udoc5
		Reading for participant drs2jy5f
		Reading for participant ijz9gssj
	

### Daily Fitbit
Below is an output of the ```daily_fitbit.csv``` file and a closer look at some of the more interesting entries.

In [17]:
pt_dir = f"../data/raw/utx000/fitbit/"
fitbit_daily = import_fitbit("daily_records",pt_dir)
fitbit_daily.head()

Unnamed: 0_level_0,activities_calories,activities_caloriesBMR,activities_steps,activities_distance,activities_minutesSedentary,activities_minutesLightlyActive,activities_minutesFairlyActive,activities_minutesVeryActive,activities_activityCalories,body_bmi,body_fat,body_weight,foods_log_caloriesIn,foods_log_water,activities_heart,sleep,beiwe
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2020-05-01,1949.0,1255.0,8704,3.47244,520,258,22,0,886.0,22.082436,0.0,119.99,0.0,0.0,"{'customHeartRateZones': [], 'heartRateZones':...","[{'dateOfSleep': '2020-05-01', 'duration': 384...",15tejjtw
2020-05-02,1821.0,1255.0,7532,3.032745,540,249,0,0,740.0,22.082436,0.0,119.99,0.0,0.0,"{'customHeartRateZones': [], 'heartRateZones':...","[{'dateOfSleep': '2020-05-02', 'duration': 390...",15tejjtw
2020-05-03,2455.0,1255.0,20823,8.556225,686,290,22,81,1503.0,22.082436,0.0,119.99,0.0,0.0,"{'customHeartRateZones': [], 'heartRateZones':...","[{'dateOfSleep': '2020-05-03', 'duration': 216...",15tejjtw
2020-05-04,1327.0,1255.0,522,0.210179,792,40,0,0,92.0,22.082436,0.0,119.99,0.0,0.0,"{'customHeartRateZones': [], 'heartRateZones':...","[{'dateOfSleep': '2020-05-04', 'duration': 364...",15tejjtw
2020-05-05,1964.0,1255.0,11851,5.009892,411,124,27,65,862.0,22.082436,0.0,119.99,0.0,0.0,"{'customHeartRateZones': [], 'heartRateZones':...","[{'dateOfSleep': '2020-05-05', 'duration': 576...",15tejjtw


The ```fitbit_daily.csv``` has a lot of good summary data including sleep summary values and sleep data by the minute.

In [41]:
def get_sleep_df(daily_df,verbose=False):
    '''
    Creates a dataframe with the daily sleep data summarized
    
    Inputs:
    - daily_df: dataframe created from the daily fitbit csv file
    
    Returns a dataframe of the daily sleep data
    '''
    overall_dict = {}
    for row in range(len(daily_df)):
        # in case Fitbit didn't record sleep records for that night - value is NaN
        pt = daily_df['beiwe'][row]
        if verbose:
            print(f"\t\tWorking for Participant {pt}")
        # pts with classic sleep data
        if "awakeCount" in daily_df.columns:
            continue
        if type(daily_df['sleep'][row]) == float:
            continue
        else:
            Dict = ast.literal_eval(daily_df['sleep'][row])
            if type(Dict) == dict:
                Dict = Dict
            else:
                Dict = Dict[0]
            if "awakeCount" not in Dict.keys():
                for key in Dict.keys():
                    overall_dict.setdefault(key, [])
                    overall_dict[key].append(Dict[key])
                # adding in the date of recording
                overall_dict.setdefault('date', [])
                overall_dict['date'].append(daily_df.index[row])
                # adding beiwe id
                overall_dict.setdefault('beiwe', [])
                overall_dict['beiwe'].append(daily_df['beiwe'][row])

    for key, val in overall_dict.items():
        print(f"{key}: {len(overall_dict[key])}")
    df = pd.DataFrame(overall_dict)
    df['date'] = pd.to_datetime(df['date'],errors='coerce')
    # removing classic sleep stage data
    df = df[df['type'] != 'classic']
    return df.set_index('date')

In [42]:
daily_slp = get_sleep_df(fitbit_daily,verbose=False)
daily_slp.head()

dateOfSleep: 3979
duration: 3979
efficiency: 3979
endTime: 3979
infoCode: 3979
isMainSleep: 3979
levels: 3979
logId: 3979
minutesAfterWakeup: 3979
minutesAsleep: 3979
minutesAwake: 3979
minutesToFallAsleep: 3979
startTime: 3979
timeInBed: 3979
type: 3979
date: 3979
beiwe: 3979


Unnamed: 0_level_0,dateOfSleep,duration,efficiency,endTime,infoCode,isMainSleep,levels,logId,minutesAfterWakeup,minutesAsleep,minutesAwake,minutesToFallAsleep,startTime,timeInBed,type,beiwe
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-05-14,2020-05-14,24360000,97,2020-05-14T07:13:00.000,0,True,{'data': [{'dateTime': '2020-05-14T00:27:00.00...,27170628887,0,379,27,0,2020-05-14T00:27:00.000,406,stages,hfttkth7
2020-05-15,2020-05-15,29580000,87,2020-05-15T08:06:30.000,0,True,{'data': [{'dateTime': '2020-05-14T23:53:30.00...,27187368950,8,392,101,0,2020-05-14T23:53:30.000,493,stages,hfttkth7
2020-05-16,2020-05-16,19740000,95,2020-05-16T04:57:00.000,0,True,{'data': [{'dateTime': '2020-05-15T23:28:00.00...,27197675883,7,287,42,0,2020-05-15T23:28:00.000,329,stages,hfttkth7
2020-05-17,2020-05-17,26820000,96,2020-05-17T09:28:30.000,0,True,{'data': [{'dateTime': '2020-05-17T02:01:30.00...,27214680283,8,403,44,0,2020-05-17T02:01:30.000,447,stages,hfttkth7
2020-05-18,2020-05-18,24960000,92,2020-05-18T07:20:00.000,0,True,{'data': [{'dateTime': '2020-05-18T00:24:00.00...,27227113904,0,351,65,0,2020-05-18T00:24:00.000,416,stages,hfttkth7


In [48]:
def get_minute_sleep_df(daily_sleep):
    '''
    Creates a dataframe for the minute sleep data
    
    Input(s):
    - daily_sleep: dataframe holding the daily sleep data with a column called minuteData
    
    Returns:
    - sleep_stages: a dataframe with sleep stage data for every stage transition
    - summary: a dataframe with the nightly sleep stage information
    '''
    
    data_dict = {'startDate':[],'endDate':[],'dateTime':[],'level':[],'seconds':[],'beiwe':[]}
    summary_dict = {'startDate':[],'endDate':[],'deep_count':[],'deep_minutes':[],'light_count':[],'light_minutes':[],
                    'rem_count':[],'rem_minutes':[],'wake_count':[],'wake_minutes':[],'beiwe':[]}
    for row in range(len(daily_sleep)):
        d0 = pd.to_datetime(daily_sleep['startTime'][row])
        d1 = pd.to_datetime(daily_sleep.index[row])
        sleep_dict = daily_sleep['levels'][row]
        for key in sleep_dict.keys():
            if key == 'data': # data without short wake periods
                temp_data = sleep_dict['data']
                for temp_data_dict in temp_data:
                    for data_key in temp_data_dict.keys():
                        data_dict[data_key].append(temp_data_dict[data_key])
                    data_dict['startDate'].append(d0.date())
                    data_dict['endDate'].append(d1.date())
                    data_dict['beiwe'].append(daily_sleep['beiwe'][row])
            elif key == 'summary': # nightly summary data - already in dictionary form
                for summary_key in sleep_dict['summary'].keys():
                    stage_dict = sleep_dict['summary'][summary_key]
                    for stage_key in ['count','minutes']:
                        summary_dict[f'{summary_key}_{stage_key}'].append(stage_dict[stage_key])
                    
                summary_dict['startDate'].append(d0.date())
                summary_dict['endDate'].append(d1.date())
                summary_dict['beiwe'].append(daily_sleep['beiwe'][row])
            else: # shortData or data with short wake periods - don't need
                pass
            
    sleep_stages = pd.DataFrame(data_dict)
    sleep_stages.columns = ['start_date','end_date','time','stage','time_at_stage','beiwe'] # renaming columns
    # adding column for numeric value of sleep stage 
    def numeric_from_str_sleep_stage(row):
        if row['stage'] == 'wake':
            return 0
        elif row['stage'] == 'light':
            return 1
        elif row['stage'] == 'deep':
            return 2
        elif row['stage'] == 'rem':
            return 3
        else:
            return -1
        
    sleep_stages['value'] = sleep_stages.apply(lambda row: numeric_from_str_sleep_stage(row), axis=1)
    
    summary = pd.DataFrame(summary_dict)
    return sleep_stages, summary

In [49]:
stages, summary = get_minute_sleep_df(daily_slp)
stages.head()

Unnamed: 0,start_date,end_date,time,stage,time_at_stage,beiwe,value
0,2020-05-14,2020-05-14,2020-05-14T00:27:00.000,wake,510,hfttkth7,0
1,2020-05-14,2020-05-14,2020-05-14T00:35:30.000,light,420,hfttkth7,1
2,2020-05-14,2020-05-14,2020-05-14T00:42:30.000,deep,1590,hfttkth7,2
3,2020-05-14,2020-05-14,2020-05-14T01:09:00.000,light,1290,hfttkth7,1
4,2020-05-14,2020-05-14,2020-05-14T01:30:30.000,rem,840,hfttkth7,3


In [15]:
summary.head()

Unnamed: 0,startDate,endDate,deep_count,deep_minutes,light_count,light_minutes,rem_count,rem_minutes,wake_count,wake_minutes,beiwe
0,2020-05-01,2020-05-01,5,119,43,313,18,135,53,73,15tejjtw
1,2020-05-02,2020-05-02,4,115,46,336,12,106,47,94,15tejjtw
2,2020-05-03,2020-05-03,4,90,22,157,5,54,20,60,15tejjtw
3,2020-05-04,2020-05-04,5,104,40,323,8,88,37,93,15tejjtw
4,2020-05-06,2020-05-06,4,101,30,211,19,121,34,74,15tejjtw
