In [1]:
import pandas as pd
pd.set_option('display.float_format', lambda x: '%.2f' %x)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import re

# import plotly_credentials
# plotly_username = plotly_credentials.credentials['username']
# plotly_api_key = plotly_credentials.credentials['api_key']

import plotly
plotly.offline.init_notebook_mode(connected = True)

import plotly.graph_objs as go

In [2]:
# Import sleep and heart rate data
sleep = pd.read_csv('../Data/sleep_cleaned.csv', index_col = 'logId', parse_dates = ['startTime', 'endTime', 'dateOfSleep'])

heart_rate = pd.read_csv('../Data/heartrate.csv', parse_dates = ['dateTime'])

In [3]:
sleep.head()

Unnamed: 0_level_0,dateOfSleep,efficiency,endTime,infoCode,levels.summary.asleep.count,levels.summary.asleep.minutes,levels.summary.awake.count,levels.summary.awake.minutes,levels.summary.deep.count,levels.summary.deep.minutes,...,levels.summary.wake.count,levels.summary.wake.minutes,levels.summary.wake.thirtyDayAvgMinutes,minutesAfterWakeup,minutesAsleep,minutesAwake,minutesToFallAsleep,startTime,timeInBed,type
logId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
20597811657,2018-12-26,96,2018-12-26 10:55:00,0,0.0,0.0,0.0,0.0,6.0,65.0,...,32.0,33.0,0.0,0,453,33,0,2018-12-26 02:49:00,486,stages
20598864707,2018-12-26,95,2018-12-26 14:09:30,2,0.0,95.0,1.0,2.0,0.0,0.0,...,0.0,0.0,0.0,0,95,5,0,2018-12-26 12:29:00,100,classic
20608911100,2018-12-27,94,2018-12-27 10:23:30,0,0.0,0.0,0.0,0.0,3.0,63.0,...,33.0,66.0,33.0,0,393,66,0,2018-12-27 02:44:00,459,stages
20624450075,2018-12-28,94,2018-12-28 10:01:30,0,0.0,0.0,0.0,0.0,6.0,83.0,...,37.0,49.0,50.0,1,400,49,0,2018-12-28 02:32:00,449,stages
20626208454,2018-12-28,97,2018-12-28 15:58:00,2,0.0,83.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0,83,3,0,2018-12-28 14:32:00,86,classic


In [4]:
sleep.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 89 entries, 20597811657 to 21725640545
Data columns (total 29 columns):
dateOfSleep                                 89 non-null datetime64[ns]
efficiency                                  89 non-null int64
endTime                                     89 non-null datetime64[ns]
infoCode                                    89 non-null int64
levels.summary.asleep.count                 89 non-null float64
levels.summary.asleep.minutes               89 non-null float64
levels.summary.awake.count                  89 non-null float64
levels.summary.awake.minutes                89 non-null float64
levels.summary.deep.count                   89 non-null float64
levels.summary.deep.minutes                 89 non-null float64
levels.summary.deep.thirtyDayAvgMinutes     89 non-null float64
levels.summary.light.count                  89 non-null float64
levels.summary.light.minutes                89 non-null float64
levels.summary.light.thirtyDayAvgMin

In [5]:
heart_rate.head()

Unnamed: 0,logId,dateTime,value.bpm,value.confidence
0,20597811657.0,2018-12-26 02:49:05,70,1
1,20597811657.0,2018-12-26 02:49:10,73,1
2,20597811657.0,2018-12-26 02:49:15,74,1
3,20597811657.0,2018-12-26 02:49:20,73,1
4,20597811657.0,2018-12-26 02:49:25,74,1


In [6]:
heart_rate.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 215843 entries, 0 to 215842
Data columns (total 4 columns):
logId               215843 non-null float64
dateTime            215843 non-null datetime64[ns]
value.bpm           215843 non-null int64
value.confidence    215843 non-null int64
dtypes: datetime64[ns](1), float64(1), int64(2)
memory usage: 6.6 MB


# Split Data into Pre- & Post- switch

In [7]:
# Split data into pre- and post- single pillow change. Beginning of March for simplicity
presingle_sleep = sleep[sleep['dateOfSleep'].dt.month != 3]
single_sleep = sleep[sleep['dateOfSleep'].dt.month == 3]

In [8]:
# Filter out sleeps less than 3 hours
presingle_nights = presingle_sleep[presingle_sleep['minutesAsleep'] >= 180]
single_nights = single_sleep[single_sleep['minutesAsleep'] >= 180]

# Investigate Heart Rate data during sleep

In [9]:
# Filter out logId in heart_rate not found in presingle_nights and single_nights
presingle_hr = heart_rate[heart_rate['logId'].isin(presingle_nights.index)]
single_hr = heart_rate[heart_rate['logId'].isin(single_nights.index)]

In [10]:
presingle_hr.head(10)

Unnamed: 0,logId,dateTime,value.bpm,value.confidence
0,20597811657.0,2018-12-26 02:49:05,70,1
1,20597811657.0,2018-12-26 02:49:10,73,1
2,20597811657.0,2018-12-26 02:49:15,74,1
3,20597811657.0,2018-12-26 02:49:20,73,1
4,20597811657.0,2018-12-26 02:49:25,74,1
5,20597811657.0,2018-12-26 02:49:35,79,1
6,20597811657.0,2018-12-26 02:49:40,80,1
7,20597811657.0,2018-12-26 02:49:45,79,2
8,20597811657.0,2018-12-26 02:49:55,79,1
9,20597811657.0,2018-12-26 02:50:10,76,1


In [11]:
def standardize_sleep_time(hr_df):
    '''This function adds a column to signify how long into a sleep observation the heart rate
    recording was taken'''
    
    hr_timer = pd.DataFrame()
    
    # Use sleep start times to subtract from each time in hr_df
    for x in hr_df['logId'].unique():
        filtered_hr = hr_df[hr_df['logId'] == x]
        
        filtered_hr['time_asleep'] = filtered_hr['dateTime'] - filtered_hr['dateTime'].min()
        
        hr_timer = hr_timer.append(filtered_hr)
    
    return hr_timer

In [13]:
presingle_hr_timer = standardize_sleep_time(presingle_hr)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [14]:
presingle_hr_timer.head()

Unnamed: 0,logId,dateTime,value.bpm,value.confidence,time_asleep
0,20597811657.0,2018-12-26 02:49:05,70,1,00:00:00
1,20597811657.0,2018-12-26 02:49:10,73,1,00:00:05
2,20597811657.0,2018-12-26 02:49:15,74,1,00:00:10
3,20597811657.0,2018-12-26 02:49:20,73,1,00:00:15
4,20597811657.0,2018-12-26 02:49:25,74,1,00:00:20


In [19]:
test_hr = presingle_hr_timer[presingle_hr_timer['logId'] == 20608911100]

test_trace = go.Scatter(x = test_hr['time_asleep'],
                       y = test_hr['value.bpm'])

test_fig = go.Figure(data = [test_trace])

plotly.offline.plot(test_fig)

'file://C:\\Users\\Chiga\\Desktop\\Projects\\MyFitBit\\EDA\\temp-plot.html'

In [15]:
pre_plotly_hr_data = []

for x in presingle_hr_timer['logId'].unique():
    hr_data = presingle_hr_timer[presingle_hr_timer['logId'] == x]
    
    trace = go.Scatter(x = presingle_hr_timer['time_asleep'],
                      y = presingle_hr_timer['value.bpm'],
                      opacity = 0.6,
                      name = x)
    
    pre_plotly_hr_data.append(trace)

In [16]:
pre_fig = go.Figure(data = pre_plotly_hr_data)

In [17]:
plotly.offline.plot(pre_fig)

'file://C:\\Users\\Chiga\\Desktop\\Projects\\MyFitBit\\EDA\\temp-plot.html'

## Investigate Different Statistics of ``levels``

In [None]:
# Create a regex pattern to filter out level summary counts and minutes
sleep_col_pattern = re.compile('levels.summary.*.[count, minute]')

In [None]:
# Filter out sleeps less than 3 hours and 'levels.summary' columns
presingle_sleep_levels = presingle_sleep.loc[:, [bool(re.match(sleep_col_pattern, x)) for x in sleep.columns]]
postsingle_sleep_levels = single_sleep.loc[:, [bool(re.match(sleep_col_pattern, x)) for x in sleep.columns]]

In [None]:
presingle_sleep_levels.head()

In [None]:
pre_plotly_data = []
for x in presingle_sleep_levels.index:
    
    hr_data = heart_rate[heart_rate['logId'] == x]
    
    trace = go.Scatter(x = hr_data['dateTime'],
                      y = hr_data['value.bpm'],
                      opacity = 0.6,
                      name = x)
    
    pre_plotly_data.append(trace)

In [None]:
pre_fig = go.Figure(data = pre_plotly_data)

In [None]:
plotly.offline.plot(pre_fig)

In [None]:
post_plotly_data = []
for x in postsingle_sleep_levels.index:
    
    hr_data = heart_rate[heart_rate['logId'] == x]
    
    trace = go.Scatter(x = hr_data['dateTime'],
                      y = hr_data['value.bpm'],
                      opacity = 0.6,
                      name = x)
    
    post_plotly_data.append(trace)

In [None]:
post_fig = go.Figure(data = post_plotly_data)

In [None]:
plotly.offline.plot(post_fig)

In [None]:
trace1 = go.Histogram(x = presingle_sleep['levels.summary.wake.count'],
                     opacity = 0.6,
                     name = 'Pre-Single Pillow')

trace2 = go.Histogram( x = single_sleep['levels.summary.wake.count'],
                     opacity = 0.6,
                     name = 'Single Pillow')

data = [trace1, trace2]

In [None]:
layout = go.Layout(barmode = 'overlay',
                  title = 'Distribution of Pre- and Post- Single Pillow Wake',
                  xaxis = dict(title = 'Total Wake Count by Night'),
                  yaxis = dict(title = 'Count'))

In [None]:
fig = go.Figure(data = data, layout = layout)

In [None]:
plotly.offline.plot(fig)