In [1]:
import json
import pandas as pd
import numpy as np
from pathlib import Path 

import hvplot.pandas



In [15]:
# Load the raw sleep export (a JSON file) into a pandas DataFrame.
sleep_data_json = Path('../data/raw/dummy_data.json')

# Decode explicitly as UTF-8 (the standard JSON encoding) rather than relying
# on the platform's locale default, which differs between operating systems.
with sleep_data_json.open(encoding='utf-8') as f:
    sleep_data = json.load(f)

# One DataFrame row per sleep record in the JSON payload.
df = pd.DataFrame(sleep_data)

In [16]:
# Keep only the records typed 'long_sleep' (used here as the nighttime sleep)
df.loc[df['type'].eq('long_sleep')]

Unnamed: 0,id,average_breath,average_heart_rate,average_hrv,awake_time,bedtime_end,bedtime_start,day,deep_sleep_duration,efficiency,...,readiness,readiness_score_delta,rem_sleep_duration,restless_periods,sleep_phase_5_min,sleep_score_delta,sleep_algorithm_version,time_in_bed,total_sleep_duration,type
0,05255bcf-717d-43f3-a578-8bcdc5803d90,14.625,59.625,83.0,2479,2024-10-01T06:51:21-04:00,2024-09-30T23:53:02-04:00,2024-10-01,2640,90,...,"{'contributors': {'activity_balance': 100, 'bo...",0.0,4860,131,4224442211111114222242222222222222333333222222...,0.0,v2,25099,22620,long_sleep
2,2758a095-95eb-456a-a11a-7f093c5e751b,13.75,64.125,66.0,3519,2024-10-02T07:22:38-04:00,2024-10-02T00:32:59-04:00,2024-10-02,720,86,...,"{'contributors': {'activity_balance': 99, 'bod...",0.0,5550,99,4422112244224242122222222242333333322222422222...,0.0,v2,24579,21060,long_sleep
5,d2595754-011c-4a1e-b6b5-e67ff6c11e97,13.75,64.125,56.0,3480,2024-10-03T04:37:58-04:00,2024-10-02T22:51:58-04:00,2024-10-03,690,83,...,"{'contributors': {'activity_balance': 96, 'bod...",0.0,1920,65,4444221122222242224222233332222222222222222222...,0.0,v2,20760,17280,long_sleep
6,3cd913e8-6160-43ed-b426-9db238411de5,15.75,66.375,66.0,7028,2024-10-04T06:52:39-04:00,2024-10-03T23:15:01-04:00,2024-10-04,3780,74,...,"{'contributors': {'activity_balance': 94, 'bod...",0.0,4320,74,4444421142222211222223333322111114222222222223...,0.0,v2,27458,20430,long_sleep
12,50d5fb7c-923d-4711-a33d-74adaa6894e2,13.5,64.375,54.0,8040,2024-10-05T09:30:32-04:00,2024-10-05T00:01:32-04:00,2024-10-05,1050,76,...,"{'contributors': {'activity_balance': 96, 'bod...",0.0,6900,124,4444222222214222222222244223333222222222222122...,0.0,v2,34140,26100,long_sleep
14,0b104562-9f4e-4780-ace0-bc5e03c6820f,15.25,60.0,73.0,4890,2024-10-06T07:52:27-04:00,2024-10-06T00:54:27-04:00,2024-10-06,1380,81,...,"{'contributors': {'activity_balance': 89, 'bod...",0.0,9840,117,4444222422224222224332433333334322222123223222...,0.0,v2,25080,20190,long_sleep
15,c126e1ba-4c97-475d-9552-9a829d6b122a,15.75,63.0,63.0,4020,2024-10-07T07:35:59-04:00,2024-10-07T01:35:29-04:00,2024-10-07,1980,81,...,"{'contributors': {'activity_balance': 93, 'bod...",0.0,3960,98,4444222111112222222214222222333332122222222242...,0.0,v2,21630,17610,long_sleep


In [17]:
# Pull out a single night to inspect: the sixth 'long_sleep' record (position 5)
df_1night = df.loc[df['type'].eq('long_sleep')].iloc[5]
df_1night

id                                      0b104562-9f4e-4780-ace0-bc5e03c6820f
average_breath                                                         15.25
average_heart_rate                                                      60.0
average_hrv                                                             73.0
awake_time                                                              4890
bedtime_end                                        2024-10-06T07:52:27-04:00
bedtime_start                                      2024-10-06T00:54:27-04:00
day                                                               2024-10-06
deep_sleep_duration                                                     1380
efficiency                                                                81
heart_rate                 {'interval': 300.0, 'items': [None, 59.0, 58.0...
hrv                        {'interval': 300.0, 'items': [None, 52.0, 52.0...
latency                                                                  930

In [18]:
# Minutes covered by the sleep-phase string, counting 5 minutes per character
# (420 min for this night)
5 * len(df_1night['sleep_phase_5_min'])

420

In [19]:
# Minutes covered by the movement string, counting half a minute (30 s) per
# character (418 min for this night)
len(df_1night['movement_30_sec']) * 0.5

418.0

In [20]:
# 418 min * 60 = 25080 s, so the movement trace spans exactly the
# 'time_in_bed' value stored in the json file
df_1night.loc['time_in_bed']

np.int64(25080)

In [21]:
# Distinct days that have a 'long_sleep' record
df.loc[df['type'] == 'long_sleep', 'day'].unique()

array(['2024-10-01', '2024-10-02', '2024-10-03', '2024-10-04',
       '2024-10-05', '2024-10-06', '2024-10-07'], dtype=object)

In [22]:
# Parse the night's bedtime start into a pandas timestamp; it anchors the time axis
bedtime_start = pd.to_datetime(df_1night['bedtime_start'])

# Both series are stored as strings; turn every character into one integer sample
movement_30_sec = list(map(int, df_1night['movement_30_sec']))
sleep_phase_5_min = list(map(int, df_1night['sleep_phase_5_min']))

# One timestamp per movement sample, spaced 30 seconds apart from bedtime start
timeseries_30_sec = pd.date_range(
    start=bedtime_start,
    periods=len(movement_30_sec),
    freq='30s',
)

# Stretch each 5-minute phase value over its ten 30-second slots, then trim the
# tail so the result has exactly as many entries as there are timestamps
sleep_phase_30_sec = np.repeat(sleep_phase_5_min, (5 * 60) // 30)[:len(timeseries_30_sec)]

# Assemble the 30-second frame, tag every row with the night's day and
# index it by (day, time)
df_resampled = (
    pd.DataFrame({
        'time': timeseries_30_sec,
        'movement': movement_30_sec,
        'sleep_phase': sleep_phase_30_sec,
    })
    .assign(day=df_1night['day'])
    .set_index(['day', 'time'])
)
df_resampled

Unnamed: 0_level_0,Unnamed: 1_level_0,movement,sleep_phase
day,time,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-10-06,2024-10-06 00:54:27-04:00,1,4
2024-10-06,2024-10-06 00:54:57-04:00,1,4
2024-10-06,2024-10-06 00:55:27-04:00,2,4
2024-10-06,2024-10-06 00:55:57-04:00,1,4
2024-10-06,2024-10-06 00:56:27-04:00,2,4
2024-10-06,...,...,...
2024-10-06,2024-10-06 07:49:57-04:00,1,4
2024-10-06,2024-10-06 07:50:27-04:00,2,4
2024-10-06,2024-10-06 07:50:57-04:00,2,4
2024-10-06,2024-10-06 07:51:27-04:00,2,4


In [23]:
# Rebind df_resampled to the frame stored in the interim parquet file
# (NOTE(review): presumably the same 30-second resampling applied to every
# night - confirm against whatever writes this file)
df_resampled = pd.read_parquet(Path('../data/interim/dummy_data.parquet'))

In [24]:
# Display the frame loaded from the parquet file (rendered truncated by pandas)
df_resampled

Unnamed: 0_level_0,Unnamed: 1_level_0,movement,sleep_phase
day,time,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-10-01,2024-09-30 23:53:02-04:00,1,4
2024-10-01,2024-09-30 23:53:32-04:00,1,4
2024-10-01,2024-09-30 23:54:02-04:00,3,4
2024-10-01,2024-09-30 23:54:32-04:00,2,4
2024-10-01,2024-09-30 23:55:02-04:00,3,4
...,...,...,...
2024-10-07,2024-10-07 07:33:29-04:00,1,4
2024-10-07,2024-10-07 07:33:59-04:00,1,4
2024-10-07,2024-10-07 07:34:29-04:00,1,4
2024-10-07,2024-10-07 07:34:59-04:00,1,4


In [25]:
# Identify potential RBD events: samples with movement >= 3 while
# sleep_phase == 3 (the code labelled 'REM Sleep' in the plotting cell)
rbd_mask = df_resampled['movement'].ge(3) & df_resampled['sleep_phase'].eq(3)

# Flagged rows get movement + 0.5 in a new 'RBD' column; all other rows stay NaN
# (NOTE(review): the 0.5 offset presumably lifts the markers just above the
# movement line when plotted - confirm)
df_resampled.loc[rbd_mask, 'RBD'] = df_resampled['movement'] + 0.5

# Show only the flagged rows
df_resampled.loc[rbd_mask]

Unnamed: 0_level_0,Unnamed: 1_level_0,movement,sleep_phase,RBD
day,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2024-10-05,2024-10-05 04:50:02-04:00,3,3,3.5
2024-10-06,2024-10-06 02:36:27-04:00,4,3,4.5
2024-10-06,2024-10-06 02:36:57-04:00,3,3,3.5
2024-10-06,2024-10-06 04:10:27-04:00,3,3,3.5
2024-10-06,2024-10-06 04:26:27-04:00,3,3,3.5
2024-10-06,2024-10-06 06:40:27-04:00,3,3,3.5
2024-10-06,2024-10-06 07:00:57-04:00,3,3,3.5
2024-10-06,2024-10-06 07:01:27-04:00,3,3,3.5
2024-10-07,2024-10-07 07:03:29-04:00,3,3,3.5
2024-10-07,2024-10-07 07:24:29-04:00,3,3,3.5


In [26]:
# Plot sleep phase and movement over time, one view per day, and overlay the
# potential RBD events as markers

from holoviews import opts
from bokeh.models.formatters import DatetimeTickFormatter

# Render x-axis ticks as HH:MM at both the hour and the minute tick scales
formatter = DatetimeTickFormatter(hours='%H:%M', minutes='%H:%M')

# Text labels shown on the y-axis in place of the numeric sleep-phase codes
sleep_phase_labels = {1: 'Deep Sleep', 2: 'Light Sleep', 3: 'REM Sleep', 4: 'Awake'}

# Sleep-phase trace, with its y ticks replaced by the labels above
line_plot = df_resampled.hvplot.line(
    x='time',
    y='sleep_phase',
    xformatter=formatter,
    label='Sleep Phase',
    groupby='day',
).opts(
    ylabel='Sleep Phase',
    yticks=list(sleep_phase_labels.items()),
)

# Movement trace, drawn as a thin orange line
line_plot2 = df_resampled.hvplot.line(
    x='time',
    y='movement',
    xformatter=formatter,
    label='Movement',
    groupby='day',
).opts(
    ylabel='Movement',
    color='orange',
    line_width=.5,
)

# Large green markers at the rows that carry a value in the 'RBD' column
scatter_plot = df_resampled.hvplot.scatter(
    x='time',
    y='RBD',
    color='green',
    size=100,
    label='RBD Event?',
    groupby='day',
).opts(
    yaxis='right',
    ylabel='RBD',
)

# Stack the three layers into a single overlay and apply overlay-level axis options
combined_plot = line_plot * line_plot2 * scatter_plot
combined_plot.opts(
    opts.Overlay(
        yaxis='left',
        yformatter='%d',  # Adjust y-axis formatter if needed
    )
)

BokehModel(combine_events=True, render_bundle={'docs_json': {'6bb2d132-b252-4de6-847e-330f56f09019': {'version…