# Exploring Sleep Data

In [1]:
import pandas as pd
import numpy as np

In [2]:
filename = 'data/AutoSleep-20201201-to-20230929.csv'
sleep_df = pd.read_csv(filename, parse_dates=['bedtime', 'waketime'])
sleep_index = pd.to_datetime(sleep_df['ISO8601'], utc=True).dt.strftime('%Y-%m-%d').to_numpy()
sleep_df.index = sleep_index
print(sleep_df.info(verbose=False))
print(sleep_df.columns)
sleep_df.tail(3)

<class 'pandas.core.frame.DataFrame'>
Index: 915 entries, 2020-12-31 to 2023-09-29
Columns: 35 entries, ISO8601 to notes
dtypes: datetime64[ns](2), float64(20), int64(1), object(12)
memory usage: 257.3+ KB
None
Index(['ISO8601', 'fromDate', 'toDate', 'bedtime', 'waketime', 'inBed',
       'awake', 'fellAsleepIn', 'sessions', 'asleep', 'asleepAvg7',
       'efficiency', 'efficiencyAvg7', 'quality', 'qualityAvg7', 'deep',
       'deepAvg7', 'sleepBPM', 'sleepBPMAvg7', 'dayBPM', 'dayBPMAvg7',
       'wakingBPM', 'wakingBPMAvg7', 'hrv', 'hrvAvg7', 'sleepHRV',
       'sleepHRVAvg7', 'SpO2Avg', 'SpO2Min', 'SpO2Max', 'respAvg', 'respMin',
       'respMax', 'tags', 'notes'],
      dtype='object')


Unnamed: 0,ISO8601,fromDate,toDate,bedtime,waketime,inBed,awake,fellAsleepIn,sessions,asleep,...,sleepHRV,sleepHRVAvg7,SpO2Avg,SpO2Min,SpO2Max,respAvg,respMin,respMax,tags,notes
2023-09-27,2023-09-27T20:59:59+02:00,"Tuesday, Sep 26, 2023","Wednesday, Sep 27, 2023",2023-09-27 03:09:00,2023-09-27 12:00:00,08:51:00,00:00:00,00:00:00,1,08:51:00,...,69.0,44.0,97.5,91.0,100.0,16.9,15.0,27.5,,
2023-09-28,2023-09-28T20:59:59+02:00,"Wednesday, Sep 27, 2023","Thursday, Sep 28, 2023",2023-09-28 12:03:00,2023-09-28 13:38:00,01:35:00,00:00:00,00:00:00,1,01:35:00,...,45.0,44.0,93.0,93.0,93.0,,,,,
2023-09-29,2023-09-29T20:59:59+02:00,"Thursday, Sep 28, 2023","Friday, Sep 29, 2023",2023-09-28 23:01:00,2023-09-29 10:04:00,11:03:00,00:43:00,00:00:00,1,10:20:00,...,46.0,44.0,95.9,90.0,100.0,17.3,15.0,21.0,,


In [3]:
filename = 'data/AutoSleep-20230614-to-20230929.csv'
rem_df = pd.read_csv(filename, parse_dates=['bedtime', 'waketime'])
rem_index = pd.to_datetime(rem_df['ISO8601'], utc=True).dt.strftime('%Y-%m-%d').to_numpy()
rem_df.index = rem_index
filt_rem_df = rem_df.loc[rem_df['REM'].notnull()]
print(filt_rem_df.info(verbose=False))
print(filt_rem_df.columns)
filt_rem_df.tail(3)

<class 'pandas.core.frame.DataFrame'>
Index: 98 entries, 2023-06-14 to 2023-09-29
Columns: 35 entries, ISO8601 to notes
dtypes: datetime64[ns](2), float64(18), int64(3), object(12)
memory usage: 27.6+ KB
None
Index(['ISO8601', 'fromDate', 'toDate', 'bedtime', 'waketime', 'inBed',
       'awake', 'fellAsleepIn', 'sessions', 'asleep', 'asleepAvg7',
       'efficiency', 'efficiencyAvg7', 'REM', 'REMAvg7', 'deep', 'deepAvg7',
       'sleepBPM', 'sleepBPMAvg7', 'dayBPM', 'dayBPMAvg7', 'wakingBPM',
       'wakingBPMAvg7', 'hrv', 'hrvAvg7', 'sleepHRV', 'sleepHRVAvg7',
       'SpO2Avg', 'SpO2Min', 'SpO2Max', 'respAvg', 'respMin', 'respMax',
       'tags', 'notes'],
      dtype='object')


Unnamed: 0,ISO8601,fromDate,toDate,bedtime,waketime,inBed,awake,fellAsleepIn,sessions,asleep,...,sleepHRV,sleepHRVAvg7,SpO2Avg,SpO2Min,SpO2Max,respAvg,respMin,respMax,tags,notes
2023-09-26,2023-09-26T21:59:59+02:00,"Monday, Sep 25, 2023","Tuesday, Sep 26, 2023",2023-09-26 06:35:00,2023-09-26 12:59:00,06:24:00,00:18:00,00:00:00,1,06:42:00,...,41,46,97.2,93.0,100.0,17.0,15.0,20.5,,
2023-09-27,2023-09-27T21:59:59+02:00,"Tuesday, Sep 26, 2023","Wednesday, Sep 27, 2023",2023-09-27 03:09:00,2023-09-27 12:00:00,08:51:00,00:03:30,00:00:00,1,08:47:30,...,69,46,97.5,91.0,100.0,16.9,15.0,27.5,,
2023-09-29,2023-09-29T21:59:59+02:00,"Thursday, Sep 28, 2023","Friday, Sep 29, 2023",2023-09-29 00:12:00,2023-09-29 10:04:00,09:52:00,00:26:30,00:00:00,1,10:18:30,...,46,47,95.9,90.0,100.0,17.2,15.5,21.0,,


In [4]:
on = filt_rem_df.columns.intersection(sleep_df.columns).to_list()
others = filt_rem_df.columns.difference(sleep_df.columns).to_list()
others

['REM', 'REMAvg7']

In [5]:
merged_df = sleep_df.copy()
merged_df.loc[sleep_df.index.isin(filt_rem_df.index), on] = filt_rem_df
merged_df.loc[sleep_df.index.isin(filt_rem_df.index), others] = filt_rem_df
print(merged_df.columns)
merged_df.tail(3)

Index(['ISO8601', 'fromDate', 'toDate', 'bedtime', 'waketime', 'inBed',
       'awake', 'fellAsleepIn', 'sessions', 'asleep', 'asleepAvg7',
       'efficiency', 'efficiencyAvg7', 'quality', 'qualityAvg7', 'deep',
       'deepAvg7', 'sleepBPM', 'sleepBPMAvg7', 'dayBPM', 'dayBPMAvg7',
       'wakingBPM', 'wakingBPMAvg7', 'hrv', 'hrvAvg7', 'sleepHRV',
       'sleepHRVAvg7', 'SpO2Avg', 'SpO2Min', 'SpO2Max', 'respAvg', 'respMin',
       'respMax', 'tags', 'notes', 'REM', 'REMAvg7'],
      dtype='object')


Unnamed: 0,ISO8601,fromDate,toDate,bedtime,waketime,inBed,awake,fellAsleepIn,sessions,asleep,...,SpO2Avg,SpO2Min,SpO2Max,respAvg,respMin,respMax,tags,notes,REM,REMAvg7
2023-09-27,2023-09-27T21:59:59+02:00,"Tuesday, Sep 26, 2023","Wednesday, Sep 27, 2023",2023-09-27 03:09:00,2023-09-27 12:00:00,08:51:00,00:03:30,00:00:00,1,08:47:30,...,97.5,91.0,100.0,16.9,15.0,27.5,,,02:10:00,01:58:21
2023-09-28,2023-09-28T20:59:59+02:00,"Wednesday, Sep 27, 2023","Thursday, Sep 28, 2023",2023-09-28 12:03:00,2023-09-28 13:38:00,01:35:00,00:00:00,00:00:00,1,01:35:00,...,93.0,93.0,93.0,,,,,,,
2023-09-29,2023-09-29T21:59:59+02:00,"Thursday, Sep 28, 2023","Friday, Sep 29, 2023",2023-09-29 00:12:00,2023-09-29 10:04:00,09:52:00,00:26:30,00:00:00,1,10:18:30,...,95.9,90.0,100.0,17.2,15.5,21.0,,,02:59:00,02:10:12


In [7]:
clean_cols = ['fromDate', 'toDate', 'bedtime', 'waketime', 'inBed', 'awake', 
              'fellAsleepIn', 'sessions', 'asleep', 'efficiency', 'quality', 
              'deep', 'sleepBPM', 'dayBPM', 'wakingBPM', 'hrv', 'sleepHRV',
              'SpO2Avg', 'SpO2Min', 'SpO2Max', 'respAvg', 'respMin', 'respMax', 
              'REM']

clean_df = merged_df.loc[:, clean_cols]
clean_df.tail(3)

Unnamed: 0,fromDate,toDate,bedtime,waketime,inBed,awake,fellAsleepIn,sessions,asleep,efficiency,...,wakingBPM,hrv,sleepHRV,SpO2Avg,SpO2Min,SpO2Max,respAvg,respMin,respMax,REM
2023-09-27,"Tuesday, Sep 26, 2023","Wednesday, Sep 27, 2023",2023-09-27 03:09:00,2023-09-27 12:00:00,08:51:00,00:03:30,00:00:00,1,08:47:30,99.3,...,53.0,,69.0,97.5,91.0,100.0,16.9,15.0,27.5,02:10:00
2023-09-28,"Wednesday, Sep 27, 2023","Thursday, Sep 28, 2023",2023-09-28 12:03:00,2023-09-28 13:38:00,01:35:00,00:00:00,00:00:00,1,01:35:00,100.0,...,63.0,,45.0,93.0,93.0,93.0,,,,
2023-09-29,"Thursday, Sep 28, 2023","Friday, Sep 29, 2023",2023-09-29 00:12:00,2023-09-29 10:04:00,09:52:00,00:26:30,00:00:00,1,10:18:30,104.5,...,53.0,,46.0,95.9,90.0,100.0,17.2,15.5,21.0,02:59:00


In [60]:
mult_sesh_df = clean_df.loc[clean_df['sessions'] > 1, ['bedtime', 'waketime', 'asleep']]
slp_hrs = [int(i[:2]) for i in mult_sesh_df['asleep'].values]
slp_mins = [int(i[3:5]) for i in mult_sesh_df['asleep'].values]
mult_sesh_df['sleep_mins'] = [(slp_hrs[i] * 60) + slp_mins[i] for i in range(len(slp_mins))]

mult_sesh_df['timed_sleep'] = (mult_sesh_df['waketime'] - mult_sesh_df['bedtime'])
mult_sesh_df['timed_mins'] = mult_sesh_df['timed_sleep'].dt.total_seconds() // 60

mult_sesh_df['time_diff'] = mult_sesh_df['sleep_mins'] - mult_sesh_df['timed_mins']
mult_sesh_df.loc[(mult_sesh_df['time_diff'] > 60) | (mult_sesh_df['time_diff'] < -60)]
# mult_sesh_df

Unnamed: 0,bedtime,waketime,asleep,sleep_mins,timed_sleep,timed_mins,time_diff
2021-01-11,2021-01-11 00:30:00,2021-01-11 01:05:00,08:06:00,486,0 days 00:35:00,35.0,451.0
2021-01-13,2021-01-13 03:15:00,2021-01-13 03:58:00,09:26:00,566,0 days 00:43:00,43.0,523.0
2021-01-26,2021-01-26 05:45:00,2021-01-26 05:59:59,07:20:59,440,0 days 00:14:59,14.0,426.0
2021-01-31,2021-01-31 00:13:00,2021-01-31 00:44:00,09:34:00,574,0 days 00:31:00,31.0,543.0
2021-02-04,2021-02-04 02:31:00,2021-02-04 05:15:00,09:23:00,563,0 days 02:44:00,164.0,399.0
...,...,...,...,...,...,...,...
2023-04-29,2023-04-29 04:07:00,2023-04-29 11:59:00,09:13:00,553,0 days 07:52:00,472.0,81.0
2023-05-13,2023-05-13 01:30:00,2023-05-13 10:53:00,07:53:00,473,0 days 09:23:00,563.0,-90.0
2023-05-27,2023-05-27 01:45:00,2023-05-27 06:14:00,05:37:00,337,0 days 04:29:00,269.0,68.0
2023-06-10,2023-06-10 01:28:00,2023-06-10 01:52:00,08:22:00,502,0 days 00:24:00,24.0,478.0


## START HERE

- excluding wake/bed times that don't match up with time asleep


In [207]:
def assign_timezones(old_df, columns, local_tz, current_tz, start_date=None, end_date=None):
    df = old_df.copy()
    
    def localize_and_convert(ser):
        new_ser = ser.dt.tz_localize(current_tz, nonexistent='shift_backward')
        new_ser = new_ser.dt.tz_convert(local_tz)
        # return pd.to_datetime(new_ser, utc=True).dt.strftime('%Y-%m-%d %H:%M:%S')
        dates = new_ser.dt.strftime('%Y-%m-%d')
        times = new_ser.dt.strftime('%H:%M:%S')
        return dates, times
    
    def filter(column, date):
        if end_date and not start_date:
            ser = df.loc[df.index < end_date, column]
            dates, times = localize_and_convert(ser)
            df.loc[df.index < end_date, column] = times
            df.loc[df.index < end_date, date] = dates
            df.loc[df.index < end_date, 'tz'] = local_tz
                    
        elif start_date and not end_date:
            ser = df.loc[df.index >= start_date, column]
            dates, times = localize_and_convert(ser)
            df.loc[df.index >= start_date, column] = times
            df.loc[df.index >= start_date, date] = dates
            df.loc[df.index >= start_date, 'tz'] = local_tz

        else:
            ser = df.loc[(df.index >= start_date) & (df.index < end_date), column]
            dates, times = localize_and_convert(ser)
            df.loc[(df.index >= start_date) & (df.index < end_date), column] = times
            df.loc[(df.index >= start_date) & (df.index < end_date), date] = dates
            df.loc[(df.index >= start_date) & (df.index < end_date), 'tz'] = local_tz
            
    for col in columns:
        if col == 'bedtime':
            filter(col, 'fromDate')
        elif col == 'waketime':
            filter(col, 'toDate')
        else:
            print('error')
        
    return df

current_tz = 'Europe/Berlin'
tz_df = assign_timezones(clean_df, ['bedtime', 'waketime'], 'US/Mountain', current_tz, end_date='2021-05-10')
tz_df = assign_timezones(tz_df, ['bedtime', 'waketime'], 'US/Eastern', current_tz, start_date='2021-05-10', end_date='2022-09-22')
tz_df = assign_timezones(tz_df, ['bedtime', 'waketime'], 'US/Central', current_tz, start_date='2022-08-23', end_date='2022-09-29')
tz_df = assign_timezones(tz_df, ['bedtime', 'waketime'], 'Europe/Berlin', current_tz, start_date='2022-09-29')
tz_df['bedtime'] = tz_df['bedtime'].dt.strftime('%H:%M')
tz_df['waketime'] = tz_df['waketime'].dt.strftime('%H:%M')

In [227]:
tz_df.head(3)

Unnamed: 0,fromDate,toDate,bedtime,waketime,inBed,awake,fellAsleepIn,sessions,asleep,efficiency,...,hrv,sleepHRV,SpO2Avg,SpO2Min,SpO2Max,respAvg,respMin,respMax,REM,tz
2020-12-31,2020-12-31,2020-12-31,01:11,09:30,08:18:33,00:18:33,00:18:33,1,08:00:00,96.3,...,33.0,43.0,94.5,90.0,98.0,,,,,US/Mountain
2021-01-01,2021-01-01,2021-01-01,01:42,09:03,07:21:00,00:00:00,00:00:00,1,07:21:00,100.0,...,,46.0,93.7,92.0,96.0,,,,,US/Mountain
2021-01-02,2021-01-01,2021-01-02,23:11,09:20,10:09:00,01:09:00,00:00:00,1,09:00:00,88.7,...,,61.0,92.6,87.0,96.0,,,,,US/Mountain


In [239]:
bt_hrs = [int(i[:2]) for i in tz_df['bedtime'].values]
bt_mins = [int(i[3:]) for i in tz_df['bedtime'].values]
t_bt_mins = pd.Series([(bt_hrs[i] * 60) + bt_mins[i] for i in range(len(bt_hrs))])

In [240]:
mean_mins = t_bt_mins.
mean_mins

433.99890710382516

In [236]:
print('bedtime mean: ', f'{int(mean_mins/60)}:{int(np.round(mean_mins%60))}')

bedtime mean:  7:14
