In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime as dt

# Load the raw sleep records, parse both date columns as datetimes, and add
# 'sleep_efficiency' (sleep_duration divided by bed_duration).
dacs_all = pd.read_csv(r"D:\MentalHealth\dacs_91_sleep_data.csv").assign(
    start_date=lambda frame: pd.to_datetime(frame['start_date']),
    end_date=lambda frame: pd.to_datetime(frame['end_date']),
    sleep_efficiency=lambda frame: frame['sleep_duration'] / frame['bed_duration'],
)

# Drop the columns that are not needed downstream.
dacs_all = dacs_all.drop(columns=['sleep_data_id', 'start_time', 'end_time'])

# Exclude the users that do not appear in April.
pids_absent_in_april = ['3-195', '3-37', '3-13', '3-41', '3-125', '3-187',
                        '3-3', '3-169', '3-85', '3-121', '3-51']
dacs_all = dacs_all[~dacs_all['PID'].isin(pids_absent_in_april)]

In [2]:
###################################################
# make a boolean mask of window
###################################################

def get_masked_dataframe(start_date, end_date, df):
    """Return the rows of ``df`` whose 'start_date' lies in [start_date, end_date).

    start_date: inclusive lower bound of the window
    end_date: exclusive upper bound of the window
    df: dataframe with a 'start_date' column

    The result is a new dataframe indexed by 'start_date'; ``df`` itself is
    left unchanged.
    """
    starts = df['start_date']
    in_window = starts.ge(start_date) & starts.lt(end_date)
    return df[in_window].set_index('start_date')

# Overall analysis window: start_date is inclusive, end_date is exclusive.
# NOTE(review): the window ends on 2 May, one day after the 1 May boundary used
# for apr_whole_month -- confirm that including 1 May here is intended.
start_date, end_date = (
    dt.strptime(stamp, '%Y-%m-%d %H:%M:%S')
    for stamp in ('2020-02-01 00:00:00', '2020-05-02 00:00:00')
)
new_all = get_masked_dataframe(start_date, end_date, dacs_all)

# One dataframe per PID, in the order groupby yields the groups.
list_each_user = [group for _, group in new_all.groupby(['PID'])]



In [3]:
# Upper bounds (exclusive) of February, March and April.
end_date_b, end_date_mar, end_date_apr = (
    dt.strptime(boundary, '%Y-%m-%d %H:%M:%S')
    for boundary in ('2020-03-01 00:00:00', '2020-04-01 00:00:00', '2020-05-01 00:00:00')
)

# Feb as baseline, then Mar and Apr each as a whole month; every slice is
# [lower, upper) on 'start_date'.
feb_baseline_all, mar_whole_month, apr_whole_month = (
    get_masked_dataframe(lower, upper, dacs_all)
    for lower, upper in (
        (start_date, end_date_b),
        (end_date_b, end_date_mar),
        (end_date_mar, end_date_apr),
    )
)

In [4]:
###################################################
# using pandas rolling window to get the mean and std
###################################################
# To use a time offset (e.g. '7D') as the window, the index must be a datetime index

def one_week_sleep_parameters_mean(aaa,win,mp):
    '''
    Rolling mean of every sleep parameter of one user.

    aaa: each_user dataframe, with index as start_date; for an offset window
         such as '7D' the index must be a sorted (monotonic) datetime index
    win: window offset
    mp: an int for min_periods

    Returns a dataframe with the same index as aaa and one column per sleep
    parameter, in the order listed in sleep_parameters below.
    '''
    sleep_parameters = [
        'sleep_duration', 'bed_duration', 'sleep_score', 'awake_duration',
        'rem_sleep_duration', 'light_sleep_duration', 'deep_sleep_duration',
        'sleep_onset_duration', 'bed_exit_count', 'toss_turn_count',
        'average_heart_rate', 'min_heart_rate', 'max_heart_rate',
        'average_respiration_rate', 'min_respiration_rate', 'max_respiration_rate',
        'average_physical_activity', 'fm_count', 'awakenings',
        'hrv_score', 'hrv_lf', 'hrv_hf',
        'hrv_rmssd_evening', 'hrv_rmssd_morning', 'sleep_efficiency',
    ]

    # Rolling over the selected columns at once computes each column
    # independently, exactly like rolling over the columns one by one.
    # A NaN in the result means the window ending at that row held fewer than
    # `mp` non-NaN observations of that parameter.
    return aaa[sleep_parameters].rolling(window=win, min_periods=mp).mean()

In [5]:
# 7-day rolling mean for every user (min_periods=6); each user's frame is
# sorted by its index first.
rolling_mean_list = [
    one_week_sleep_parameters_mean(user_frame.sort_index(), '7D', 6)
    for user_frame in list_each_user
]

In [6]:
def one_week_sleep_parameters_std(aaa,win,mp):
    '''
    Rolling standard deviation of every sleep parameter of one user.

    aaa: each_user dataframe, with index as start_date; for an offset window
         such as '7D' the index must be a sorted (monotonic) datetime index
    win: window offset
    mp: an int for min_periods

    Returns a dataframe with the same index as aaa and one column per sleep
    parameter, in the order listed in sleep_parameters below.
    '''
    sleep_parameters = [
        'sleep_duration', 'bed_duration', 'sleep_score', 'awake_duration',
        'rem_sleep_duration', 'light_sleep_duration', 'deep_sleep_duration',
        'sleep_onset_duration', 'bed_exit_count', 'toss_turn_count',
        'average_heart_rate', 'min_heart_rate', 'max_heart_rate',
        'average_respiration_rate', 'min_respiration_rate', 'max_respiration_rate',
        'average_physical_activity', 'fm_count', 'awakenings',
        'hrv_score', 'hrv_lf', 'hrv_hf',
        'hrv_rmssd_evening', 'hrv_rmssd_morning', 'sleep_efficiency',
    ]

    # Rolling over the selected columns at once computes each column
    # independently, exactly like rolling over the columns one by one.
    # A NaN in the result means the window ending at that row held fewer than
    # `mp` non-NaN observations of that parameter (or fewer than two, which the
    # default sample standard deviation needs).
    return aaa[sleep_parameters].rolling(window=win, min_periods=mp).std()

In [7]:
# 7-day rolling standard deviation for every user (min_periods=6); each
# user's frame is sorted by its index first.
rolling_std_list = [
    one_week_sleep_parameters_std(user_frame.sort_index(), '7D', 6)
    for user_frame in list_each_user
]

In [17]:
# Add the PID and the start_date timestamps back to each rolling dataframe.

# Group on the bare column name rather than a one-element list: with a list,
# pandas >= 2.0 yields each key as a 1-tuple, and assigning that tuple to the
# 'PID' column below fails with a length mismatch.
user_list = [pid for pid, _ in new_all.groupby('PID')]

# Sorted timestamps per user; the rolling frames were computed on
# sort_index()-ed data, so row i of a frame matches timestamp i of this list.
time_index_list = [sorted(each_user.index.tolist()) for each_user in list_each_user]

# The std and mean frames get the same two extra columns.
for stats_list in (rolling_std_list, rolling_mean_list):
    for stats_frame, pid, timestamps in zip(stats_list, user_list, time_index_list):
        stats_frame['start_time'] = timestamps
        stats_frame['PID'] = pid

In [19]:
# rolling_std_list and rolling_mean_list each have 61 entries, one per user that has sleep data up to April

# Display the rolling means of the first user in the list: the index is replaced
# by 0..n-1 for display and the cells are shaded with the 'Reds' colormap.
rolling_mean_list[0].reset_index(drop=True).style.background_gradient(cmap='Reds')

Unnamed: 0,sleep_duration,bed_duration,sleep_score,awake_duration,rem_sleep_duration,light_sleep_duration,deep_sleep_duration,sleep_onset_duration,bed_exit_count,toss_turn_count,average_heart_rate,min_heart_rate,max_heart_rate,average_respiration_rate,min_respiration_rate,max_respiration_rate,average_physical_activity,fm_count,awakenings,hrv_score,hrv_lf,hrv_hf,hrv_rmssd_evening,hrv_rmssd_morning,sleep_efficiency,start_time,PID
0,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-01 22:02:09,3-1
1,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-02 22:02:17,3-1
2,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-03 23:02:16,3-1
3,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-04 22:02:00,3-1
4,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-04 22:02:00,3-1
5,26425.0,28705.2,82.8333,3000.0,6530.0,15155.0,4740.0,1600.0,1.66667,28.5,59.9167,46.0,108.167,18.6,8.16667,24.8333,33.0,0.0,0.666667,26.1667,59.6667,40.3333,17.9333,18.2167,0.919439,2020-02-05 23:02:22,3-1
6,25821.4,28228.4,81.0,3141.43,6081.43,14918.6,4821.43,1658.57,1.85714,29.0,58.9429,45.8571,108.714,18.4,8.57143,24.2857,37.7143,0.0,0.714286,27.0,59.2857,40.7143,18.4857,18.6429,0.913107,2020-02-07 00:02:07,3-1
7,25552.5,28240.4,79.25,3401.25,5992.5,14868.8,4691.25,1680.0,2.0,30.375,58.8625,46.0,110.375,18.4125,8.5,24.25,42.125,0.0,1.125,26.125,59.375,40.625,18.5625,17.825,0.90343,2020-02-07 23:02:59,3-1
8,25413.8,27984.1,79.0,3198.75,6240.0,14602.5,4571.25,1541.25,1.875,32.0,58.875,46.25,109.125,18.3125,8.25,24.125,45.75,0.0,1.125,25.25,60.25,39.75,18.7875,16.6875,0.906122,2020-02-08 22:02:44,3-1
9,25938.8,28785.9,80.75,3401.25,6592.5,14625.0,4721.25,1417.5,1.875,34.25,57.9,46.75,112.625,18.175,8.125,24.0,49.875,0.0,1.5,24.75,60.0,40.0,18.8,16.55,0.899767,2020-02-09 23:02:16,3-1


In [20]:
# Display the rolling standard deviations of the first user in the list: the index
# is replaced by 0..n-1 for display and the cells are shaded with the 'Blues' colormap.
rolling_std_list[0].reset_index(drop=True).style.background_gradient(cmap='Blues')

Unnamed: 0,sleep_duration,bed_duration,sleep_score,awake_duration,rem_sleep_duration,light_sleep_duration,deep_sleep_duration,sleep_onset_duration,bed_exit_count,toss_turn_count,average_heart_rate,min_heart_rate,max_heart_rate,average_respiration_rate,min_respiration_rate,max_respiration_rate,average_physical_activity,fm_count,awakenings,hrv_score,hrv_lf,hrv_hf,hrv_rmssd_evening,hrv_rmssd_morning,sleep_efficiency,start_time,PID
0,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-01 22:02:09,3-1
1,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-02 22:02:17,3-1
2,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-03 23:02:16,3-1
3,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-04 22:02:00,3-1
4,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-04 22:02:00,3-1
5,4862.27,5002.04,16.1916,1305.74,1338.1,3414.17,1178.81,645.848,1.50555,5.78792,4.02414,2.68328,11.4091,0.657267,0.752773,1.16905,4.0,0.0,1.0328,3.43026,7.0899,7.0899,3.00311,1.72443,0.033242,2020-02-05 23:02:22,3-1
6,4717.15,4737.22,15.5563,1249.33,1703.12,3178.85,1097.46,609.602,1.46385,5.44671,4.48697,2.47848,10.5153,0.8,1.27242,1.79947,12.9963,0.0,0.95119,3.82971,6.55017,6.55017,3.10667,1.93637,0.0346623,2020-02-07 00:02:07,3-1
7,4432.98,4385.95,15.2292,1370.36,1596.72,2946.42,1080.71,567.627,1.41421,6.36817,4.16034,2.32993,10.8092,0.741499,1.19523,1.66905,17.3324,0.0,1.45774,4.32394,6.06954,6.06954,2.88441,2.9266,0.042179,2020-02-07 23:02:59,3-1
8,4341.83,4122.23,15.0807,1197.31,1498.71,2749.91,913.039,534.721,1.55265,8.0,4.15718,2.18763,10.7761,0.766136,1.48805,1.64208,18.1639,0.0,1.45774,5.57418,7.62983,7.62983,2.87772,3.7817,0.0401066,2020-02-08 22:02:44,3-1
9,4615.63,4647.77,15.9978,1275.91,1960.73,2749.03,941.464,508.801,1.55265,10.7138,2.13341,2.05287,6.09303,0.604152,1.64208,1.60357,20.0602,0.0,1.51186,6.15862,7.44504,7.44504,2.84705,3.81763,0.0406853,2020-02-09 23:02:16,3-1


In [21]:
# Display the rolling means of the second user in the list: the index is replaced
# by 0..n-1 for display and the cells are shaded with the 'Reds' colormap.
rolling_mean_list[1].reset_index(drop=True).style.background_gradient(cmap='Reds')

Unnamed: 0,sleep_duration,bed_duration,sleep_score,awake_duration,rem_sleep_duration,light_sleep_duration,deep_sleep_duration,sleep_onset_duration,bed_exit_count,toss_turn_count,average_heart_rate,min_heart_rate,max_heart_rate,average_respiration_rate,min_respiration_rate,max_respiration_rate,average_physical_activity,fm_count,awakenings,hrv_score,hrv_lf,hrv_hf,hrv_rmssd_evening,hrv_rmssd_morning,sleep_efficiency,start_time,PID
0,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-01 21:02:22,3-10
1,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-02 21:02:58,3-10
2,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-03 23:02:42,3-10
3,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-04 14:02:07,3-10
4,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-04 22:02:54,3-10
5,25260.0,28937.2,76.8333,4845.0,6475.0,14235.0,4550.0,2335.0,3.16667,47.0,64.9167,47.8333,104.333,10.9333,5.66667,21.5,56.6667,0.0,1.66667,22.6667,41.1667,25.5,,,0.808936,2020-02-05 14:02:40,3-10
6,26134.3,29690.4,79.5714,4555.71,6475.71,15021.4,4637.14,2168.57,2.71429,47.5714,64.1857,46.8571,104.143,11.0286,5.71429,22.1429,58.4286,0.0,1.57143,25.0,43.4286,28.0,,,0.824413,2020-02-05 23:02:42,3-10
7,26861.2,30352.9,81.625,4413.75,6727.5,15510.0,4623.75,2006.25,2.625,46.5,63.85,47.0,106.25,11.1125,5.625,22.625,58.375,0.0,1.625,25.875,45.5,29.5,25.6667,26.9833,0.835501,2020-02-06 23:02:55,3-10
8,27833.3,31276.3,85.1111,4286.67,7060.0,15946.7,4826.67,1823.33,2.44444,45.8889,63.5556,46.5556,107.778,11.1333,5.77778,23.0,59.2222,0.0,1.66667,29.4444,46.5556,31.2222,26.6286,27.5,0.845002,2020-02-07 23:02:44,3-10
9,28692.0,32053.1,88.5,4269.0,7248.0,16287.0,5157.0,1677.0,2.5,45.5,63.32,46.2,109.0,11.15,5.9,23.3,59.9,0.0,1.7,30.4,47.5,32.5,27.4875,27.625,0.853781,2020-02-07 23:02:44,3-10


In [23]:
# Display the rolling standard deviations of the second user in the list: the index
# is replaced by 0..n-1 for display and the cells are shaded with the 'Blues' colormap.
rolling_std_list[1].reset_index(drop=True).style.background_gradient(cmap='Blues')

Unnamed: 0,sleep_duration,bed_duration,sleep_score,awake_duration,rem_sleep_duration,light_sleep_duration,deep_sleep_duration,sleep_onset_duration,bed_exit_count,toss_turn_count,average_heart_rate,min_heart_rate,max_heart_rate,average_respiration_rate,min_respiration_rate,max_respiration_rate,average_physical_activity,fm_count,awakenings,hrv_score,hrv_lf,hrv_hf,hrv_rmssd_evening,hrv_rmssd_morning,sleep_efficiency,start_time,PID
0,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-01 21:02:22,3-10
1,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-02 21:02:58,3-10
2,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-03 23:02:42,3-10
3,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-04 14:02:07,3-10
4,,,,,,,,,,,,,,,,,,,,,,,,,,2020-02-04 22:02:54,3-10
5,15795.4,16164.2,50.929,1798.67,4045.46,9490.38,2599.51,1471.34,2.48328,41.4198,3.96606,3.54495,19.4491,1.29872,1.0328,4.32435,20.4906,0.0,1.21106,18.6512,32.1647,20.1965,,,0.184487,2020-02-05 14:02:40,3-10
6,14603.5,14889.8,47.0527,1811.58,3692.98,8909.84,2384.19,1413.48,2.56348,37.8411,4.10464,4.14039,17.7616,1.21204,0.95119,4.29839,19.2774,0.0,1.13389,18.1108,29.9659,19.5874,,,0.173319,2020-02-05 23:02:42,3-10
7,13675.7,13912.0,43.9478,1724.59,3492.42,8363.86,2207.66,1386.83,2.38672,35.1649,3.917,3.8545,17.4908,1.14697,0.916125,4.20671,17.8481,0.0,1.06066,16.949,28.3549,18.6241,4.52931,9.30213,0.163498,2020-02-06 23:02:55,3-10
8,13120.6,13305.1,42.4189,1657.65,3415.75,7932.59,2152.93,1408.55,2.29734,32.9448,3.76899,3.84419,16.991,1.07471,0.971825,4.09268,16.8877,0.0,1.0,19.1319,26.7119,18.1713,4.85514,8.60097,0.155572,2020-02-07 23:02:44,3-10
9,12664.8,12782.4,41.4038,1563.84,3274.82,7555.96,2282.83,1406.3,2.17307,31.085,3.63067,3.79473,16.4789,1.01462,0.994429,3.97352,16.0655,0.0,0.948683,18.289,25.3607,17.6021,5.1095,7.97079,0.149279,2020-02-07 23:02:44,3-10
