In [13]:
import pandas as pd
import matplotlib.pyplot as plt
from astropy.stats.circstats import circmean
from functools import reduce
import datetime
import time
import plotly.express as px
import numpy as np

pd.set_option("display.precision", 2)
plt.rcParams.update({'font.size': 20, 'figure.figsize': (8, 4)})

%matplotlib inline
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

import seaborn as sns
sns.set()

import warnings
warnings.filterwarnings('ignore')

In [14]:
working_df = pd.read_csv("measurements_full.csv")
working_df['numeric_id'] = working_df['id'].apply(lambda id: int(id))
working_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3027 entries, 0 to 3026
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 3027 non-null   int64  
 1   date               3027 non-null   object 
 2   steps              3027 non-null   int64  
 3   average heartrate  3027 non-null   int64  
 4   average stress     3027 non-null   int64  
 5   sleep time         2423 non-null   object 
 6   duration           2423 non-null   float64
 7   awake time         2423 non-null   object 
 8   start_seconds      2423 non-null   float64
 9   duration_seconds   2423 non-null   float64
 10  waking_seconds     2423 non-null   float64
 11  numeric_id         3027 non-null   int64  
dtypes: float64(4), int64(5), object(3)
memory usage: 283.9+ KB


### adding sleeping_hour column to dataframe

In [15]:
def convert_epoch_to_time(epoch):
    if not pd.isna(epoch):
        return time.strftime('%H:%M', time.localtime(epoch))

    
working_df['sleeping_hour'] = working_df['start_seconds'].apply(convert_epoch_to_time)
working_df

Unnamed: 0,id,date,steps,average heartrate,average stress,sleep time,duration,awake time,start_seconds,duration_seconds,waking_seconds,numeric_id,sleeping_hour
0,101,21/01/2023,4334,121,-1,,,,,,,101,
1,101,22/01/2023,1753,104,26,,,,,,,101,
2,101,23/01/2023,2136,89,19,,,,,,,101,
3,101,24/01/2023,1360,76,27,,,,,,,101,
4,101,25/01/2023,5275,74,27,24/01/2023 23:02,7.13,6:10,1.67e+09,25680.0,1.67e+09,101,23:02
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3022,229,16/03/2023,18362,87,46,15/03/2023 19:30,5.58,1:05,1.68e+09,20100.0,1.68e+09,229,19:30
3023,229,17/03/2023,1854,77,27,16/03/2023 23:04,4.42,3:29,1.68e+09,15900.0,1.68e+09,229,23:04
3024,229,18/03/2023,4079,77,32,18/03/2023 2:13,7.08,9:18,1.68e+09,25500.0,1.68e+09,229,02:13
3025,229,19/03/2023,193,65,7,19/03/2023 0:30,7.00,7:30,1.68e+09,25200.0,1.68e+09,229,00:30


### creating main table that includes [id, date, heartRate, stress, steps, sleeping_time, sleeping_duration, waking_time, midSleep, week-end, exams]

In [16]:
def is_weekend(date):
    """
    input: a string that descripe a date in format {day}/{month}/{year}
    output: - "Yes" if the date is weekend (friday/saturday)
            - "No" otherwise
    """
    date_arr = date.split('/')
    day, month, year = int(date_arr[0]), int(date_arr[1]), int(date_arr[2])
    date_time = datetime.datetime(year, month, day)
    
    if (date_time.strftime('%A') == 'Friday' or date_time.strftime('%A') == 'Saturday'):
        return 'Yes'
    else:
        return 'No'


def is_exams_period(date):
    """
    input: a string that descripe a date in format {day}/{month}/{year}
    output: - "Yes" if the date falls in exams perioud (25/1/2023 - 15/2/2023)
            - "No" otherwise
    """
    date_arr = date.split('/')
    day, month, year = int(date_arr[0]), int(date_arr[1]), int(date_arr[2])
    
    if (month == 1 and day >= 25) or (month == 2 and day <= 15):
        return 'Yes'
    else:
        return 'No'

    
def convert_epoch_to_time(epoch_time):
    """
    input : epoch time
    output : a string that describe the time in format {hours}:{minutes}
    """
    if not pd.isna(epoch_time):
        return datetime.datetime.fromtimestamp(epoch_time).strftime('%H:%M')


working_df['weekend'] = working_df['date'].apply(is_weekend)
working_df['exam period'] = working_df['date'].apply(is_exams_period)
working_df['midsleep_seconds'] = working_df['start_seconds'] + (working_df['duration_seconds'] / 2)
working_df['midSleep time'] = working_df['midsleep_seconds'].apply(convert_epoch_to_time)
working_df = working_df.fillna(value=np.nan) # replace None values with NaN values
#deleting unwanted columns (that researches doesnt need) from dataframe
measurements = working_df.drop(['start_seconds', 'duration_seconds', 'waking_seconds', 'numeric_id', 'midsleep_seconds', 'sleeping_hour'], axis=1)
#re-ordering columns
measurements = measurements[['id','date', 'steps', 'average heartrate', 'average stress', 'sleep time', 'duration', 'awake time', 'midSleep time', 'weekend', 'exam period']]

### save measurements dataframe to csv

In [17]:
measurements.to_csv('measurements.csv', index=False)
measurements

Unnamed: 0,id,date,steps,average heartrate,average stress,sleep time,duration,awake time,midSleep time,weekend,exam period
0,101,21/01/2023,4334,121,-1,,,,,Yes,No
1,101,22/01/2023,1753,104,26,,,,,No,No
2,101,23/01/2023,2136,89,19,,,,,No,No
3,101,24/01/2023,1360,76,27,,,,,No,No
4,101,25/01/2023,5275,74,27,24/01/2023 23:02,7.13,6:10,02:36,No,Yes
...,...,...,...,...,...,...,...,...,...,...,...
3022,229,16/03/2023,18362,87,46,15/03/2023 19:30,5.58,1:05,22:17,No,No
3023,229,17/03/2023,1854,77,27,16/03/2023 23:04,4.42,3:29,01:16,Yes,No
3024,229,18/03/2023,4079,77,32,18/03/2023 2:13,7.08,9:18,05:45,Yes,No
3025,229,19/03/2023,193,65,7,19/03/2023 0:30,7.00,7:30,04:00,No,No


### coding a function that recieve dataframe and returns averages dataframe

In [18]:
def create_avg_time_df(time_df, time_column_name, average_column_name):
    """
    the function recieves dataframe with time columns, it returns average time dataframe grouped by id
    """
    time_df["radians"] = pd.to_datetime(time_df[time_column_name], format="%H:%M").dt.hour / 24 * 2 * np.pi
    
    # Compute the circular mean of the radians for each group
    average_df = time_df.groupby("id").agg({"radians": circmean, time_column_name: "first"})
    #deleting radians column to not effect recieved dataframe
    time_df.drop(['radians'], axis=1, inplace = True)
    # Convert the radians to "hour:minute" format
    average_df[average_column_name] = pd.to_datetime(average_df["radians"] / (2 * np.pi) * 24, unit="h").dt.strftime("%H:%M")
    
    return average_df.drop(['radians',time_column_name], axis=1)


def create_avg_table_from_df(measurments_df):
    """
    input: a final research results dataframe 
    output: averages dataframe that include the following average columns => 
     => [steps, average heartrate, average stress, average sleep start, duration, average awake time, average midsleep ]         
    """
    ######################calculating average for steps predictors##########################
    
    #creating df without 0 heartrate value rows
    clean_heart_rate_df = measurments_df[measurments_df['average heartrate'] != 0]
    #creating df without -1 stress level rows
    clean_stress_level_df = measurments_df[measurments_df['average stress'] != -1]

    #average dailies predictors dataframe creation
    average_steps_df = measurments_df.groupby('id')['steps'].mean().to_frame()
    #average heartrate dataframe creation
    average_heartRate_df = clean_heart_rate_df.groupby('id')['average heartrate'].mean().to_frame()
    #average stress level dataframe creation
    average_stress_df = clean_stress_level_df.groupby('id')['average stress'].mean().to_frame()


    #merging all dailies average dataframes into one dataframe
    dailies_df_list = [average_steps_df, average_heartRate_df, average_stress_df]
    merged_dailies_df = reduce(lambda left, right: pd.merge(left, right, on="id", how="inner"), dailies_df_list)
    ######################calculating average for sleeps predictors############################
    
    #removing empty sleeps rows
    clean_sleeps_df = measurments_df.dropna()
    
    #average sleeps predictors dataframe creation
    average_sleep_df = create_avg_time_df(clean_sleeps_df, 'sleeping_hour','average sleep start')
    average_sleep_duration_df = clean_sleeps_df.groupby('id')['duration'].mean().to_frame()
    average_awake_df = create_avg_time_df(clean_sleeps_df, 'awake time','average awake time')
    average_midsleep_df = create_avg_time_df(clean_sleeps_df, 'midSleep time','average midsleep')
    
    # merging all sleeps average dataframes into one dataframe
    sleeping_df_list = [average_sleep_df, average_sleep_duration_df, average_midsleep_df, average_awake_df]
    merged_sleeping_df = reduce(lambda left, right: pd.merge(left, right, on="id", how="inner"), sleeping_df_list)

    #creating final merged df
    merged_df = pd.merge(merged_dailies_df, merged_sleeping_df, on='id', how='left')
    
    return merged_df


### creating morning-type, exams-period, mid-week averages table

In [19]:
morning_exams_midweek_students_df = working_df[(working_df['numeric_id'] < 200) & (working_df['weekend']=='No') & (working_df['exam period']=='Yes')]
morning_exams_midweek_average_df = create_avg_table_from_df(morning_exams_midweek_students_df)

### saving morning-type, mid-week, exams-duration averages dataframe to csv

In [20]:
morning_exams_midweek_average_df.to_csv('morning-midweek-exams-averages.csv')
morning_exams_midweek_average_df

Unnamed: 0_level_0,steps,average heartrate,average stress,average sleep start,duration,average midsleep,average awake time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
101,4958.88,72.31,29.0,22:37,8.98,03:03,07:29
102,4499.13,92.07,42.07,,,,
105,6978.81,77.0,37.31,00:33,8.86,05:03,09:29
107,5808.81,80.88,37.44,23:33,9.9,04:39,09:41
108,5286.43,81.5,25.5,00:56,8.26,05:01,09:16
109,4258.13,87.13,40.07,00:01,6.66,03:16,06:39
110,3345.0,80.5,30.5,02:00,6.83,05:30,08:28
111,6582.12,70.94,32.19,23:42,9.61,04:29,09:04
112,3962.75,93.25,49.5,00:51,7.71,04:42,08:16
113,4879.0,77.5,41.5,01:30,7.38,05:12,08:55


### creating morning-type, exams-period, weekend averages table

In [21]:
morning_exams_weekend_students_df = working_df[(working_df['numeric_id'] < 200) & (working_df['weekend']=='Yes') & (working_df['exam period']=='Yes')]
morning_exams_weekend_average_df = create_avg_table_from_df(morning_exams_weekend_students_df)

### saving morning-type, exams-period, weekend averages dataframe to csv

In [22]:
morning_exams_weekend_average_df.to_csv('morning-weekend-exams-averages.csv')
morning_exams_weekend_average_df

Unnamed: 0_level_0,steps,average heartrate,average stress,average sleep start,duration,average midsleep,average awake time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
101,3392.33,66.33,18.5,21:12,10.99,02:32,08:00
102,8260.8,91.4,34.8,22:00,13.03,05:00,11:00
105,5422.17,76.67,34.83,00:28,9.54,05:10,09:50
107,5502.5,78.0,31.75,02:00,8.41,06:30,10:30
108,4184.0,82.0,24.0,02:00,8.4,06:00,11:00
109,5852.0,89.4,39.6,00:27,6.89,04:14,07:14
110,5292.0,86.75,44.5,02:00,10.61,07:30,13:00
111,8595.83,72.17,32.17,00:20,9.4,05:10,09:31
112,8999.0,95.75,35.5,02:32,8.16,06:30,10:30
113,4158.0,74.67,34.83,01:19,7.85,05:19,09:10


### creating morning-type, non-exams, weekend average table

In [23]:
morning_non_exams_weekend_students_df = working_df[(working_df['numeric_id'] < 200) & (working_df['weekend']=='Yes') & (working_df['exam period']=='No')]
morning_non_exams_weekend_averages_df = create_avg_table_from_df(morning_non_exams_weekend_students_df)

### saving morning-type, non-exams, weekend averages dataframe to csv

In [24]:
morning_non_exams_weekend_averages_df.to_csv('morning-weekend-non-exams-averages.csv')
morning_non_exams_weekend_averages_df

Unnamed: 0_level_0,steps,average heartrate,average stress,average sleep start,duration,average midsleep,average awake time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
101,3860.24,74.44,22.07,23:02,9.09,03:41,07:55
102,11666.9,95.0,36.67,,,,
105,9896.92,77.75,40.58,00:19,8.67,04:50,09:00
107,6754.19,82.06,34.62,00:28,11.18,06:03,11:40
108,8821.89,85.56,27.44,01:30,8.49,05:45,09:30
109,11430.67,89.5,36.0,01:30,7.71,05:00,09:00
110,8401.57,86.0,40.71,01:37,7.69,05:12,08:57
111,11871.73,75.18,36.73,00:16,8.42,04:22,08:32
112,8044.43,96.0,45.57,03:00,7.88,07:02,10:40
113,4707.64,75.38,38.46,01:59,8.78,06:23,10:58


### creating morning-type, non-exams, midweek average table

In [26]:
morning_non_exams_midweek_students_df = working_df[(working_df['numeric_id'] < 200) & (working_df['weekend']=='No') & (working_df['exam period']=='No')]
morning_non_exams_midweek_average_df = create_avg_table_from_df(morning_non_exams_midweek_students_df)

### saving morning-type, non-exams, weekend averages dataframe to csv

In [27]:
morning_non_exams_midweek_average_df.to_csv('morning-midweek-non-exams-averages.csv')
morning_non_exams_midweek_average_df

Unnamed: 0_level_0,steps,average heartrate,average stress,average sleep start,duration,average midsleep,average awake time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
101,6443.0,74.75,28.65,23:05,8.38,03:16,07:29
102,7405.0,96.3,44.22,,,,
105,8467.72,78.21,41.38,00:36,8.84,05:05,09:27
107,6273.81,79.03,33.62,00:29,10.29,05:31,10:35
108,5817.29,81.5,24.65,23:49,7.95,03:51,07:43
109,6485.28,86.38,36.06,00:34,7.74,04:40,08:35
110,3491.12,77.93,28.2,03:24,9.83,08:17,13:21
111,7499.73,72.27,35.47,23:50,9.21,04:24,08:58
112,9446.44,94.19,40.06,00:59,8.11,05:17,09:11
113,5162.97,74.49,39.08,00:56,7.93,04:53,08:46


### creating night-type, non-exams, midweek average table

In [29]:
night_non_exams_midweek_students_df = working_df[(working_df['numeric_id'] >= 200) & (working_df['weekend']=='No') & (working_df['exam period']=='No')]
night_non_exams_midweek_average_df = create_avg_table_from_df(night_non_exams_midweek_students_df)

### saving night-type, non-exams, weekend averages dataframe to csv

In [30]:
night_non_exams_midweek_average_df.to_csv('night-midweek-non-exams-averages.csv')
night_non_exams_midweek_average_df

Unnamed: 0_level_0,steps,average heartrate,average stress,average sleep start,duration,average midsleep,average awake time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
201,14101.76,70.31,27.07,22:31,9.59,03:14,08:13
202,4212.2,75.1,22.03,23:06,7.9,03:01,07:10
203,8332.41,88.07,33.52,22:28,7.63,02:16,06:14
204,6125.9,75.35,38.0,,,,
205,8278.96,80.56,30.7,23:01,8.3,03:10,07:23
206,10717.39,82.29,27.32,23:49,7.01,03:25,06:57
207,9813.81,80.67,23.48,00:07,6.86,03:26,06:52
208,5070.74,86.87,29.35,23:43,8.16,03:45,07:58
209,8666.11,79.89,28.85,23:36,7.83,03:25,07:18
210,6568.36,76.64,22.5,23:40,7.07,03:15,06:52


### creating night-type, exams, midweek average table

In [32]:
night_exams_midweek_students_df = working_df[(working_df['numeric_id'] >= 200) & (working_df['weekend']=='No') & (working_df['exam period']=='Yes')]
night_exams_midweek_average_df = create_avg_table_from_df(night_exams_midweek_students_df)

### saving night-type, exams, midweek averages dataframe to csv

In [33]:
night_exams_midweek_average_df.to_csv('night-midweek-exams-averages.csv')
night_exams_midweek_average_df

Unnamed: 0_level_0,steps,average heartrate,average stress,average sleep start,duration,average midsleep,average awake time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
201,16022.13,73.13,28.6,22:24,9.26,02:54,07:35
202,2875.19,74.06,26.25,22:52,8.17,02:59,07:22
203,2684.14,87.14,37.21,21:53,7.33,01:19,05:00
204,3079.43,71.5,30.07,23:24,8.46,03:36,07:48
205,4311.53,78.93,33.8,22:43,8.71,03:11,07:22
206,7304.56,83.25,31.94,23:36,7.45,03:16,06:52
207,6124.21,81.0,25.5,23:21,7.16,03:05,06:49
208,3300.06,85.88,31.38,23:37,8.81,03:56,08:24
209,3561.75,81.94,39.19,22:17,8.31,02:22,06:35
210,2986.75,78.12,27.38,00:03,6.49,03:26,06:55


### creating night-type, non-exams, weekend average table

In [36]:
night_non_exams_weekend_students_df = working_df[(working_df['numeric_id'] >= 200) & (working_df['weekend']=='Yes') & (working_df['exam period']=='No')]
night_non_exams_weekend_average_df = create_avg_table_from_df(night_non_exams_weekend_students_df)

### saving night-type, non-exams, weekend averages dataframe to csv

In [37]:
night_non_exams_weekend_average_df.to_csv('night-weekend-non-exams-averages.csv')
night_non_exams_weekend_average_df

Unnamed: 0_level_0,steps,average heartrate,average stress,average sleep start,duration,average midsleep,average awake time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
201,10195.55,67.27,22.27,22:33,11.47,04:20,10:14
202,4407.62,75.46,19.77,23:11,9.35,04:05,08:44
203,7583.08,85.08,28.17,22:10,8.26,02:23,06:30
204,6212.8,72.0,32.2,,,,
205,5897.6,78.2,23.6,23:14,8.68,03:42,08:05
206,11980.45,88.0,34.55,00:45,7.39,04:22,08:07
207,8854.82,82.64,27.36,01:25,7.73,05:12,09:11
208,5883.0,88.0,29.73,23:53,9.52,04:33,09:25
209,8044.73,82.6,32.7,22:24,9.15,03:05,07:46
210,5542.73,79.45,26.73,00:33,6.83,03:53,07:26


### creating night-type, exams, weekend average table

In [39]:
night_exams_weekend_students_df = working_df[(working_df['numeric_id'] >= 200) & (working_df['weekend']=='Yes') & (working_df['exam period']=='Yes')]
night_exams_weekend_average_df = create_avg_table_from_df(night_exams_weekend_students_df)

### saving night-type, exams, weekend averages dataframe to csv

In [40]:
night_exams_weekend_average_df.to_csv('night-weekend-exams-averages.csv')
night_exams_weekend_average_df

Unnamed: 0_level_0,steps,average heartrate,average stress,average sleep start,duration,average midsleep,average awake time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
201,10694.33,70.5,30.0,23:45,9.69,04:15,09:14
202,2124.5,73.0,23.33,23:39,7.73,03:49,07:40
203,3042.5,87.5,33.33,22:00,7.24,01:19,05:00
204,5088.0,71.5,26.83,23:00,9.14,03:29,08:00
205,6557.0,77.0,24.2,22:44,8.84,03:00,07:30
206,7931.0,82.17,28.67,23:45,7.57,03:52,07:40
207,4578.17,79.17,28.17,00:00,8.04,04:12,08:00
208,2091.5,84.0,30.0,23:43,8.38,03:45,08:01
209,6329.67,82.0,33.67,00:11,7.51,03:51,07:40
210,3469.0,76.17,24.17,00:00,7.08,03:35,07:23
