In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from astropy.stats.circstats import circmean
from functools import reduce
import datetime
import time
import plotly.express as px
import numpy as np

pd.set_option("display.precision", 2)
plt.rcParams.update({'font.size': 20, 'figure.figsize': (8, 4)})

%matplotlib inline
import matplotlib_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

import seaborn as sns
sns.set()

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the raw measurements and keep an integer version of the participant id
# next to the original `id` column (later cells filter on `numeric_id`).
working_df = pd.read_csv("measurements_full.csv")
working_df['numeric_id'] = working_df['id'].apply(int)
working_df.info()

### adding sleeping_hour column to dataframe

In [None]:
def convert_epoch_to_time(epoch):
    """Format an epoch timestamp (seconds) as a local-time 'HH:MM' string.

    Returns None when `epoch` is missing according to `pd.isna`.

    NOTE(review): a later cell defines a function with this same name, which
    rebinds it from that point on.
    """
    if pd.isna(epoch):
        return None
    local_time = time.localtime(epoch)
    return time.strftime('%H:%M', local_time)

    
# Sleep start rendered as a local 'HH:MM' string (missing start times stay missing).
sleep_start_seconds = working_df['start_seconds']
working_df['sleeping_hour'] = sleep_start_seconds.map(convert_epoch_to_time)
working_df

### creating main table that includes [id, date, heartRate, stress, steps, sleeping_time, sleeping_duration, waking_time, midSleep, week-end, exams]

In [None]:
def is_weekend(date):
    """
    Tell whether a date falls on the weekend, defined here as Friday or Saturday.

    input: a string that describes a date in format {day}/{month}/{year}
    output: - "Yes" if the date is a weekend day (Friday/Saturday)
            - "No" otherwise
    raises: ValueError if the parts are not integers or do not form a valid date,
            IndexError if fewer than three '/'-separated parts are given
    """
    date_arr = date.split('/')
    day, month, year = int(date_arr[0]), int(date_arr[1]), int(date_arr[2])

    # weekday() numbers Monday as 0, so Friday is 4 and Saturday is 5.
    # Comparing numbers instead of strftime('%A') day names keeps the result
    # independent of the process locale.
    weekday_number = datetime.date(year, month, day).weekday()

    return 'Yes' if weekday_number in (4, 5) else 'No'


def is_exams_period(date,
                    period_start=datetime.date(2023, 1, 25),
                    period_end=datetime.date(2023, 2, 15)):
    """
    Tell whether a date falls inside the exams period (both ends inclusive).

    input: - date: a string that describes a date in format {day}/{month}/{year}
           - period_start / period_end: datetime.date bounds of the exams period,
             defaulting to 25/1/2023 - 15/2/2023
    output: - "Yes" if the date falls in the exams period
            - "No" otherwise
    raises: ValueError if the parts are not integers,
            IndexError if fewer than three '/'-separated parts are given
    """
    date_arr = date.split('/')
    day, month, year = int(date_arr[0]), int(date_arr[1]), int(date_arr[2])

    # Compare (year, month, day) tuples so the year takes part in the check;
    # looking only at day and month would also match the same days of other years.
    # Tuples (rather than date objects) are used so an impossible calendar date
    # yields "No" instead of raising.
    first_day = period_start.timetuple()[:3]
    last_day = period_end.timetuple()[:3]

    if first_day <= (year, month, day) <= last_day:
        return 'Yes'
    return 'No'

    
def convert_epoch_to_time(epoch_time):
    """
    input : epoch time in seconds (may be missing)
    output : a string that describes the local time in format {hours}:{minutes},
             or None when the input is missing according to pd.isna

    NOTE(review): an earlier cell already defines a function with this name;
    this definition rebinds it.
    """
    if pd.isna(epoch_time):
        return None
    moment = datetime.datetime.fromtimestamp(epoch_time)
    return moment.strftime('%H:%M')


# Per-row flags derived from the date string.
working_df['weekend'] = working_df['date'].apply(is_weekend)
working_df['exam period'] = working_df['date'].apply(is_exams_period)

# Mid-sleep = sleep start plus half of the sleep duration (epoch seconds),
# also rendered as an 'HH:MM' string.
half_duration_seconds = working_df['duration_seconds'] / 2
working_df['midsleep_seconds'] = working_df['start_seconds'] + half_duration_seconds
working_df['midSleep time'] = working_df['midsleep_seconds'].apply(convert_epoch_to_time)

# Replace None values with NaN values.
working_df = working_df.fillna(value=np.nan)

# Researcher-facing table: remove the helper columns the researchers do not need...
helper_columns = ['start_seconds', 'duration_seconds', 'waking_seconds', 'numeric_id', 'midsleep_seconds', 'sleeping_hour']
measurements = working_df.drop(columns=helper_columns)
# ...and put the remaining columns in presentation order.
column_order = ['id','date', 'steps', 'average heartrate', 'average stress', 'sleep time', 'duration', 'awake time', 'midSleep time', 'weekend', 'exam period']
measurements = measurements[column_order]

### save measurements dataframe to csv

In [None]:
# Export the researcher-facing table without the dataframe index, then display it.
measurements.to_csv('measurements.csv', index=False)
measurements

### defining a function that receives a dataframe and returns an averages dataframe

In [None]:
def create_avg_time_df(time_df, time_column_name, average_column_name):
    """
    Compute, for every id, the circular mean of a time-of-day column.

    Times of day are averaged on a circle so that values on both sides of
    midnight average to a time near midnight rather than near midday.

    input: - time_df: dataframe with an "id" column and a column of "HH:MM" strings
           - time_column_name: name of the "HH:MM" column to average
           - average_column_name: name of the single column in the result
    output: dataframe indexed by id with one column (average_column_name)
            holding the average time as an "HH:MM" string

    The received dataframe is not modified.
    """
    clock_times = pd.to_datetime(time_df[time_column_name], format="%H:%M")

    # Map each time of day to an angle on the 24h circle. Minutes are included:
    # using the hour alone would treat e.g. 23:05 and 23:55 as the same time.
    minutes_per_day = 24 * 60
    minutes_of_day = clock_times.dt.hour * 60 + clock_times.dt.minute
    radians = minutes_of_day / minutes_per_day * 2 * np.pi

    # Group a standalone Series by id, so no helper column is ever written to
    # (or dropped from) the caller's dataframe.
    mean_radians = radians.groupby(time_df["id"]).agg(circmean)

    # If the mean angle comes back negative, wrap it onto [0, 2*pi] before
    # converting the angle back to hours of the day.
    mean_hours = (mean_radians % (2 * np.pi)) / (2 * np.pi) * 24

    # Convert the hours to "hour:minute" format
    average_times = pd.to_datetime(mean_hours, unit="h").dt.strftime("%H:%M")

    return average_times.to_frame(name=average_column_name)


def create_avg_table_from_df(measurments_df):
    """
    Build a per-id table of averages from a measurements dataframe.

    input: measurements dataframe containing the columns id, steps,
           average heartrate, average stress, duration, sleeping_hour,
           awake time and midSleep time
    output: dataframe keyed by id with the columns, in this order =>
     => [steps, average heartrate, average stress, average sleep start,
         duration, average midsleep, average awake time]

    Rows with heartrate 0 are left out of the heartrate average and rows with
    stress -1 are left out of the stress average. The sleep averages use only
    rows that have no missing value in any column. The daily averages are
    inner-merged with each other, and the sleep averages are then left-merged
    onto them.
    """
    def _inner_merge_on_id(frames):
        # Fold a list of per-id frames into one, keeping ids present in all of them.
        return reduce(lambda left, right: pd.merge(left, right, on="id", how="inner"), frames)

    ###################### daily predictors ##########################

    # rows with a usable heartrate / stress reading
    valid_heart_rate_rows = measurments_df[measurments_df['average heartrate'] != 0]
    valid_stress_rows = measurments_df[measurments_df['average stress'] != -1]

    steps_means = measurments_df.groupby('id')['steps'].mean().to_frame()
    heart_rate_means = valid_heart_rate_rows.groupby('id')['average heartrate'].mean().to_frame()
    stress_means = valid_stress_rows.groupby('id')['average stress'].mean().to_frame()

    merged_dailies_df = _inner_merge_on_id([steps_means, heart_rate_means, stress_means])

    ###################### sleep predictors ############################

    # only rows without any missing value take part in the sleep averages
    complete_rows = measurments_df.dropna()

    sleep_start_means = create_avg_time_df(complete_rows, 'sleeping_hour', 'average sleep start')
    duration_means = complete_rows.groupby('id')['duration'].mean().to_frame()
    awake_means = create_avg_time_df(complete_rows, 'awake time', 'average awake time')
    midsleep_means = create_avg_time_df(complete_rows, 'midSleep time', 'average midsleep')

    merged_sleeping_df = _inner_merge_on_id(
        [sleep_start_means, duration_means, midsleep_means, awake_means]
    )

    # every id with daily averages is kept; sleep columns are attached when available
    return pd.merge(merged_dailies_df, merged_sleeping_df, on='id', how='left')


### creating morning-type, exams-period, mid-week averages table

In [None]:
# Morning-type rows (selected as numeric_id >= 200), midweek days, inside the exam period.
morning_exams_midweek_students_df = working_df.loc[
    working_df['numeric_id'].ge(200)
    & working_df['weekend'].eq('No')
    & working_df['exam period'].eq('Yes')
]
morning_exams_midweek_average_df = create_avg_table_from_df(morning_exams_midweek_students_df)

### saving morning-type, mid-week, exams-period averages dataframe to csv

In [None]:
# Write the averages to CSV (index=False is not passed, so the index is written too), then display them.
morning_exams_midweek_average_df.to_csv('morning-midweek-exams-averages.csv')
morning_exams_midweek_average_df

### creating morning-type, exams-period, weekend averages table

In [None]:
# Morning-type rows (selected as numeric_id >= 200), weekend days, inside the exam period.
morning_exams_weekend_students_df = working_df.loc[
    working_df['numeric_id'].ge(200)
    & working_df['weekend'].eq('Yes')
    & working_df['exam period'].eq('Yes')
]
morning_exams_weekend_average_df = create_avg_table_from_df(morning_exams_weekend_students_df)

### saving morning-type, exams-period, weekend averages dataframe to csv

In [None]:
# Write the averages to CSV (index=False is not passed, so the index is written too), then display them.
morning_exams_weekend_average_df.to_csv('morning-weekend-exams-averages.csv')
morning_exams_weekend_average_df

### creating morning-type, non-exams, weekend average table

In [None]:
# Morning-type rows (selected as numeric_id >= 200), weekend days, outside the exam period.
morning_non_exams_weekend_students_df = working_df.loc[
    working_df['numeric_id'].ge(200)
    & working_df['weekend'].eq('Yes')
    & working_df['exam period'].eq('No')
]
morning_non_exams_weekend_averages_df = create_avg_table_from_df(morning_non_exams_weekend_students_df)

### saving morning-type, non-exams, weekend averages dataframe to csv

In [None]:
# Write the averages to CSV (index=False is not passed, so the index is written too), then display them.
morning_non_exams_weekend_averages_df.to_csv('morning-weekend-non-exams-averages.csv')
morning_non_exams_weekend_averages_df

### creating morning-type, non-exams, midweek average table

In [None]:
# Morning-type rows (selected as numeric_id >= 200), midweek days, outside the exam period.
morning_non_exams_midweek_students_df = working_df.loc[
    working_df['numeric_id'].ge(200)
    & working_df['weekend'].eq('No')
    & working_df['exam period'].eq('No')
]
morning_non_exams_midweek_average_df = create_avg_table_from_df(morning_non_exams_midweek_students_df)

### saving morning-type, non-exams, midweek averages dataframe to csv

In [None]:
# Write the averages to CSV (index=False is not passed, so the index is written too), then display them.
morning_non_exams_midweek_average_df.to_csv('morning-midweek-non-exams-averages.csv')
morning_non_exams_midweek_average_df

### creating night-type, non-exams, midweek average table

In [None]:
# Night-type rows (selected as numeric_id < 200), midweek days, outside the exam period.
night_non_exams_midweek_students_df = working_df.loc[
    working_df['numeric_id'].lt(200)
    & working_df['weekend'].eq('No')
    & working_df['exam period'].eq('No')
]
night_non_exams_midweek_average_df = create_avg_table_from_df(night_non_exams_midweek_students_df)

### saving night-type, non-exams, midweek averages dataframe to csv

In [None]:
# Write the averages to CSV (index=False is not passed, so the index is written too), then display them.
night_non_exams_midweek_average_df.to_csv('night-midweek-non-exams-averages.csv')
night_non_exams_midweek_average_df

### creating night-type, exams, midweek average table

In [None]:
# Night-type rows (selected as numeric_id < 200), midweek days, inside the exam period.
night_exams_midweek_students_df = working_df.loc[
    working_df['numeric_id'].lt(200)
    & working_df['weekend'].eq('No')
    & working_df['exam period'].eq('Yes')
]
night_exams_midweek_average_df = create_avg_table_from_df(night_exams_midweek_students_df)

### saving night-type, exams, midweek averages dataframe to csv

In [None]:
# Write the averages to CSV (index=False is not passed, so the index is written too), then display them.
night_exams_midweek_average_df.to_csv('night-midweek-exams-averages.csv')
night_exams_midweek_average_df

### creating night-type, non-exams, weekend average table

In [None]:
# Night-type rows (selected as numeric_id < 200), weekend days, outside the exam period.
night_non_exams_weekend_students_df = working_df.loc[
    working_df['numeric_id'].lt(200)
    & working_df['weekend'].eq('Yes')
    & working_df['exam period'].eq('No')
]
night_non_exams_weekend_average_df = create_avg_table_from_df(night_non_exams_weekend_students_df)

### saving night-type, non-exams, weekend averages dataframe to csv

In [None]:
# Write the averages to CSV (index=False is not passed, so the index is written too), then display them.
night_non_exams_weekend_average_df.to_csv('night-weekend-non-exams-averages.csv')
night_non_exams_weekend_average_df

### creating night-type, exams, weekend average table

In [None]:
# Night-type rows (selected as numeric_id < 200), weekend days, inside the exam period.
night_exams_weekend_students_df = working_df.loc[
    working_df['numeric_id'].lt(200)
    & working_df['weekend'].eq('Yes')
    & working_df['exam period'].eq('Yes')
]
night_exams_weekend_average_df = create_avg_table_from_df(night_exams_weekend_students_df)

### saving night-type, exams, weekend averages dataframe to csv

In [None]:
# Write the averages to CSV (index=False is not passed, so the index is written too), then display them.
night_exams_weekend_average_df.to_csv('night-weekend-exams-averages.csv')
night_exams_weekend_average_df

### creating sleep duration difference table (columns are: midweek - weekend, non exams - exams)

In [None]:
# separating working df into weekend measurements and midweek measurements
# (rows with any missing value are discarded)
weekend_df = working_df.loc[working_df['weekend'].eq('Yes')].dropna()
midweek_df = working_df.loc[working_df['weekend'].eq('No')].dropna()

# sum of sleep duration for each user, per day type
sum_sleep_hours_weekend_df = (
    weekend_df.groupby('id')['duration'].sum().reset_index()
    .rename(columns={'duration':'weekend sleep duration'})
)
sum_sleep_hours_midweek_df = (
    midweek_df.groupby('id')['duration'].sum().reset_index()
    .rename(columns={'duration':'midweek sleep duration'})
)
# merging both midweek and weekend tables
sum_of_sleeps_weekend_midweek_df = pd.merge(
    sum_sleep_hours_weekend_df, sum_sleep_hours_midweek_df, on="id", how='outer'
)

# separating working df into exams-period measurements and non-exams measurements
# (rows with any missing value are discarded)
exams_period_df = working_df.loc[working_df['exam period'].eq('Yes')].dropna()
non_exams_period_df = working_df.loc[working_df['exam period'].eq('No')].dropna()

# sum of sleep duration for each user, per period
sum_sleep_hours_exams_df = (
    exams_period_df.groupby('id')['duration'].sum().reset_index()
    .rename(columns={'duration':'exams sleep duration'})
)
sum_sleep_hours_non_exams_df = (
    non_exams_period_df.groupby('id')['duration'].sum().reset_index()
    .rename(columns={'duration':'non-exams sleep duration'})
)
# merging both exams and non-exams tables
sum_of_sleeps_exams_non_exams_df = pd.merge(
    sum_sleep_hours_exams_df, sum_sleep_hours_non_exams_df, on="id", how='outer'
)

# merging all sleeping duration tables; an id missing from one side gets a sum of 0
sleeps_manipulations_df = pd.merge(
    sum_of_sleeps_weekend_midweek_df, sum_of_sleeps_exams_non_exams_df, on='id', how='outer'
).fillna(0)

# calculating sleep difference between (midweek - weekend) and (non exams - exams)
sleeps_manipulations_df['midweek - weekend'] = (
    sleeps_manipulations_df['midweek sleep duration'] - sleeps_manipulations_df['weekend sleep duration']
)
sleeps_manipulations_df['non exams - exams'] = (
    sleeps_manipulations_df['non-exams sleep duration'] - sleeps_manipulations_df['exams sleep duration']
)

# counting sleeping measures for each student
count_sleep_hours_weekend_df = (
    weekend_df.groupby('id')['duration'].count().reset_index()
    .rename(columns={'duration':'weekend measurements count'})
)
count_sleep_hours_midweek_df = (
    midweek_df.groupby('id')['duration'].count().reset_index()
    .rename(columns={'duration':'midweek measurements count'})
)
count_sleep_hours_exams_df = (
    exams_period_df.groupby('id')['duration'].count().reset_index()
    .rename(columns={'duration':'exams measurements count'})
)
count_sleep_hours_non_exams_df = (
    non_exams_period_df.groupby('id')['duration'].count().reset_index()
    .rename(columns={'duration':'non-exams measurements count'})
)

# merging all counters df; an id missing from one table gets a count of 0
sleeps_counters_df = [count_sleep_hours_weekend_df, count_sleep_hours_midweek_df, count_sleep_hours_exams_df, count_sleep_hours_non_exams_df]
merged_sleeps_counter_df = reduce(
    lambda left, right: pd.merge(left, right, on="id", how="outer"), sleeps_counters_df
).fillna(0)

### saving sleep measurements sum and difference dataframe to csv file

In [None]:
# Write the sums and differences to CSV (index=False is not passed, so the index is written too), then display them.
sleeps_manipulations_df.to_csv('sleeping-duration-sum-difference.csv')
sleeps_manipulations_df

### saving sleeping measurements count dataframe to csv file

In [None]:
# Write the measurement counts to CSV (index=False is not passed, so the index is written too), then display them.
merged_sleeps_counter_df.to_csv('sleeping_measurements_count.csv')
merged_sleeps_counter_df

#### Merging all averages tables

In [None]:
# adding description columns to all averages dataframes
# each entry: (averages dataframe, type, exams period, weekend)
labelled_average_dfs = [
    (morning_exams_midweek_average_df, 'Morning', 'Yes', 'No'),
    (morning_exams_weekend_average_df, 'Morning', 'Yes', 'Yes'),
    (morning_non_exams_weekend_averages_df, 'Morning', 'No', 'Yes'),
    (morning_non_exams_midweek_average_df, 'Morning', 'No', 'No'),
    (night_non_exams_midweek_average_df, 'Night', 'No', 'No'),
    (night_exams_midweek_average_df, 'Night', 'Yes', 'No'),
    (night_non_exams_weekend_average_df, 'Night', 'No', 'Yes'),
    (night_exams_weekend_average_df, 'Night', 'Yes', 'Yes'),
]

for averages_frame, type_label, exams_label, weekend_label in labelled_average_dfs:
    row_count = len(averages_frame)
    averages_frame["type"] = [type_label] * row_count
    averages_frame["exams period"] = [exams_label] * row_count
    averages_frame["weekend"] = [weekend_label] * row_count

# stacking the eight labelled tables (in the order listed above) and turning the index into a column
merged_averages_df = pd.concat(
    [averages_frame for averages_frame, *_labels in labelled_average_dfs]
).reset_index()
sorted_by_id_merged_averages_df = merged_averages_df.sort_values('id').set_index('id').reset_index()

### saving merged averages dataframe to csv file

In [None]:
# Write both the unsorted and the id-sorted merged tables to CSV, then display the sorted one.
merged_averages_df.to_csv('merged_averages.csv')
sorted_by_id_merged_averages_df.to_csv('sorted_merged_averages.csv')
sorted_by_id_merged_averages_df

### creating average sleep duration table for each tested student

In [None]:
# Averages over all measurements; only the mean sleep duration per id is kept.
averages_df = create_avg_table_from_df(working_df).reset_index()
sleep_duration_averages_df = averages_df.loc[:, ['id', 'duration']].set_index('id')

In [None]:
# Write the per-id average sleep duration to CSV (id is the index and is written too), then display it.
sleep_duration_averages_df.to_csv('sleep_duration_averages.csv')
sleep_duration_averages_df

### creating average midsleep table for midweek measurements

In [45]:
# Averages over midweek measurements; only the average midsleep per id is kept.
mid_week_df = working_df.loc[working_df['weekend'].eq('No')]
mid_week_averages_df = create_avg_table_from_df(mid_week_df).reset_index()
mid_week_midsleep_averages_df = mid_week_averages_df.loc[:, ['id', 'average midsleep']].set_index('id')

In [None]:
# Write the per-id midweek average midsleep to CSV (id is the index and is written too), then display it.
mid_week_midsleep_averages_df.to_csv('midweek_midsleep_average.csv')
mid_week_midsleep_averages_df

### creating average midsleep table for weekend measurements

In [None]:
# Averages over weekend measurements; only the average midsleep per id is kept.
# NOTE: this rebinds weekend_df, which an earlier cell built with .dropna();
# here no rows are dropped at this step.
weekend_df = working_df.loc[working_df['weekend'].eq('Yes')]
weekend_averages_df = create_avg_table_from_df(weekend_df).reset_index()
weekend_midsleep_averages_df = weekend_averages_df.loc[:, ['id', 'average midsleep']].set_index('id')

In [None]:
# Write the per-id weekend average midsleep to CSV (id is the index and is written too), then display it.
weekend_midsleep_averages_df.to_csv('weekend_midsleep_average.csv')
weekend_midsleep_averages_df