In [None]:
import pandas as pd
import numpy as np
import random
import secrets
from datetime import datetime

file_path = "raw_sleep.csv"
df = pd.read_csv(file_path)


In [None]:
# Calculating epoch time
df['start'] = pd.to_datetime(df['start'])
df['end'] = pd.to_datetime(df['end'])

# Convert datetime objects to epoch time
df['start_epoch'] = df['start'].astype(int) // 10**9
df['end_epoch'] = df['end'].astype(int) // 10**9
df["duration"] = df['end_epoch'] - df['start_epoch']


In [None]:
duration_sum = df.groupby('stage')['duration'].sum()
duration_sum


In [None]:

# Grouping by date
df['start'] = pd.to_datetime(df['start'])
# df['start_date'] = df['start'].dt.date
df['start_date'] = pd.to_datetime(df['start'])

# Extract year and month and replace the "start" column with it
# df['start_date'] = df['start_date'].dt.to_period('M')
df['start_date'] = df['start_date'].dt.to_period('M')

# Pivoting table into relevant format
grouped = df.groupby(['start_date', 'stage'])['duration'].sum().reset_index()
pivot_df = df.pivot_table(index='start_date', columns='stage', values='duration', aggfunc='sum').reset_index()

# Cleaning data
pivot_df = pivot_df.drop(['ASLEEP_UNSPECIFIED', 'IN_BED', 'AWAKE'], axis=1).dropna()

pivot_df["SUM"] = pivot_df["DEEP"] + pivot_df["LIGHT"] + pivot_df["REM"]

pivot_df["DEEP"] = pivot_df["DEEP"] / pivot_df["SUM"]
pivot_df["LIGHT"] = pivot_df["LIGHT"] / pivot_df["SUM"]
pivot_df["REM"] = pivot_df["REM"] / pivot_df["SUM"]

pivot_df
# TODO: draw a bar graph that compares this to average 

In [None]:
import matplotlib.pyplot as plt

# Convert start_date to string
pivot_df['start_date'] = pivot_df['start_date'].astype(str)

# Plotting
plt.figure(figsize=(10, 6))

plt.plot(pivot_df['start_date'], pivot_df['DEEP'], marker='o', label='DEEP')
plt.plot(pivot_df['start_date'], pivot_df['LIGHT'], marker='o', label='LIGHT')
plt.plot(pivot_df['start_date'], pivot_df['REM'], marker='o', label='REM')

plt.title('Sleep Stages Over Time')
plt.xlabel('Date')
plt.ylabel('Duration (minutes)')
plt.xticks(rotation=45)
plt.legend()
plt.grid(True)
plt.tight_layout()

plt.show()

In [None]:

# sleep_metrics (sleep efficiency)

#1. JOIN the two sleep datasets by session_id so you can get the date for both
# get the (sleep efficiency / sleep latency / deep : light : rem ratio / sleepWakeups) for each day

#2. GET the dataframes for each graph you'll plot

#3. PLOT the graphs in python

#4. GENERATE new .csv files, upload them to firestore

#5. REACT to read data from DynamoDB and display it on the site

#6. CHATBOT to get health info

#7. DESIGN the site to look nicer














In [None]:



metrics = pd.read_csv("sleep_metrics.csv")
raw = pd.read_csv("raw_sleep.csv")


metrics = metrics.dropna()
metrics




In [5]:
import pandas as pd
import numpy as np
import random
import secrets
from datetime import datetime
import matplotlib.pyplot as plt

# Loading in data
metrics = pd.read_csv("sleep_metrics.csv")
raw = pd.read_csv("raw_sleep.csv")

metrics

Unnamed: 0,sleep_metric,user_id,sleep_session_id,value
0,percentageSleepDeep,94155c17-864d-46ee-be22-775137bd1cee,1ca68c00-9dad-486e-b8c5-844e9ecdd2ca,10.87
1,percentageSleepRem,94155c17-864d-46ee-be22-775137bd1cee,1ca68c00-9dad-486e-b8c5-844e9ecdd2ca,16.94
2,percentageSleepLight,94155c17-864d-46ee-be22-775137bd1cee,1ca68c00-9dad-486e-b8c5-844e9ecdd2ca,70.45
3,percentageNightAwake,94155c17-864d-46ee-be22-775137bd1cee,1ca68c00-9dad-486e-b8c5-844e9ecdd2ca,5.61
4,percentageNightDeep,94155c17-864d-46ee-be22-775137bd1cee,1ca68c00-9dad-486e-b8c5-844e9ecdd2ca,10.44
...,...,...,...,...
11511,percentageSleepLight,94155c17-864d-46ee-be22-775137bd1cee,3ed82279-446c-4c72-a6dc-35df4757b571,60.65
11512,percentageNightAwake,94155c17-864d-46ee-be22-775137bd1cee,3ed82279-446c-4c72-a6dc-35df4757b571,11.81
11513,percentageNightDeep,94155c17-864d-46ee-be22-775137bd1cee,3ed82279-446c-4c72-a6dc-35df4757b571,12.22
11514,percentageNightRem,94155c17-864d-46ee-be22-775137bd1cee,3ed82279-446c-4c72-a6dc-35df4757b571,20.31


In [3]:

import pandas as pd
import numpy as np
import random
import secrets
from datetime import datetime
import matplotlib.pyplot as plt

# Loading in data
metrics = pd.read_csv("sleep_metrics.csv")
raw = pd.read_csv("raw_sleep.csv")

# Merging and cleaning
merged = pd.merge(raw, metrics, on='sleep_session_id', how='inner')
merged = merged.drop(['id', 'timezone', 'log_method', 'user_id', 'end', 'stage'], axis=1)
merged['start'] = pd.to_datetime(merged['start'])
merged['start'] = merged['start'].dt.date
merged = merged.dropna().drop_duplicates()


# Pivoting 
df = merged.pivot_table(index='start', columns='sleep_metric', values='value', aggfunc='sum').reset_index().dropna()
df['start'] = pd.to_datetime(df['start'])

df
# df

sleep_metric,start,percentageNightAwake,percentageNightDeep,percentageNightLight,percentageNightRem,percentageSleepAwake,percentageSleepDeep,percentageSleepLight,percentageSleepRem,sleepEfficiency,sleepLatency,sleepWakeups,timeNightAsleep,timeNightAwake,timeNightDeep,timeNightInBed,timeNightLight,timeNightRem,timeSleepAwake,wakeupsOver15Minutes
98,2022-01-09,7.27,10.61,57.88,24.25,7.39,10.90,59.46,24.91,92.733020,14.016666,4.0,489.16666,38.33,55.95,527.50000,305.30,127.92,37.95,0.0
99,2022-01-10,10.40,16.53,50.10,22.97,10.60,17.05,51.66,23.68,89.604576,14.516666,6.0,430.55000,49.95,79.45,945.05000,240.73,110.37,49.42,0.0
100,2022-01-11,10.97,15.02,59.93,14.07,11.16,15.43,61.58,14.46,89.028980,17.016666,8.0,568.45000,70.05,95.90,1009.45000,382.68,89.87,69.38,2.0
101,2022-01-12,9.43,15.92,58.65,16.00,9.56,16.30,60.05,16.38,90.567055,3.016667,9.0,503.10000,52.40,88.43,1035.50000,325.78,88.88,51.87,0.0
102,2022-01-13,5.84,11.59,63.57,18.99,5.86,11.82,64.82,19.37,94.156006,10.516666,4.0,515.03330,31.97,63.40,916.53333,347.73,103.90,31.45,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
903,2024-04-22,34.91,11.42,47.37,9.41,30.59,12.13,50.29,9.99,165.084950,24.616667,15.0,981.85003,171.45,53.97,1153.29999,223.80,44.45,143.40,1.0
904,2024-04-23,17.80,0.00,0.00,0.00,16.79,0.00,0.00,0.00,182.195464,0.000000,20.0,682.46666,68.80,0.00,1213.23333,0.00,0.00,65.22,1.0
905,2024-04-24,13.14,14.83,66.83,15.08,11.13,15.10,68.06,15.35,186.859640,5.433333,30.0,872.01666,66.38,58.97,938.40000,265.77,59.95,58.07,0.0
906,2024-04-25,49.36,7.81,42.65,13.22,39.44,8.96,48.91,15.16,150.639854,76.983330,24.0,787.30000,278.95,46.95,1066.25000,256.30,79.45,199.09,1.0


In [4]:

df['start'] = pd.to_datetime(df['start'])

# Define the date ranges for Cornell University's school days
school_start_date_1 = pd.to_datetime('2022-08-21')
school_end_date_1 = pd.to_datetime('2022-12-16')
school_start_date_2 = pd.to_datetime('2023-01-22')
school_end_date_2 = pd.to_datetime('2023-05-18')

# Determines if a date falls within the school days
def is_school_day(date):
    return (school_start_date_1 <= date <= school_end_date_1) or (school_start_date_2 <= date <= school_end_date_2)

# Adding a new column called 'timeframe'
df['timeframe'] = df['start'].apply(lambda x: 'school' if is_school_day(x) else 'summer')

# Calculating the average sleep latency per night of the week for school and summer
school_avg_sleep_latency = df[df['timeframe'] == 'school'].groupby(df['start'].dt.day_name())['sleepLatency'].mean()
summer_avg_sleep_latency = df[df['timeframe'] == 'summer'].groupby(df['start'].dt.day_name())['sleepLatency'].mean()

# Creating DataFrames for the results
school_sleep_latency_df = pd.DataFrame({'weekday': school_avg_sleep_latency.index, 'school_latency': school_avg_sleep_latency.values})
summer_sleep_latency_df = pd.DataFrame({'weekday': summer_avg_sleep_latency.index, 'summer_latency': summer_avg_sleep_latency.values})


# Sorting days from Monday to Sunday + combine dataframes
days_of_week = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
school_sleep_latency_df = school_sleep_latency_df.set_index('weekday').reindex(days_of_week).reset_index()
summer_sleep_latency_df = summer_sleep_latency_df.set_index('weekday').reindex(days_of_week).reset_index()
sleep_latency = pd.merge(school_sleep_latency_df, summer_sleep_latency_df, on='weekday')

# Finding average sleep latency per category
mean_school_latency = school_avg_sleep_latency.mean()
mean_summer_latency = summer_avg_sleep_latency.mean()

sleep_latency["school_percent_deviation"] = 100*(sleep_latency["school_latency"] - mean_school_latency)/(mean_school_latency)
sleep_latency["summer_percent_deviation"] = 100*(sleep_latency["summer_latency"] - mean_summer_latency)/(mean_summer_latency)

sleep_latency



Unnamed: 0,weekday,school_latency,summer_latency,school_percent_deviation,summer_percent_deviation
0,Monday,12.394444,12.997222,-61.678374,-15.630795
1,Tuesday,31.777194,17.9375,-1.750034,16.438158
2,Wednesday,27.271969,16.214035,-15.679464,5.250585
3,Thursday,34.144166,19.815789,5.568264,28.630746
4,Friday,43.3025,16.428161,33.884358,6.640545
5,Saturday,50.831251,12.906173,57.162042,-16.221827
6,Sunday,26.680953,11.537333,-17.506791,-25.107411
