In [31]:
from pathlib import Path
import pandas as pd
import scipy.stats as stats

In [35]:
# Location of the sleep health and lifestyle CSV, relative to the notebook
sleep_health_data = Path("Resources/Sleep_health_and_lifestyle_dataset.csv")

# Load the CSV into a DataFrame and preview its first five rows
sleep_df = pd.read_csv(filepath_or_buffer=sleep_health_data)
sleep_df.head(5)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [36]:
# Number of rows recorded per occupation (value_counts sorts largest first)
occupation_column = sleep_df['Occupation']
occupation_count = occupation_column.value_counts()
occupation_count

Nurse                   73
Doctor                  71
Engineer                63
Lawyer                  47
Teacher                 40
Accountant              37
Salesperson             32
Software Engineer        4
Scientist                4
Sales Representative     2
Manager                  1
Name: Occupation, dtype: int64

In [42]:
# Keep only the occupations listed below; the remaining occupations are
# dropped because their sample sizes are too small.
occupations_to_keep = ['Nurse', 'Doctor', 'Engineer', 'Lawyer', 'Teacher', 'Accountant', 'Salesperson']
is_kept_occupation = sleep_df['Occupation'].isin(occupations_to_keep)
filtered_sleep_df = sleep_df.loc[is_kept_occupation]

# Re-count rows per occupation to confirm the filter
filtered_occupation_count = filtered_sleep_df['Occupation'].value_counts()
filtered_occupation_count

Nurse          73
Doctor         71
Engineer       63
Lawyer         47
Teacher        40
Accountant     37
Salesperson    32
Name: Occupation, dtype: int64

In [43]:
# Summary statistics table of mean, median, variance, standard deviation, and SEM
# of the quality of sleep for each occupation.
#
# The column is selected *before* aggregating. Aggregating the whole frame first
# (groupby(...).mean()['Quality of Sleep']) also tries to aggregate the string
# columns, which raises TypeError on pandas >= 2.0 (numeric_only now defaults to
# False) and needlessly computes every statistic for every column.
sleep_quality_by_occupation = filtered_sleep_df.groupby('Occupation')['Quality of Sleep']

sleep_quality_mean = sleep_quality_by_occupation.mean()
sleep_quality_median = sleep_quality_by_occupation.median()
sleep_quality_var = sleep_quality_by_occupation.var()
sleep_quality_std = sleep_quality_by_occupation.std()
sleep_quality_sem = sleep_quality_by_occupation.sem()

# DataFrame of resulting series (all share the same 'Occupation' index)
sleep_quality_df = pd.DataFrame({'Mean Sleep Quality': sleep_quality_mean,
                       'Median Sleep Quality': sleep_quality_median,
                       'Sleep Quality Variance': sleep_quality_var,
                       'Sleep Quality Std. Dev.': sleep_quality_std,
                       'Sleep Quality Std. Err.': sleep_quality_sem})

sleep_quality_df

Unnamed: 0_level_0,Mean Sleep Quality,Median Sleep Quality,Sleep Quality Variance,Sleep Quality Std. Dev.,Sleep Quality Std. Err.
Occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Accountant,7.891892,8.0,0.21021,0.458487,0.075375
Doctor,6.647887,7.0,0.574245,0.75779,0.089933
Engineer,8.412698,9.0,0.568868,0.754234,0.095025
Lawyer,7.893617,8.0,0.097132,0.311661,0.04546
Nurse,7.369863,6.0,2.402968,1.550151,0.181431
Salesperson,6.0,6.0,0.0,0.0,0.0
Teacher,6.975,7.0,0.435256,0.65974,0.104314


In [44]:
# Summary statistics table of mean, median, variance, standard deviation, and SEM
# of the stress level for each occupation.
#
# The column is selected *before* aggregating. Aggregating the whole frame first
# (groupby(...).mean()['Stress Level']) also tries to aggregate the string
# columns, which raises TypeError on pandas >= 2.0 (numeric_only now defaults to
# False) and needlessly computes every statistic for every column.
stress_level_by_occupation = filtered_sleep_df.groupby('Occupation')['Stress Level']

stress_level_mean = stress_level_by_occupation.mean()
stress_level_median = stress_level_by_occupation.median()
stress_level_var = stress_level_by_occupation.var()
stress_level_std = stress_level_by_occupation.std()
stress_level_sem = stress_level_by_occupation.sem()

# DataFrame of resulting series (all share the same 'Occupation' index)
stress_level_df = pd.DataFrame({'Mean Stress Level': stress_level_mean,
                       'Median Stress Level': stress_level_median,
                       'Stress Level Variance': stress_level_var,
                       'Stress Level Std. Dev.': stress_level_std,
                       'Stress Level Std. Err.': stress_level_sem})

stress_level_df

Unnamed: 0_level_0,Mean Stress Level,Median Stress Level,Stress Level Variance,Stress Level Std. Dev.,Stress Level Std. Err.
Occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Accountant,4.594595,4.0,1.525526,1.235122,0.203053
Doctor,6.732394,6.0,1.884507,1.372773,0.162918
Engineer,3.888889,3.0,1.293907,1.1375,0.143312
Lawyer,5.06383,5.0,0.061055,0.247092,0.036042
Nurse,5.547945,6.0,5.723364,2.392355,0.280004
Salesperson,7.0,7.0,0.0,0.0,0.0
Teacher,4.525,4.0,1.127564,1.061868,0.167896


In [45]:
# Join the sleep-quality and stress-level summaries into one table.
# 'Occupation' is the index of both frames; an outer join keeps every
# occupation that appears in either summary.
combined_sleep_data = sleep_quality_df.merge(
    stress_level_df,
    how='outer',
    on='Occupation',
)
combined_sleep_data

Unnamed: 0_level_0,Mean Sleep Quality,Median Sleep Quality,Sleep Quality Variance,Sleep Quality Std. Dev.,Sleep Quality Std. Err.,Mean Stress Level,Median Stress Level,Stress Level Variance,Stress Level Std. Dev.,Stress Level Std. Err.
Occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Accountant,7.891892,8.0,0.21021,0.458487,0.075375,4.594595,4.0,1.525526,1.235122,0.203053
Doctor,6.647887,7.0,0.574245,0.75779,0.089933,6.732394,6.0,1.884507,1.372773,0.162918
Engineer,8.412698,9.0,0.568868,0.754234,0.095025,3.888889,3.0,1.293907,1.1375,0.143312
Lawyer,7.893617,8.0,0.097132,0.311661,0.04546,5.06383,5.0,0.061055,0.247092,0.036042
Nurse,7.369863,6.0,2.402968,1.550151,0.181431,5.547945,6.0,5.723364,2.392355,0.280004
Salesperson,6.0,6.0,0.0,0.0,0.0,7.0,7.0,0.0,0.0,0.0
Teacher,6.975,7.0,0.435256,0.65974,0.104314,4.525,4.0,1.127564,1.061868,0.167896
