In [301]:
import pandas as pd
import plotly.express as px
from sklearn.preprocessing import LabelEncoder
from scipy.stats import f_oneway
from scipy import stats
from scipy.stats import chi2_contingency


In [302]:
#Dataset Columns:
#Person ID: An identifier for each individual.
#Gender: The gender of the person (Male/Female).
#Age: The age of the person in years.
#Occupation: The occupation or profession of the person.
#Sleep Duration (hours): The number of hours the person sleeps per day.
#Quality of Sleep (scale: 1-10): A subjective rating of the quality of sleep, ranging from 1 to 10.
#Physical Activity Level (minutes/day): The number of minutes the person engages in physical activity daily.
#Stress Level (scale: 1-10): A subjective rating of the stress level experienced by the person, ranging from 1 to 10.
#BMI Category: The BMI category of the person (e.g., Underweight, Normal, Overweight).
#Blood Pressure (systolic/diastolic): The blood pressure measurement of the person, indicated as systolic pressure over diastolic pressure.
#Heart Rate (bpm): The resting heart rate of the person in beats per minute.
#Daily Steps: The number of steps the person takes per day.
#Sleep Disorder: The presence or absence of a sleep disorder in the person (None, Insomnia, Sleep Apnea).

In [303]:
data = pd.read_csv('Sleep_health_and_lifestyle_dataset.csv')

In [304]:
display(data.head(15))
display(data.info())
display(data.describe())

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
5,6,Male,28,Software Engineer,5.9,4,30,8,Obese,140/90,85,3000,Insomnia
6,7,Male,29,Teacher,6.3,6,40,7,Obese,140/90,82,3500,Insomnia
7,8,Male,29,Doctor,7.8,7,75,6,Normal,120/80,70,8000,
8,9,Male,29,Doctor,7.8,7,75,6,Normal,120/80,70,8000,
9,10,Male,29,Doctor,7.8,7,75,6,Normal,120/80,70,8000,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Person ID                374 non-null    int64  
 1   Gender                   374 non-null    object 
 2   Age                      374 non-null    int64  
 3   Occupation               374 non-null    object 
 4   Sleep Duration           374 non-null    float64
 5   Quality of Sleep         374 non-null    int64  
 6   Physical Activity Level  374 non-null    int64  
 7   Stress Level             374 non-null    int64  
 8   BMI Category             374 non-null    object 
 9   Blood Pressure           374 non-null    object 
 10  Heart Rate               374 non-null    int64  
 11  Daily Steps              374 non-null    int64  
 12  Sleep Disorder           155 non-null    object 
dtypes: float64(1), int64(7), object(5)
memory usage: 38.1+ KB


None

Unnamed: 0,Person ID,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,Heart Rate,Daily Steps
count,374.0,374.0,374.0,374.0,374.0,374.0,374.0,374.0
mean,187.5,42.184492,7.132086,7.312834,59.171123,5.385027,70.165775,6816.84492
std,108.108742,8.673133,0.795657,1.196956,20.830804,1.774526,4.135676,1617.915679
min,1.0,27.0,5.8,4.0,30.0,3.0,65.0,3000.0
25%,94.25,35.25,6.4,6.0,45.0,4.0,68.0,5600.0
50%,187.5,43.0,7.2,7.0,60.0,5.0,70.0,7000.0
75%,280.75,50.0,7.8,8.0,75.0,7.0,72.0,8000.0
max,374.0,59.0,8.5,9.0,90.0,8.0,86.0,10000.0


In [305]:
data = data.drop('Person ID', axis=1)


In [306]:
display(data['Occupation'].unique())
data = data.groupby('Occupation').apply(lambda x: x.sort_values('Stress Level', ascending=False))
display(data1)
#Sleep Disorder
    #0 - Insomnia
    #1 - Sleep Apnea
    #2 - NaN
#BMI Category
    # Overwight 3
    # Normal 0
    # Obese 2
    # Normal Weight 1

array(['Software Engineer', 'Doctor', 'Sales Representative', 'Teacher',
       'Nurse', 'Engineer', 'Accountant', 'Scientist', 'Lawyer',
       'Salesperson', 'Manager'], dtype=object)

Unnamed: 0_level_0,Unnamed: 1_level_0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
Occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Accountant,311,312,Female,52,Accountant,6.6,7,45,7,Overweight,130/85,72,6000,Insomnia
Accountant,310,311,Female,52,Accountant,6.6,7,45,7,Overweight,130/85,72,6000,Insomnia
Accountant,309,310,Female,52,Accountant,6.6,7,45,7,Overweight,130/85,72,6000,Insomnia
Accountant,308,309,Female,52,Accountant,6.6,7,45,7,Overweight,130/85,72,6000,Insomnia
Accountant,307,308,Female,52,Accountant,6.5,7,45,7,Overweight,130/85,72,6000,Insomnia
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Teacher,101,102,Female,36,Teacher,7.2,8,60,4,Normal,115/75,68,7000,
Teacher,100,101,Female,36,Teacher,7.2,8,60,4,Normal,115/75,68,7000,
Teacher,99,100,Female,36,Teacher,7.1,8,60,4,Normal,115/75,68,7000,
Teacher,98,99,Female,36,Teacher,7.1,8,60,4,Normal,115/75,68,7000,


In [307]:
for coluna in data.columns:
    grafico =  px.histogram(data, x=coluna, color='Sleep Disorder')
    grafico.show()

#2 - NaN
#1 - Sleep Apnea
#0 - Insomnia

In [308]:
pie0 = px.pie(data, names='Gender', title='Gender')
pie1 = px.pie(data, names='BMI Category', title='BMI Category')
pie0.show()
pie1.show()
#Sleep Disorder
    #0 - Insomnia
    #1 - Sleep Apnea
    #2 - NaN
#BMI Category
    # Normal 0
    # Overwight 3
    # Normal Weight 1
    # Obese 2

#It was found that 58.6% of the participants do not have sleep disorders

In [309]:
#Transformar as colunas 'BMI Category' e 'Sleep Disorder' em valores numéricos
encoder = LabelEncoder()
data['BMI Category'] = encoder.fit_transform(data['BMI Category'])
data['Sleep Disorder'] = encoder.fit_transform(data['Sleep Disorder'])

In [310]:
#Teste de hipoteses: teste t de Student

f_stat, p_value = f_oneway(data['Age'], data['Sleep Duration'], data['Quality of Sleep'], data['Physical Activity Level'], data['Stress Level'], data['BMI Category'],data['Sleep Disorder'])

if p_value < 0.05:
    print("→ Há uma diferença significativa entre as médias dos grupos.")
else:
    print("x Não há uma diferença significativa entre as médias dos grupos.")

→ Há uma diferença significativa entre as médias dos grupos.


In [311]:
#Testes estatísticos
#dependente é Sleep Disorder data['Sleep Disorder']
#independente são as demais data['Age'], data['Sleep Duration'], data['Quality of Sleep'], data['Physical Activity Level'], data['Stress Level'], data['BMI Category']

#Regressão Linear
X = sm.add_constant(data['Sleep Duration'])
model = sm.OLS(data['Sleep Disorder'], X).fit()
display(model.summary())
print('------------')

#Coef de Pearson
corr_coefficient, p_value = stats.pearsonr(data['Sleep Disorder'], data['Sleep Duration'])
print(f"Coeficiente de correlação de Pearson: {corr_coefficient}")
print(f"P-valor: {p_value}")
print('------------')

#Qui-Quadrado
contingency_table = pd.crosstab(data['Sleep Duration'], data['Sleep Disorder'])
chi2, p, dof, expected = chi2_contingency(contingency_table)
if p < 0.05:
    print("→ Há uma relação significativa entre as variáveis.")
else:
    print("Não há uma relação significativa entre as variáveis.")

0,1,2,3
Dep. Variable:,Sleep Disorder,R-squared:,0.146
Model:,OLS,Adj. R-squared:,0.144
Method:,Least Squares,F-statistic:,63.58
Date:,"Fri, 28 Jul 2023",Prob (F-statistic):,1.92e-14
Time:,22:58:44,Log-Likelihood:,-419.84
No. Observations:,374,AIC:,843.7
Df Residuals:,372,BIC:,851.5
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-1.3792,0.348,-3.962,0.000,-2.064,-0.695
Sleep Duration,0.3868,0.049,7.973,0.000,0.291,0.482

0,1,2,3
Omnibus:,91.847,Durbin-Watson:,0.654
Prob(Omnibus):,0.0,Jarque-Bera (JB):,33.317
Skew:,-0.528,Prob(JB):,5.83e-08
Kurtosis:,1.989,Cond. No.,66.1


------------
Coeficiente de correlação de Pearson: 0.3820451694029506
P-valor: 1.9177920083864673e-14
------------
→ Há uma relação significativa entre as variáveis.
