In [1]:
import pandas as pd

In [2]:
import os
import kagglehub

# Download the Kaggle dataset; the returned value is joined with a file name below,
# i.e. it is used as the local directory holding the dataset files.
path = kagglehub.dataset_download(
    "uom190346a/sleep-health-and-lifestyle-dataset"
)

# Read the dataset's CSV into the raw DataFrame the rest of the notebook works from.
file_path = os.path.join(path, "Sleep_health_and_lifestyle_dataset.csv")
df = pd.read_csv(file_path)

Using Colab cache for faster access to the 'sleep-health-and-lifestyle-dataset' dataset.


## Checagem do Dataset

In [None]:
# Preview the first five rows to confirm the CSV loaded with the expected columns.
df.head(n=5)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [None]:
# How often each raw "systolic/diastolic" reading occurs, before it is categorised.
df.loc[:, "Blood Pressure"].value_counts()

Unnamed: 0_level_0,count
Blood Pressure,Unnamed: 1_level_1
130/85,99
140/95,65
125/80,65
120/80,45
115/75,32
135/90,27
140/90,4
125/82,4
128/85,3
132/87,3


In [None]:
# Structural overview: (rows, columns), per-column dtypes, and per-column null counts.
# Each summary is printed followed by " \n" so the sections are separated by a blank line.
for summary in (df.shape, df.dtypes, df.isnull().sum()):
    print(f"{summary} \n")

(374, 13) 

Person ID                    int64
Gender                      object
Age                          int64
Occupation                  object
Sleep Duration             float64
Quality of Sleep             int64
Physical Activity Level      int64
Stress Level                 int64
BMI Category                object
Blood Pressure              object
Heart Rate                   int64
Daily Steps                  int64
Sleep Disorder              object
dtype: object 

Person ID                    0
Gender                       0
Age                          0
Occupation                   0
Sleep Duration               0
Quality of Sleep             0
Physical Activity Level      0
Stress Level                 0
BMI Category                 0
Blood Pressure               0
Heart Rate                   0
Daily Steps                  0
Sleep Disorder             219
dtype: int64 



## Dataset Base para previsão





In [None]:
def base_dataset(df):
  """Build the base modelling dataset with a blood-pressure category target.

  Works on a copy of ``df``; the input frame is never modified. The copy:

  * loses the ``Person ID`` identifier column;
  * has missing ``Sleep Disorder`` values filled with ``"No Disorder"``;
  * has the ``Blood Pressure`` column (strings shaped ``"systolic/diastolic"``)
    replaced by a ``BP_Category`` label column appended at the end.

  A reading receives the most severe category reached by *either* value:

  ====================  ========  =========
  BP_Category           systolic  diastolic
  ====================  ========  =========
  Hypertension_Stage3   >= 180    >= 110
  Hypertension_Stage2   >= 160    >= 100
  Hypertension_Stage1   >= 140    >= 90
  Elevated              >= 130    >= 85
  Normal                otherwise
  ====================  ========  =========

  Args:
    df: DataFrame containing at least the ``Person ID``, ``Sleep Disorder``
      and ``Blood Pressure`` columns.

  Returns:
    A new DataFrame without ``Person ID`` and ``Blood Pressure`` and with the
    added ``BP_Category`` column. All other columns are passed through as-is.

  Raises:
    KeyError: if a required column is missing.
    ValueError: if any ``Blood Pressure`` value is missing or is not of the
      form ``"<integer>/<integer>"``.
  """
  df_base = df.copy()

  df_base = df_base.drop(columns=['Person ID'])
  df_base["Sleep Disorder"] = df_base["Sleep Disorder"].fillna("No Disorder")

  # Parse the readings into a separate frame instead of writing scratch columns
  # into df_base: scratch columns would overwrite (and later drop) any input
  # columns that happen to share their names.
  readings = df_base["Blood Pressure"].str.extract(r"^\s*(\d+)\s*/\s*(\d+)\s*$")
  malformed = readings.isna().any(axis=1)
  if malformed.any():
    examples = df_base.loc[malformed, "Blood Pressure"].unique()[:5].tolist()
    raise ValueError(
      "Blood Pressure values must look like 'systolic/diastolic'; "
      f"found {int(malformed.sum())} invalid value(s), e.g. {examples}"
    )
  systolic = readings[0].astype(int)
  diastolic = readings[1].astype(int)

  # (label, minimum systolic, minimum diastolic), ordered from least to most
  # severe. Applying them in this order lets a more severe label overwrite a
  # milder one, so each row ends up with the highest category it qualifies for.
  thresholds = (
    ("Elevated", 130, 85),
    ("Hypertension_Stage1", 140, 90),
    ("Hypertension_Stage2", 160, 100),
    ("Hypertension_Stage3", 180, 110),
  )
  category = pd.Series("Normal", index=df_base.index)
  for label, systolic_min, diastolic_min in thresholds:
    reaches_level = (systolic >= systolic_min) | (diastolic >= diastolic_min)
    category = category.mask(reaches_level, label)

  df_base["BP_Category"] = category
  df_base = df_base.drop(columns=['Blood Pressure'])

  return df_base

In [None]:
# Build the base dataset, then inspect how the derived target classes are distributed.
df_clean = base_dataset(df)
df_clean.loc[:, "BP_Category"].value_counts()

Unnamed: 0_level_0,count
BP_Category,Unnamed: 1_level_1
Normal,163
Elevated,111
Hypertension_Stage1,100


## Dataset Features Adicionais

In [None]:
import numpy as np

def features_dataset(df_clean):
  """Add ratio features to a copy of the base dataset.

  Two columns are appended; the input frame is not modified:

  * ``Quality_Sleep_Mean``  = ``Sleep Duration`` / ``Quality of Sleep``
  * ``Mean_Stress_Cardiac`` = ``Stress Level`` / ``Heart Rate``

  A non-zero numerator over a zero denominator produces +/-inf; those values
  are replaced with 0 in *both* features so no infinities reach downstream
  steps. A 0/0 division (NaN) and NaN inputs are left as NaN.

  Args:
    df_clean: DataFrame containing the four numeric source columns above.

  Returns:
    A new DataFrame with the two ratio columns added.

  Raises:
    KeyError: if any of the source columns is missing.
  """
  df_features = df_clean.copy()

  # feature name -> (numerator column, denominator column)
  ratio_features = {
    "Quality_Sleep_Mean": ("Sleep Duration", "Quality of Sleep"),
    "Mean_Stress_Cardiac": ("Stress Level", "Heart Rate"),
  }

  for feature, (numerator, denominator) in ratio_features.items():
    ratio = df_features[numerator] / df_features[denominator]
    # Same division-by-zero guard for every ratio feature.
    df_features[feature] = ratio.replace([np.inf, -np.inf], 0)

  return df_features

In [None]:
# Derive the extra ratio features, then confirm the new column is fully populated.
df_features = features_dataset(df_clean)
df_features.loc[:, "Mean_Stress_Cardiac"].info()

<class 'pandas.core.series.Series'>
RangeIndex: 374 entries, 0 to 373
Series name: Mean_Stress_Cardiac
Non-Null Count  Dtype  
--------------  -----  
374 non-null    float64
dtypes: float64(1)
memory usage: 3.1 KB


## Salvar no Drive

In [None]:
import os
from google.colab import drive

# Mount Google Drive (Colab) so the output folder under MyDrive can be written to.
drive.mount('/content/drive')

nome_da_pasta = "Ps Ligia Time 16"
caminho_base = "/content/drive/MyDrive/"
caminho_completo = os.path.join(caminho_base, nome_da_pasta)

# Note whether the folder was there *before* creating it so the message stays
# accurate; exist_ok=True makes the creation itself safe to re-run.
pasta_ja_existia = os.path.isdir(caminho_completo)
os.makedirs(caminho_completo, exist_ok=True)
if pasta_ja_existia:
    print(f"A pasta '{nome_da_pasta}' já existe.")
else:
    print(f"Pasta '{nome_da_pasta}' criada com sucesso!")

# One path variable per output file, so `caminho_arquivo_base` really is the
# base CSV and is not overwritten by the features path.
caminho_arquivo_base = os.path.join(caminho_completo, "risco_cardiovascular_base.csv")
caminho_arquivo_features = os.path.join(caminho_completo, "risco_cardiovascular_features.csv")

# Write both CSVs (without the index) and report every file that was saved.
for dataset, caminho_arquivo in (
    (df_clean, caminho_arquivo_base),
    (df_features, caminho_arquivo_features),
):
    dataset.to_csv(caminho_arquivo, index=False)
    print(f"Dataset salvo em: {caminho_arquivo}")

Mounted at /content/drive
Pasta 'Ps Ligia Time 16' criada com sucesso!
Dataset salvo em: /content/drive/MyDrive/Ps Ligia Time 16/risco_cardiovascular_features.csv
