In [520]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, classification_report

import torch
from torch import nn, argmax
from torch.utils.data import DataLoader, TensorDataset

In [521]:
df = pd.read_csv('sleep_health_and_lifestyle_dataset.csv')
df["Gender"] = df["Gender"].apply(lambda x: 0 if x == 'Male' else 1)
df[:10]

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,0,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,0,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,0,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,0,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,0,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
5,6,0,28,Software Engineer,5.9,4,30,8,Obese,140/90,85,3000,Insomnia
6,7,0,29,Teacher,6.3,6,40,7,Obese,140/90,82,3500,Insomnia
7,8,0,29,Doctor,7.8,7,75,6,Normal,120/80,70,8000,
8,9,0,29,Doctor,7.8,7,75,6,Normal,120/80,70,8000,
9,10,0,29,Doctor,7.8,7,75,6,Normal,120/80,70,8000,


In [522]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
le.fit(list(df["Occupation"].unique().tolist()))
df["Occupation"] = le.transform(df["Occupation"])

df["Sleep Duration"] = df["Sleep Duration"].apply(lambda x: int(x*60))

bmi_map = {"Normal":0, "Overweight":1, "Obese":2}
df["BMI Category"] = df["BMI Category"].apply(lambda bmi: "Normal" if "Normal" in bmi else bmi)
df["BMI Category"] = df["BMI Category"].apply(lambda bmi: bmi_map[bmi])

df["Sleep Disorder"] = df["Sleep Disorder"].apply(lambda sd: 1 if sd == "Sleep Apnea" else (2 if sd == "Insomnia" else 0))

df[["Systolic", "Diastolic"]] = df["Blood Pressure"].str.split("/", expand=True)
df["Systolic"] = df["Systolic"].astype(int)
df["Diastolic"] = df["Diastolic"].astype(int)

df = df.drop(columns=["Blood Pressure"])

df.head(10)

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,Systolic,Diastolic
0,1,0,27,9,366,6,42,6,1,77,4200,0,126,83
1,2,0,28,1,372,6,60,8,0,75,10000,0,125,80
2,3,0,28,1,372,6,60,8,0,75,10000,0,125,80
3,4,0,28,6,354,4,30,8,2,85,3000,1,140,90
4,5,0,28,6,354,4,30,8,2,85,3000,1,140,90
5,6,0,28,9,354,4,30,8,2,85,3000,2,140,90
6,7,0,29,10,378,6,40,7,2,82,3500,2,140,90
7,8,0,29,1,468,7,75,6,0,70,8000,0,120,80
8,9,0,29,1,468,7,75,6,0,70,8000,0,120,80
9,10,0,29,1,468,7,75,6,0,70,8000,0,120,80


In [523]:
X = df.drop(columns=['Sleep Disorder'])
y = df[['Sleep Disorder']]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
X_train_tensor = torch.tensor(X_train.values).to(torch.float32)
X_test_tensor = torch.tensor(X_test.values).to(torch.float32)

y_train_tensor = torch.tensor(y_train.values).to(torch.long)
y_test_tensor = torch.tensor(y_test.values).to(torch.long)


In [None]:
class TunableModel(nn.Module):
    def __init__(self, num_hidden_nodes, num_hidden_layers):
        super().__init__()
        self.input_layer = nn.Linear(13, num_hidden_nodes)
        
        hidden_layers = []
        for _ in range(num_hidden_layers):
            hidden_layers.append(nn.Linear(num_hidden_nodes, num_hidden_nodes))
            hidden_layers.append(nn.Softmax())

        self.hidden_layers = nn.Sequential(*hidden_layers)

    
        self.output_layer = nn.Linear(num_hidden_nodes, 3)
        self.softmax = nn.Softmax()


    def forward(self, x):
        x = self.input_layer(x)
        x = self.hidden_layers(x)
        x = self.output_layer(x)
        return x

In [525]:
model = TunableModel(num_hidden_nodes=20, num_hidden_layers=3)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
epochs = 3
batch_size = 5
torch.manual_seed(0)

<torch._C.Generator at 0x7f86f5b61f90>

In [526]:
for epoch in range(epochs):
    running_loss = 0.0
    num_batches = len(X_train_tensor) // batch_size + int(len(X_train_tensor) % batch_size != 0)
    
    for i in range(0, len(X_train_tensor), batch_size):
        X_batch = X_train_tensor[i:i + batch_size]
        y_batch = y_train_tensor[i:i + batch_size]

        y_batch = y_batch.squeeze()

        outputs = model(X_batch)
        loss = loss_fn(outputs, y_batch)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/num_batches:.4f}")


  return self._call_impl(*args, **kwargs)


Epoch [1/3], Loss: 1.3606
Epoch [2/3], Loss: 1.3606
Epoch [3/3], Loss: 1.3606


In [527]:
model.eval()
labels = []
predictions = []

with torch.no_grad():
    outputs = model(X_test_tensor)
    
    y_test_tensor = y_test_tensor.squeeze()

    _, predicted = torch.max(outputs, 1)
    labels.extend(y_test_tensor.numpy())
    predictions.extend(predicted.numpy())

print(classification_report(labels, predictions))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00        41
           1       0.00      0.00      0.00        14
           2       0.27      1.00      0.42        20

    accuracy                           0.27        75
   macro avg       0.09      0.33      0.14        75
weighted avg       0.07      0.27      0.11        75



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
