In [6]:
data_path = "data/Sleep_Efficiency.csv"

In [7]:
import pandas as pd

data = pd.read_csv(data_path)

In [8]:
data.drop_duplicates(inplace=True)
data.dropna(inplace=True)

data.isnull().sum()

ID                        0
Age                       0
Gender                    0
Bedtime                   0
Wakeup time               0
Sleep duration            0
Sleep efficiency          0
REM sleep percentage      0
Deep sleep percentage     0
Light sleep percentage    0
Awakenings                0
Caffeine consumption      0
Alcohol consumption       0
Smoking status            0
Exercise frequency        0
dtype: int64

In [9]:
data['Gender'] = data['Gender'].replace({'Male': 1, 'Female': 0})
data['Smoking status'] = data['Smoking status'].replace({'Yes': 1, 'No': 0})

import datetime as dt

data['Bedtime'] = pd.to_datetime(data['Bedtime']).dt.time
data['Bedtime'] = data['Bedtime'].apply(lambda x: dt.datetime.combine(dt.date(1, 1, 1), x).toordinal())

data['Wakeup time'] = pd.to_datetime(data['Wakeup time']).dt.time
data['Wakeup time'] = data['Wakeup time'].apply(lambda x: dt.datetime.combine(dt.date(1, 1, 1), x).toordinal())

data.head()

Unnamed: 0,ID,Age,Gender,Bedtime,Wakeup time,Sleep duration,Sleep efficiency,REM sleep percentage,Deep sleep percentage,Light sleep percentage,Awakenings,Caffeine consumption,Alcohol consumption,Smoking status,Exercise frequency
0,1,65,0,1,1,6.0,0.88,18,70,12,0.0,0.0,0.0,1,3.0
1,2,69,1,1,1,7.0,0.66,19,28,53,3.0,0.0,3.0,1,3.0
2,3,40,0,1,1,8.0,0.89,20,70,10,1.0,0.0,0.0,0,3.0
3,4,40,0,1,1,6.0,0.51,23,25,52,3.0,50.0,5.0,1,1.0
4,5,57,1,1,1,8.0,0.76,27,55,18,3.0,0.0,3.0,0,3.0


In [25]:
y

0      0.88
1      0.66
2      0.89
3      0.51
4      0.76
       ... 
445    0.53
447    0.91
448    0.74
450    0.76
451    0.63
Name: Sleep efficiency, Length: 388, dtype: float64

In [10]:
# feature selection
X = data[['Age', 'Gender', 'Sleep duration',
       'REM sleep percentage', 'Deep sleep percentage', 'Light sleep percentage',
       'Awakenings', 'Caffeine consumption', 'Smoking Status', 'Exercise frequency']]

# target variable
y = data['Sleep efficiency']

In [11]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((310, 8), (78, 8), (310,), (78,))

In [14]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

### Gradient Boosting Regressor

In [18]:
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

model = GradientBoostingRegressor(random_state=8737)
model.fit(X_train, y_train)

print("Accuracy for Gradient Boosting Regresser : {:.2f}\n".format(model.score(X_test, y_test)))

y_pred_lr = model.predict(X_test)

lr_mae = mean_absolute_error(y_test, y_pred_lr)
lr_mse = mean_squared_error(y_test, y_pred_lr)
lr_rmse = np.sqrt(lr_mse)
lr_r2 = r2_score(y_test, y_pred_lr)
lr_mape = np.mean(np.abs((y_test - y_pred_lr) / y_test)) * 100

print(f"Mean Absolute Error (MAE): {lr_mae.round(3)}")
print(f"Mean Squared Error (MSE): {lr_mse.round(3)}")
print(f"Root Mean Squared Error (RMSE): {lr_rmse.round(3)}")
print(f"R-squared (R²): {lr_r2.round(3)}")
print(f"Mean Absolute Percentage Error (MAPE): {lr_mape.round(3)}")

Accuracy for Gradient Boosting Regresser : 0.87

Mean Absolute Error (MAE): 0.041
Mean Squared Error (MSE): 0.003
Root Mean Squared Error (RMSE): 0.05
R-squared (R²): 0.866
Mean Absolute Percentage Error (MAPE): 5.74


### Random Forest Regressor

In [19]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor

model = RandomForestRegressor(n_jobs=-1, random_state=8737)
model.fit(X_train, y_train)

print("Accuracy for Random Forest Regresser : {:.2f}\n".format(model.score(X_test, y_test)))

y_pred_lr = model.predict(X_test)

lr_mae = mean_absolute_error(y_test, y_pred_lr)
lr_mse = mean_squared_error(y_test, y_pred_lr)
lr_rmse = np.sqrt(lr_mse)
lr_r2 = r2_score(y_test, y_pred_lr)
lr_mape = np.mean(np.abs((y_test - y_pred_lr) / y_test)) * 100

print(f"Mean Absolute Error (MAE): {lr_mae.round(3)}")
print(f"Mean Squared Error (MSE): {lr_mse.round(3)}")
print(f"Root Mean Squared Error (RMSE): {lr_rmse.round(3)}")
print(f"R-squared (R²): {lr_r2.round(3)}")
print(f"Mean Absolute Percentage Error (MAPE): {lr_mape.round(3)}")

Accuracy for Random Forest Regresser : 0.87

Mean Absolute Error (MAE): 0.039
Mean Squared Error (MSE): 0.002
Root Mean Squared Error (RMSE): 0.049
R-squared (R²): 0.874
Mean Absolute Percentage Error (MAPE): 5.499


### Ada Boost Regressor

In [20]:
import numpy as np
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

model = AdaBoostRegressor(random_state=8737)
model.fit(X_train, y_train)

print("Accuracy for Ada Boost Regresser : {:.2f}\n".format(model.score(X_test, y_test)))

y_pred_lr = model.predict(X_test)

lr_mae = mean_absolute_error(y_test, y_pred_lr)
lr_mse = mean_squared_error(y_test, y_pred_lr)
lr_rmse = np.sqrt(lr_mse)
lr_r2 = r2_score(y_test, y_pred_lr)
lr_mape = np.mean(np.abs((y_test - y_pred_lr) / y_test)) * 100

print(f"Mean Absolute Error (MAE): {lr_mae.round(3)}")
print(f"Mean Squared Error (MSE): {lr_mse.round(3)}")
print(f"Root Mean Squared Error (RMSE): {lr_rmse.round(3)}")
print(f"R-squared (R²): {lr_r2.round(3)}")
print(f"Mean Absolute Percentage Error (MAPE): {lr_mape.round(3)}")

Accuracy for Ada Boost Regresser : 0.86

Mean Absolute Error (MAE): 0.043
Mean Squared Error (MSE): 0.003
Root Mean Squared Error (RMSE): 0.051
R-squared (R²): 0.863
Mean Absolute Percentage Error (MAPE): 5.963


### Stacking Regressor

In [22]:
import numpy as np
from sklearn.ensemble import StackingRegressor

model = StackingRegressor(estimators=[('ada', AdaBoostRegressor(random_state=8737))], n_jobs=-1)
model.fit(X_train, y_train)

print("Accuracy for Ada Boost Regresser : {:.2f}\n".format(model.score(X_test, y_test)))

y_pred_lr = model.predict(X_test)

lr_mae = mean_absolute_error(y_test, y_pred_lr)
lr_mse = mean_squared_error(y_test, y_pred_lr)
lr_rmse = np.sqrt(lr_mse)
lr_r2 = r2_score(y_test, y_pred_lr)
lr_mape = np.mean(np.abs((y_test - y_pred_lr) / y_test)) * 100

print(f"Mean Absolute Error (MAE): {lr_mae.round(3)}")
print(f"Mean Squared Error (MSE): {lr_mse.round(3)}")
print(f"Root Mean Squared Error (RMSE): {lr_rmse.round(3)}")
print(f"R-squared (R²): {lr_r2.round(3)}")
print(f"Mean Absolute Percentage Error (MAPE): {lr_mape.round(3)}")

Accuracy for Ada Boost Regresser : 0.86

Mean Absolute Error (MAE): 0.042
Mean Squared Error (MSE): 0.003
Root Mean Squared Error (RMSE): 0.051
R-squared (R²): 0.861
Mean Absolute Percentage Error (MAPE): 5.965


### Bagging Regressor

In [24]:
import numpy as np
from sklearn.ensemble import BaggingRegressor

model = BaggingRegressor(n_jobs=-1)
model.fit(X_train, y_train)

print("Accuracy for Baggin Regressor : {:.2f}\n".format(model.score(X_test, y_test)))

y_pred_lr = model.predict(X_test)

lr_mae = mean_absolute_error(y_test, y_pred_lr)
lr_mse = mean_squared_error(y_test, y_pred_lr)
lr_rmse = np.sqrt(lr_mse)
lr_r2 = r2_score(y_test, y_pred_lr)
lr_mape = np.mean(np.abs((y_test - y_pred_lr) / y_test)) * 100

print(f"Mean Absolute Error (MAE): {lr_mae.round(3)}")
print(f"Mean Squared Error (MSE): {lr_mse.round(3)}")
print(f"Root Mean Squared Error (RMSE): {lr_rmse.round(3)}")
print(f"R-squared (R²): {lr_r2.round(3)}")
print(f"Mean Absolute Percentage Error (MAPE): {lr_mape.round(3)}")

Accuracy for Baggin Regressor : 0.87

Mean Absolute Error (MAE): 0.041
Mean Squared Error (MSE): 0.003
Root Mean Squared Error (RMSE): 0.05
R-squared (R²): 0.867
Mean Absolute Percentage Error (MAPE): 5.684
