In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [12]:
# Read the dataset CSV into a DataFrame (the path is relative to the working directory).
DATASET_CSV = "balanced_sleep_quality_dataset_no_timestamp_10t.csv"
df = pd.read_csv(DATASET_CSV)

In [13]:
# Feature matrix: the four columns used as model inputs.
FEATURE_COLUMNS = ["Heart_Rate", "Acceleration", "SpO2", "Temperature"]
X = df[FEATURE_COLUMNS]

# Target vector: the sleep-stage label column.
y = df["Predicted_Sleep_Stage"]

In [14]:
# Encode the target labels (convert sleep stages to numeric values).
# X and y are prepared once, in the preceding cell; they are not re-derived here.
# label_encoder.classes_ is used later to turn the numeric codes back into stage names
# in the classification reports.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [15]:
# Persist the fitted label encoder to disk (original note: for later use in the Flutter app).
# The call stays the last expression of the cell so the list of written files is displayed.
joblib.dump(value=label_encoder, filename="label_encoder-10t.pkl")

['label_encoder-10t.pkl']

In [16]:
# Scale the features (important for SVM, Logistic Regression, and KNN).
#
# The scaler's mean/std are learned from the training rows only, so no statistics from
# the held-out test rows leak into preprocessing (or into the scaler that gets saved).
# train_test_split chooses rows from the sample count and random_state alone, so using
# the same test_size and random_state as the later split of X_scaled selects exactly
# the same training rows. Keep these two arguments in sync with that call.
train_rows, _ = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)
scaler = StandardScaler().fit(X.iloc[train_rows])

# Transform every row with the train-fitted scaler; X_scaled keeps one row per sample.
X_scaled = scaler.transform(X)

In [17]:
# Persist the fitted scaler so the same feature scaling can be re-applied later.
# The call stays the last expression of the cell so the list of written files is displayed.
joblib.dump(value=scaler, filename="scaler-10t.pkl")

['scaler-10t.pkl']

In [18]:
# Hold out 20% of the rows for testing; the fixed seed makes the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y_encoded,
    test_size=0.2,
    random_state=42,
)


In [19]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

In [20]:
def _fit_and_report(name, model):
    """Fit ``model`` on the training split and print its test-set metrics.

    Args:
        name: Display name used in the printed headings.
        model: Unfitted scikit-learn classifier; it is fitted in place.

    Returns:
        Tuple ``(predictions, score)``: the predictions for ``X_test`` and their
        accuracy against ``y_test``.
    """
    print(f"\nTraining {name}...")
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    score = accuracy_score(y_test, predictions)
    print(f"{name} Accuracy:", score)
    print(f"{name} Classification Report:")
    print(classification_report(y_test, predictions, target_names=label_encoder.classes_))
    return predictions, score


# Initialize individual models
random_forest_model = RandomForestClassifier(n_estimators=100, random_state=42)
svm_model = SVC(kernel='rbf', random_state=42)
logistic_regression_model = LogisticRegression(max_iter=1000, random_state=42)
knn_model = KNeighborsClassifier(n_neighbors=5)

# Train and evaluate each candidate on the same split, one after another.
rf_pred, rf_accuracy = _fit_and_report("Random Forest", random_forest_model)
svm_pred, svm_accuracy = _fit_and_report("SVM", svm_model)
lr_pred, lr_accuracy = _fit_and_report("Logistic Regression", logistic_regression_model)
knn_pred, knn_accuracy = _fit_and_report("KNN", knn_model)

# Persist the fitted KNN model. The trailing semicolon keeps the notebook from
# echoing the call's return value as cell output.
joblib.dump(knn_model, "sleep_quality_model_knn.pkl");


Training Random Forest...
Random Forest Accuracy: 0.924
Random Forest Classification Report:
                   precision    recall  f1-score   support

            Awake       0.96      0.97      0.96       399
       Deep Sleep       0.99      1.00      1.00       399
      Light Sleep       0.85      0.85      0.85       409
              REM       0.84      0.85      0.84       377
Sleep Disturbance       0.97      0.96      0.96       416

         accuracy                           0.92      2000
        macro avg       0.92      0.92      0.92      2000
     weighted avg       0.92      0.92      0.92      2000


Training SVM...
SVM Accuracy: 0.9015
SVM Classification Report:
                   precision    recall  f1-score   support

            Awake       0.91      0.96      0.94       399
       Deep Sleep       0.98      0.99      0.98       399
      Light Sleep       0.82      0.83      0.82       409
              REM       0.83      0.81      0.82       377
Sleep Distu

In [21]:
# NOTE(review): disabled hyperparameter search for the Random Forest, kept for reference.
# Presumably this is where the "optimized parameters" used in the next cell came from
# (those values all fall inside this grid) — confirm before relying on it.
# If re-enabled, it evaluates 72 parameter combinations with 5-fold cross-validation.
# from sklearn.model_selection import GridSearchCV

# param_grid = {
#     'n_estimators': [100, 200, 300],
#     'max_depth': [10, 20, None],
#     'min_samples_split': [2, 5],
#     'min_samples_leaf': [1, 2],
#     'max_features': ['sqrt', 'log2']
# }
# grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5, scoring='accuracy')
# grid_search.fit(X_train, y_train)
# print("Best Parameters:", grid_search.best_params_)
# best_rf_model = grid_search.best_estimator_

In [22]:
# Hyperparameters for the tuned Random Forest, collected in one place.
rf_params = {
    "n_estimators": 300,
    "max_depth": 10,
    "max_features": "sqrt",
    "min_samples_leaf": 1,
    "min_samples_split": 2,
    "random_state": 42,
}
rf_model = RandomForestClassifier(**rf_params)

# Fit on the training split.
print("Training Optimized Random Forest...")
rf_model.fit(X_train, y_train)

# Predict the held-out test split and score the predictions.
y_pred = rf_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Report overall accuracy, then the per-class precision/recall/F1 table.
print("Optimized Random Forest Accuracy:", accuracy)
print("Optimized Random Forest Classification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        target_names=label_encoder.classes_,
    )
)

Training Optimized Random Forest...
Optimized Random Forest Accuracy: 0.9315
Optimized Random Forest Classification Report:
                   precision    recall  f1-score   support

            Awake       0.93      1.00      0.96       399
       Deep Sleep       1.00      1.00      1.00       399
      Light Sleep       0.83      0.92      0.87       409
              REM       0.90      0.80      0.85       377
Sleep Disturbance       1.00      0.93      0.96       416

         accuracy                           0.93      2000
        macro avg       0.93      0.93      0.93      2000
     weighted avg       0.93      0.93      0.93      2000



In [24]:
# Save the trained models.
# Persist the optimized Random Forest (rf_model) under an unambiguous file name.
joblib.dump(rf_model, "sleep_quality_model_random_forest_optimized.pkl")

# Existing artifact, written unchanged for any consumer that already loads it.
# NOTE: despite "random_forest_optimized" in its file name, this file contains the
# KNN model, not the Random Forest.
joblib.dump(knn_model, "sleep_quality_model_random_forest_optimized-knn.pkl")

['sleep_quality_model_random_forest_optimized-knn.pkl']