In [158]:
# Import libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.cluster import KMeans
from scipy.stats import mode

In [147]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [148]:
# Load the dataset
path = '/content/drive/My Drive/University/Final Project/dataset.csv'
initial_data = pd.read_csv(path)

In [149]:
print("Initial Data:")
initial_data.head()

Initial Data:


Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [150]:
# Drop unnecessary columns
mydata = initial_data.drop(['Person ID', 'Occupation', 'Sleep Disorder'], axis=1, errors='ignore')

In [151]:
# Display the cleaned data
print("Cleaned Data:")
mydata.head()

Cleaned Data:


Unnamed: 0,Gender,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps
0,Male,27,6.1,6,42,6,Overweight,126/83,77,4200
1,Male,28,6.2,6,60,8,Normal,125/80,75,10000
2,Male,28,6.2,6,60,8,Normal,125/80,75,10000
3,Male,28,5.9,4,30,8,Obese,140/90,85,3000
4,Male,28,5.9,4,30,8,Obese,140/90,85,3000


In [152]:
# Encode categorical variables
X = mydata.drop('Stress Level', axis=1)
y = mydata['Stress Level']
X = pd.get_dummies(X, drop_first=True)

In [153]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [131]:
# Random Forest
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
rf_pred = rf_classifier.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_pred)

In [132]:
# SVM
svm_classifier = SVC(probability=True, kernel='rbf', random_state=42)
svm_classifier.fit(X_train, y_train)
svm_pred = svm_classifier.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_pred)

In [133]:
# Create a LabelEncoder object
label_encoder = LabelEncoder()

# Fit the encoder to your target variable and transform it
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

# XGBoost Classifier
xgb_classifier = XGBClassifier(n_estimators=100, learning_rate=0.1, random_state=42, use_label_encoder=False, eval_metric='logloss')
xgb_classifier.fit(X_train, y_train)
xgb_pred = xgb_classifier.predict(X_test)
xgb_accuracy = accuracy_score(y_test, xgb_pred)

Parameters: { "use_label_encoder" } are not used.



In [134]:
# Logistic Regression
logreg_classifier = LogisticRegression(max_iter=1000, random_state=42)
logreg_classifier.fit(X_train, y_train)
logreg_pred = logreg_classifier.predict(X_test)
logreg_accuracy = accuracy_score(y_test, logreg_pred)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [135]:
# KMeans Clustering (for clustering-based prediction)
scaler = StandardScaler()
# The 'Stress Level' column has already been removed from X.
# Use X directly for scaling:
X_scaled = scaler.fit_transform(X)
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_scaled)

In [141]:
# Display all model accuracies
print("\nModel Accuracies:")
print(f"Random Forest Accuracy: {rf_accuracy:.2f}")
print(f"SVM Accuracy: {svm_accuracy:.2f}")
print(f"XGBoost Accuracy: {xgb_accuracy:.2f}")
print(f"Logistic Regression Accuracy: {logreg_accuracy:.2f}")


Model Accuracies:
Random Forest Accuracy: 1.00
SVM Accuracy: 0.41
XGBoost Accuracy: 1.00
Logistic Regression Accuracy: 0.89


In [136]:
# Define recommendation and action plan functions
def content_based_recommendation(stress_level, physical_activity, sleep_quality, bmi, blood_pressure):
    if stress_level > 7:
        return "Meditation, Yoga, Reduce screen time, Regular Sleep"
    elif physical_activity < 5:
        return "Increase Physical Activity, Morning Walk, Breathing Exercises"
    elif sleep_quality < 5:
        return "Improve Sleep Hygiene, Avoid Caffeine, Create Sleep Schedule"
    elif bmi > 30:
        return "Consult a nutritionist, Begin a weight loss program"
    elif blood_pressure > 140:
        return "Consult with a healthcare provider for blood pressure management"
    else:
        return "Maintain Current Lifestyle, Stay Positive"

In [175]:
def suicide_action_plan(stress_level, sleep_quality, physical_activity, daily_steps, bmi, blood_pressure):
    if stress_level > 8 or (sleep_quality < 3 and physical_activity < 2 and daily_steps < 1000):
        return "Critical Risk: Contact mental health professionals and emergency helplines immediately."
    elif stress_level > 7:
        return "High Risk: Consult a licensed psychologist or psychiatrist immediately."
    elif stress_level > 5:
        return "Moderate Risk: Practice mindfulness, yoga and consult a counselor."
    else:
        return "Low Risk: Maintain a healthy lifestyle and stay socially connected."

In [144]:
print("\nXGBoost Results:")
print(f"Accuracy: {xgb_accuracy:.2f}")
print(classification_report(y_test, xgb_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, xgb_pred))


XGBoost Results:
Accuracy: 1.00
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00        14
           3       1.00      1.00      1.00        10
           4       1.00      1.00      1.00        12
           5       1.00      1.00      1.00        17

    accuracy                           1.00        75
   macro avg       1.00      1.00      1.00        75
weighted avg       1.00      1.00      1.00        75


Confusion Matrix:
 [[12  0  0  0  0  0]
 [ 0 10  0  0  0  0]
 [ 0  0 14  0  0  0]
 [ 0  0  0 10  0  0]
 [ 0  0  0  0 12  0]
 [ 0  0  0  0  0 17]]


In [164]:
# Function to test a single person's data
def test_single_person(gender, age, sleep_duration, physical_activity, bmi, blood_pressure, heart_rate, daily_steps):
    # Single input data (Modified to match original dataset structure)
    person_data = pd.DataFrame({
        'Gender': [gender],
        'Age': [age],
        'Sleep Duration': [sleep_duration],
        'Physical Activity Level': [physical_activity],
        'BMI': [bmi],
        'Blood Pressure': [blood_pressure],
        'Heart Rate': [heart_rate],
        'Daily Steps': [daily_steps]
    }, index=[0])  # Adding an index to avoid potential issues

    # Preprocess person_data to match the format used during training:
    # 1. One-hot encode categorical features
    person_data = pd.get_dummies(person_data, columns=['Gender'], drop_first=True)

    # 2. Ensure all columns from training data are present
    #    (Add missing columns with 0 values)
    for col in X_train.columns:  # X_train has the expected columns
        if col not in person_data.columns:
            person_data[col] = 0

    # 3. Reorder columns to match the order in training data
    person_data = person_data[X_train.columns]

    # Now you can safely scale the data:
    person_scaled = scaler.transform(person_data)

    # Predict stress level using the models
    rf_pred = rf_classifier.predict(person_data)[0]
    svm_pred = svm_classifier.predict(person_data)[0]
    kmeans_pred = kmeans.predict(person_scaled)[0]
    xgb_pred = xgb_classifier.predict(person_data)[0]

    # Combine predictions using voting (majority voting for categorical output)
    predictions = [rf_pred, svm_pred, xgb_pred]
    final_pred = max(set(predictions), key=predictions.count)  # Majority voting

    # Get stress relief recommendations
    recommendation = content_based_recommendation(
        stress_level=final_pred,  # Using the combined prediction
        physical_activity=physical_activity,
        sleep_quality=sleep_duration,
        bmi=bmi,
        blood_pressure=blood_pressure
    )

    # Suicide action plan
    action_plan = suicide_action_plan(
        stress_level=final_pred,  # Using the combined prediction
        sleep_quality=sleep_duration,
        physical_activity=physical_activity,
        daily_steps=daily_steps,
        bmi=bmi,
        blood_pressure=blood_pressure
    )

    # Output the results
    print(f"\nTest for Single Person's Data:")
    print(f"Gender: {gender}")
    print(f"Age: {age}")
    print(f"Sleep Duration: {sleep_duration} hours")
    print(f"Physical Activity Level: {physical_activity}")
    print(f"BMI: {bmi}")
    print(f"Blood Pressure: {blood_pressure} mmHg")
    print(f"Heart Rate: {heart_rate} bpm")
    print(f"Daily Steps: {daily_steps}")
    print(f"Predicted Stress Level (Random Forest): {rf_pred}")
    print(f"Predicted Stress Level (SVM): {svm_pred}")
    print(f"Predicted Stress Level (KMeans): {kmeans_pred}")
    print(f"Predicted Stress Level (XGBoost): {xgb_pred}")
    print(f"Final Predicted Stress Level (Majority Voting): {final_pred}")
    print(f"Recommendations: {recommendation}")
    print(f"Suicide Action Plan: {action_plan}")

In [160]:

def combined_prediction(gender, age, sleep_duration, physical_activity, bmi, blood_pressure, heart_rate, daily_steps):
    # Preprocess the input data as done earlier
    person_data = pd.DataFrame({
        'Gender': [gender],
        'Age': [age],
        'Sleep Duration': [sleep_duration],
        'Physical Activity Level': [physical_activity],
        'BMI': [bmi],
        'Blood Pressure': [blood_pressure],
        'Heart Rate': [heart_rate],
        'Daily Steps': [daily_steps]
    }, index=[0])

    person_data = pd.get_dummies(person_data, columns=['Gender'], drop_first=True)

    # Ensure all columns from training data are present (add missing columns with 0 values)
    for col in X_train.columns:
        if col not in person_data.columns:
            person_data[col] = 0

    # Reorder columns to match the order in training data
    person_data = person_data[X_train.columns]

    # Scale the data
    person_scaled = scaler.transform(person_data)

    # Get predictions from all models
    rf_pred = rf_classifier.predict(person_data)[0]
    svm_pred = svm_classifier.predict(person_data)[0]
    xgb_pred = xgb_classifier.predict(person_data)[0]
    kmeans_pred = kmeans.predict(person_scaled)[0]

    # Combine predictions using majority voting
    predictions = [rf_pred, svm_pred, xgb_pred, kmeans_pred]

    # Get the mode and count
    mode_result = mode(predictions)

    # Access the mode value correctly:
    # If mode_result.mode is an array, get the first element; otherwise, use the scalar value directly.
    final_prediction = mode_result.mode[0] if isinstance(mode_result.mode, np.ndarray) else mode_result.mode



    # Get stress relief recommendations and suicide action plan
    recommendation = content_based_recommendation(
        stress_level=final_prediction,  # Use combined prediction
        physical_activity=physical_activity,
        sleep_quality=sleep_duration,
        bmi=bmi,
        blood_pressure=blood_pressure
    )

    action_plan = suicide_action_plan(
        stress_level=final_prediction,  # Use combined prediction
        sleep_quality=sleep_duration,
        physical_activity=physical_activity,
        daily_steps=daily_steps,
        bmi=bmi,
        blood_pressure=blood_pressure
    )

    # Output the results
    print(f"\nTest for Single Person's Data:")
    print(f"Gender: {gender}")
    print(f"Age: {age}")
    print(f"Sleep Duration: {sleep_duration} hours")
    print(f"Physical Activity Level: {physical_activity}")
    print(f"BMI: {bmi}")
    print(f"Blood Pressure: {blood_pressure} mmHg")
    print(f"Heart Rate: {heart_rate} bpm")
    print(f"Daily Steps: {daily_steps}")
    print(f"Combined Predicted Stress Level: {final_prediction}")
    print(f"Recommendations: {recommendation}")
    print(f"Suicide Action Plan: {action_plan}")

In [165]:
test_single_person(
    gender='Male',             # Gender: 'Male' or 'Female'
    age=30,                    # Age in years
    sleep_duration=6,          # Sleep Duration in hours
    physical_activity=3,       # Physical Activity level (1-10)
    bmi=25,                    # Body Mass Index
    blood_pressure=120,        # Blood Pressure in mmHg (e.g., 120/80)
    heart_rate=72,             # Heart Rate in beats per minute
    daily_steps=5000           # Daily Steps count
)



Test for Single Person's Data:
Gender: Male
Age: 30
Sleep Duration: 6 hours
Physical Activity Level: 3
BMI: 25
Blood Pressure: 120 mmHg
Heart Rate: 72 bpm
Daily Steps: 5000
Predicted Stress Level (Random Forest): 8
Predicted Stress Level (SVM): 3
Predicted Stress Level (KMeans): 1
Predicted Stress Level (XGBoost): 5
Final Predicted Stress Level (Majority Voting): 8
Recommendations: Meditation, Yoga, Reduce screen time, Regular Sleep
Suicide Action Plan: High Risk: Consult a licensed psychologist or psychiatrist immediately.


In [166]:
# Example user input for testing
combined_prediction(
    gender='Male',             # Gender: 'Male' or 'Female'
    age=30,                    # Age in years
    sleep_duration=6,          # Sleep Duration in hours
    physical_activity=3,       # Physical Activity level (1-10)
    bmi=25,                    # Body Mass Index
    blood_pressure=120,        # Blood Pressure in mmHg (e.g., 120/80)
    heart_rate=72,             # Heart Rate in beats per minute
    daily_steps=5000           # Daily Steps count
)



Test for Single Person's Data:
Gender: Male
Age: 30
Sleep Duration: 6 hours
Physical Activity Level: 3
BMI: 25
Blood Pressure: 120 mmHg
Heart Rate: 72 bpm
Daily Steps: 5000
Combined Predicted Stress Level: 1
Recommendations: Increase Physical Activity, Morning Walk, Breathing Exercises
Suicide Action Plan: Low Risk: Maintain a healthy lifestyle and stay socially connected.


In [170]:
# Combined function for testing a single person's data and making predictions with meta-model
def test_and_predict_stress_level_with_meta_model(gender, age, sleep_duration, physical_activity, bmi, blood_pressure, heart_rate, daily_steps):
    # Single input data (Modified to match original dataset structure)
    person_data = pd.DataFrame({
        'Gender': [gender],
        'Age': [age],
        'Sleep Duration': [sleep_duration],
        'Physical Activity Level': [physical_activity],
        'BMI': [bmi],
        'Blood Pressure': [blood_pressure],
        'Heart Rate': [heart_rate],
        'Daily Steps': [daily_steps]
    }, index=[0])  # Adding an index to avoid potential issues

    # Preprocess person_data to match the format used during training:
    # 1. One-hot encode categorical features
    person_data = pd.get_dummies(person_data, columns=['Gender'], drop_first=True)

    # 2. Ensure all columns from training data are present (add missing columns with 0 values)
    for col in X_train.columns:
        if col not in person_data.columns:
            person_data[col] = 0

    # 3. Reorder columns to match the order in training data
    person_data = person_data[X_train.columns]

    # Now you can safely scale the data
    person_scaled = scaler.transform(person_data)

    # Get predictions from base models
    base_model_preds = get_base_model_predictions(person_data)

    # Use the meta-model to predict stress level based on base model predictions
    meta_pred = meta_model.predict(base_model_preds)

    # Get stress relief recommendations
    recommendation = content_based_recommendation(
        stress_level=meta_pred[0],  # Use combined prediction
        physical_activity=physical_activity,
        sleep_quality=sleep_duration,
        bmi=bmi,
        blood_pressure=blood_pressure
    )

    # Suicide action plan
    action_plan = suicide_action_plan(
        stress_level=meta_pred[0],  # Use combined prediction
        sleep_quality=sleep_duration,
        physical_activity=physical_activity,
        daily_steps=daily_steps,
        bmi=bmi,
        blood_pressure=blood_pressure
    )

    # Output the results
    print(f"\nTest for Single Person's Data:")
    print(f"Gender: {gender}")
    print(f"Age: {age}")
    print(f"Sleep Duration: {sleep_duration} hours")
    print(f"Physical Activity Level: {physical_activity}")
    print(f"BMI: {bmi}")
    print(f"Blood Pressure: {blood_pressure} mmHg")
    print(f"Heart Rate: {heart_rate} bpm")
    print(f"Daily Steps: {daily_steps}")
    print(f"Predicted Stress Level (Meta-Model): {meta_pred[0]}")
    print(f"Recommendations: {recommendation}")
    print(f"Suicide Action Plan: {action_plan}")




In [174]:
# Example user input for testing with the meta-model
test_and_predict_stress_level_with_meta_model(
    gender='Male',             # Gender: 'Male' or 'Female'
    age=24,                    # Age in years
    sleep_duration=9.2,        # Sleep Duration in hours
    physical_activity=2,       # Physical Activity level (1-10)
    bmi=17.4,                  # Body Mass Index
    blood_pressure=90,         # Blood Pressure in mmHg (e.g., 120/80)
    heart_rate=84,             # Heart Rate in beats per minute
    daily_steps=992            # Daily Steps count
)


Test for Single Person's Data:
Gender: Male
Age: 24
Sleep Duration: 9.2 hours
Physical Activity Level: 2
BMI: 17.4
Blood Pressure: 90 mmHg
Heart Rate: 84 bpm
Daily Steps: 992
Predicted Stress Level (Meta-Model): 5
Recommendations: Increase Physical Activity, Morning Walk, Breathing Exercises
Suicide Action Plan: Low Risk: Maintain a healthy lifestyle and stay socially connected.
