In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


# Location of the dataset CSV.
# NOTE(review): hardcoded absolute path (looks like a Colab '/content' upload);
# it must be adjusted when the notebook runs in any other environment.
file_path = '/content/Sleep_health_and_lifestyle_dataset.csv'
# Load the whole file into a DataFrame; the later cells all work off `data`.
data = pd.read_csv(file_path)


In [None]:
# Preview the first rows to confirm the CSV parsed as expected.
data.head()

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [None]:
# Print column dtypes and non-null counts; useful for spotting missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 15 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Person ID                       374 non-null    int64  
 1   Gender                          374 non-null    object 
 2   Age                             374 non-null    int64  
 3   Occupation                      374 non-null    object 
 4   Sleep Duration                  374 non-null    float64
 5   Quality of Sleep                374 non-null    int64  
 6   Physical Activity Level         374 non-null    int64  
 7   Stress Level                    374 non-null    int64  
 8   BMI Category                    374 non-null    object 
 9   Blood Pressure                  374 non-null    object 
 10  Heart Rate                      374 non-null    int64  
 11  Daily Steps                     374 non-null    int64  
 12  Sleep Disorder                  155 

In [None]:
# Show the column names exactly as spelled in the CSV; later cells index the
# frame by these strings.
data.columns

Index(['Person ID', 'Gender', 'Age', 'Occupation', 'Sleep Duration',
       'Quality of Sleep', 'Physical Activity Level', 'Stress Level',
       'BMI Category', 'Blood Pressure', 'Heart Rate', 'Daily Steps',
       'Sleep Disorder'],
      dtype='object')

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split



# --- Target variables ---------------------------------------------------
# Sleep needs improvement when duration is below 7 OR quality is below 6.
data['Needs Sleep Improvement'] = (
    data['Sleep Duration'].lt(7) | data['Quality of Sleep'].lt(6)
)
# Water intake needs improvement when activity is above 70 OR stress is above 7.
data['Needs Water Intake Improvement'] = (
    data['Physical Activity Level'].gt(70) | data['Stress Level'].gt(7)
)

# --- Categorical encoding -----------------------------------------------
# Keep the raw BMI labels around; score_health() uses them to build its input row.
bmi_categories = data['BMI Category'].unique()

# One-hot encode 'BMI Category'; drop_first=True leaves the first level out
# so it acts as the implicit baseline.
data_encoded = pd.get_dummies(data, columns=['BMI Category'], drop_first=True)

# --- Feature matrix and targets -----------------------------------------
# Five numeric columns followed by every generated BMI dummy column.
features = [
    'Sleep Duration',
    'Quality of Sleep',
    'Physical Activity Level',
    'Stress Level',
    'Daily Steps',
    *(column for column in data_encoded.columns if column.startswith('BMI Category_')),
]

X = data_encoded[features]
y_sleep = data['Needs Sleep Improvement']
y_water = data['Needs Water Intake Improvement']

# --- Train/test split (70/30, fixed seed for reproducibility) -----------
X_train_sleep, X_test_sleep, y_train_sleep, y_test_sleep = train_test_split(
    X, y_sleep, test_size=0.3, random_state=42
)
X_train_water, X_test_water, y_train_water, y_test_water = train_test_split(
    X, y_water, test_size=0.3, random_state=42
)

# --- Model fitting -------------------------------------------------------
# One random forest per target; fit() returns the estimator itself, so the
# construction and training can be chained.
model_sleep = RandomForestClassifier(random_state=42).fit(X_train_sleep, y_train_sleep)
model_water = RandomForestClassifier(random_state=42).fit(X_train_water, y_train_water)


def calculate_sleep_score(sleep_duration, quality_of_sleep):
    """Score sleep habits from duration and self-reported quality.

    Both inputs are graded against the same cut-offs: a value of at least 8
    earns 20 points, at least 7 earns 15, at least 6 earns 10, and anything
    lower earns nothing. The two partial scores are summed.

    Args:
        sleep_duration: Numeric sleep duration.
        quality_of_sleep: Numeric sleep-quality rating.

    Returns:
        The combined score, capped at 50 (the current tiers top out at 40).
    """
    # (minimum value, points awarded), checked from the highest tier down.
    tiers = ((8, 20), (7, 15), (6, 10))

    def _points_for(value):
        # First tier whose minimum is met wins; no tier met means 0 points.
        for minimum, points in tiers:
            if value >= minimum:
                return points
        return 0

    combined = _points_for(sleep_duration) + _points_for(quality_of_sleep)

    # to ensure the score does not exceed 50
    return min(combined, 50)

def calculate_water_score(physical_activity_level, stress_level):
    """Score hydration-related habits from activity and stress levels.

    Physical activity earns 20 points at 70 or more, 15 points at 60 or more,
    10 points at 50 or more, and 0 otherwise. Stress earns 20 points at 3 or
    less, 15 points at 5 or less, 10 points at 7 or less, and 0 otherwise.

    Args:
        physical_activity_level: Numeric physical-activity level.
        stress_level: Numeric stress level (lower is better).

    Returns:
        The sum of both partial scores, capped at 50 (the current tiers top
        out at 40).
    """
    water_score = 0

    # scoring for Physical Activity Level
    # The previous chain tested `>= 60` twice, so the 15-point tier could
    # never fire. The top tier now starts at 70, giving evenly spaced
    # 70 / 60 / 50 cut-offs like the other scoring ladders in this notebook.
    if physical_activity_level >= 70:
        water_score += 20
    elif physical_activity_level >= 60:
        water_score += 15
    elif physical_activity_level >= 50:
        water_score += 10

    # scoring for Stress Level (lower stress earns more points)
    if stress_level <= 3:
        water_score += 20
    elif stress_level <= 5:
        water_score += 15
    elif stress_level <= 7:
        water_score += 10

    # Ensure the score does not exceed 50
    return min(water_score, 50)

# Function to score the person's health and produce advice messages
def score_health(sleep_duration, quality_of_sleep, physical_activity_level, stress_level, daily_steps, bmi_category):
    """Score a person's sleep and hydration habits and return advice messages.

    A single-row feature frame is built in the same column layout as the
    training matrix ``X`` and passed through ``model_sleep`` and
    ``model_water``. The returned score and messages are derived from the
    rule-based helpers ``calculate_sleep_score`` and ``calculate_water_score``.

    NOTE(review): the two model predictions are computed but do not influence
    the returned values yet. Also, with the current scoring tiers each helper
    tops out at 40, so the total cannot exceed 80 even though it is reported
    as "out of 100".

    Args:
        sleep_duration: Numeric sleep duration.
        quality_of_sleep: Numeric sleep-quality rating.
        physical_activity_level: Numeric physical-activity level.
        stress_level: Numeric stress level.
        daily_steps: Number of daily steps.
        bmi_category: BMI label; matched case-insensitively against the
            labels collected in ``bmi_categories``. An unrecognised label
            triggers a ``UserWarning`` and is encoded as the baseline
            (all-zero) category.

    Returns:
        Tuple ``(total_score, sleep_message, water_message)``.
    """
    import warnings  # local import: only needed for the unknown-label warning

    # Resolve the caller's label against the training labels, ignoring case
    # and surrounding whitespace. A plain string comparison would turn e.g.
    # 'normal' or 'overweight' into an unknown column that the reindex below
    # silently discards, leaving an all-zero (baseline) one-hot row.
    labels_by_key = {str(category).strip().lower(): category for category in bmi_categories}
    matched_category = labels_by_key.get(str(bmi_category).strip().lower())
    if matched_category is None:
        warnings.warn(
            f"Unknown BMI category {bmi_category!r}; expected one of "
            f"{sorted(str(category) for category in bmi_categories)}. "
            "Falling back to the baseline category.",
            stacklevel=2,
        )

    # Single-row input: numeric features plus one indicator per known BMI label.
    row = {
        'Sleep Duration': [sleep_duration],
        'Quality of Sleep': [quality_of_sleep],
        'Physical Activity Level': [physical_activity_level],
        'Stress Level': [stress_level],
        'Daily Steps': [daily_steps],
    }
    for category in bmi_categories:
        is_match = matched_category is not None and category == matched_category
        row[f'BMI Category_{category}'] = [1 if is_match else 0]

    # Align with the training columns: this drops the indicator of the level
    # removed by drop_first=True and fixes the column order.
    input_data = pd.DataFrame(row).reindex(columns=X.columns, fill_value=0)

    # Prediction using the trained models (see NOTE(review) in the docstring).
    sleep_prediction = model_sleep.predict(input_data)[0]
    water_prediction = model_water.predict(input_data)[0]

    # Calculating scores based on input values
    sleep_score = calculate_sleep_score(sleep_duration, quality_of_sleep)
    water_score = calculate_water_score(physical_activity_level, stress_level)

    # Total is the plain sum of the two partial scores.
    total_score = sleep_score + water_score

    # Score interpretation: anything below 40 on a partial score triggers advice.
    sleep_message = "You need to improve your sleep." if sleep_score < 40 else "Your sleep is fine."
    water_message = "You need to increase your water intake." if water_score < 40 else "Your water intake is fine."

    return total_score, sleep_message, water_message







In [None]:
# Example input
sleep_duration = 12
quality_of_sleep = 10
physical_activity_level = 30
stress_level = 1
daily_steps = 10000
# Use the dataset's exact spelling ('Normal', not 'normal'): the one-hot
# column names built from this label are case-sensitive.
bmi_category = 'Normal'

# Scoring based on input
total_score, sleep_message, water_message = score_health(
    sleep_duration,
    quality_of_sleep,
    physical_activity_level,
    stress_level,
    daily_steps,
    bmi_category,
)

print(f"Health Score: {total_score}/100")
print(sleep_message)
print(water_message)



Health Score: 60/100
Your sleep is fine.
You need to increase your water intake.


In [None]:
import joblib

# Saving the trained models
# Each fitted estimator is written to a .pkl file at a relative path, i.e. in
# the current working directory. The cell's displayed output is the return
# value of the last dump call.
joblib.dump(model_sleep, 'model_sleep.pkl')
joblib.dump(model_water, 'model_water.pkl')


['model_water.pkl']

In [None]:
# NOTE(review): this repeats the model_water dump from the previous cell and
# simply overwrites the same file; the cell can probably be removed.
joblib.dump(model_water, 'model_water.pkl')

['model_water.pkl']