In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

In [2]:
# Load the sleep health and lifestyle dataset; row 0 of the file supplies the column names.
dt = pd.read_csv(
    filepath_or_buffer="Sleep_health_and_lifestyle_dataset.csv",
    header=0,
)

In [3]:
# Display the loaded DataFrame to inspect its columns and a sample of rows.
dt

Unnamed: 0,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
...,...,...,...,...,...,...,...,...,...,...,...,...
369,Female,59,Nurse,8.1,9,75,3,Overweight,140/95,68,7000,Sleep Apnea
370,Female,59,Nurse,8.0,9,75,3,Overweight,140/95,68,7000,Sleep Apnea
371,Female,59,Nurse,8.1,9,75,3,Overweight,140/95,68,7000,Sleep Apnea
372,Female,59,Nurse,8.1,9,75,3,Overweight,140/95,68,7000,Sleep Apnea


In [3]:
# Columns removed from `dt` before modelling.
columns_to_drop = ['Gender', 'Occupation']

# Rebind `dt` to the reduced frame instead of mutating it in place, and ignore
# columns that are already gone, so re-running this cell does not raise KeyError.
dt = dt.drop(columns=columns_to_drop, errors="ignore")


In [5]:
# Display the DataFrame to confirm which columns remain.
dt

Unnamed: 0,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,27,6.1,6,42,6,Overweight,126/83,77,4200,
1,28,6.2,6,60,8,Normal,125/80,75,10000,
2,28,6.2,6,60,8,Normal,125/80,75,10000,
3,28,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,28,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
...,...,...,...,...,...,...,...,...,...,...
369,59,8.1,9,75,3,Overweight,140/95,68,7000,Sleep Apnea
370,59,8.0,9,75,3,Overweight,140/95,68,7000,Sleep Apnea
371,59,8.1,9,75,3,Overweight,140/95,68,7000,Sleep Apnea
372,59,8.1,9,75,3,Overweight,140/95,68,7000,Sleep Apnea


In [4]:
# Split the "Blood Pressure" text column (values such as "126/83") into two
# numeric columns. The split is skipped when both derived columns already
# exist, so re-running this cell is a no-op instead of a KeyError. If neither
# the source column nor the derived columns exist, the KeyError still surfaces.
if not {'systolic_bp', 'diastolic_bp'}.issubset(dt.columns):
    dt[['systolic_bp', 'diastolic_bp']] = dt['Blood Pressure'].str.split('/', expand=True)

    # str.split produces strings; convert both halves to numbers.
    dt['systolic_bp'] = pd.to_numeric(dt['systolic_bp'])
    dt['diastolic_bp'] = pd.to_numeric(dt['diastolic_bp'])

# Remove the original text column; errors="ignore" tolerates a re-run where it is already gone.
dt = dt.drop(columns=['Blood Pressure'], errors="ignore")

In [7]:
# Display the DataFrame to check the systolic_bp / diastolic_bp columns.
dt

Unnamed: 0,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,systolic_bp,diastolic_bp
0,27,6.1,6,42,6,Overweight,77,4200,,126,83
1,28,6.2,6,60,8,Normal,75,10000,,125,80
2,28,6.2,6,60,8,Normal,75,10000,,125,80
3,28,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90
4,28,5.9,4,30,8,Obese,85,3000,Sleep Apnea,140,90
...,...,...,...,...,...,...,...,...,...,...,...
369,59,8.1,9,75,3,Overweight,68,7000,Sleep Apnea,140,95
370,59,8.0,9,75,3,Overweight,68,7000,Sleep Apnea,140,95
371,59,8.1,9,75,3,Overweight,68,7000,Sleep Apnea,140,95
372,59,8.1,9,75,3,Overweight,68,7000,Sleep Apnea,140,95


In [9]:
def handle_BMI(x):
    """Encode a BMI category label as an integer code.

    "Overweight" -> 0, "Normal" -> 1, "Obese" -> 2, "Normal Weight" -> 3.
    Any other value falls through and yields None.

    NOTE(review): this function is defined again in a later cell of this notebook.
    """
    bmi_codes = (
        ("Overweight", 0),
        ("Normal", 1),
        ("Obese", 2),
        ("Normal Weight", 3),
    )
    for label, code in bmi_codes:
        if x == label:
            return code
    return None

def handle_Disorder(x):
    """Encode a sleep-disorder label as an integer class code.

    Returns:
        0 when there is no disorder: a missing value (NaN/None) or the
          literal string "None";
        1 for "Sleep Apnea";
        2 for "Insomnia";
        None for any other value.
    """
    # "No disorder" can arrive either as a missing value or as the text "None",
    # depending on how read_csv parsed the column, so both forms are accepted.
    if pd.isnull(x) or x == "None":
        return 0
    if x == "Sleep Apnea":
        return 1
    if x == "Insomnia":
        return 2
    return None

In [5]:
def handle_BMI(x):
    """Encode a BMI category label as an integer code.

    "Overweight" -> 0, "Normal" -> 1, "Obese" -> 2, "Normal Weight" -> 3.
    Any other value falls through and yields None.

    NOTE(review): this function is also defined in an earlier cell of this notebook.
    """
    bmi_codes = (
        ("Overweight", 0),
        ("Normal", 1),
        ("Obese", 2),
        ("Normal Weight", 3),
    )
    for label, code in bmi_codes:
        if x == label:
            return code
    return None

def handle_Disorder(x):
    """Encode a sleep-disorder label as an integer class code.

    Returns:
        0 when there is no disorder: a missing value (NaN/None) or the
          literal string "None";
        1 for "Sleep Apnea";
        2 for "Insomnia";
        None for any other value.
    """
    # "No disorder" can arrive either as a missing value or as the text "None",
    # depending on how read_csv parsed the column. Matching only the string
    # would let NaN fall through and put missing values into the target.
    if pd.isnull(x) or x == "None":
        return 0
    if x == "Sleep Apnea":
        return 1
    if x == "Insomnia":
        return 2
    return None

In [6]:
# Replace the two text columns with their integer codes; assign() overwrites
# the existing columns in place within the returned frame.
dt = dt.assign(**{
    'BMI Category': dt['BMI Category'].apply(handle_BMI),
    'Sleep Disorder': dt['Sleep Disorder'].apply(handle_Disorder),
})

In [7]:
# Display the DataFrame to check the encoded BMI Category and Sleep Disorder columns.
dt

Unnamed: 0,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,systolic_bp,diastolic_bp
0,27,6.1,6,42,6,0,77,4200,0,126,83
1,28,6.2,6,60,8,1,75,10000,0,125,80
2,28,6.2,6,60,8,1,75,10000,0,125,80
3,28,5.9,4,30,8,2,85,3000,1,140,90
4,28,5.9,4,30,8,2,85,3000,1,140,90
...,...,...,...,...,...,...,...,...,...,...,...
369,59,8.1,9,75,3,0,68,7000,1,140,95
370,59,8.0,9,75,3,0,68,7000,1,140,95
371,59,8.1,9,75,3,0,68,7000,1,140,95
372,59,8.1,9,75,3,0,68,7000,1,140,95


In [9]:
# Count missing values per column; every count should be 0 before modelling.
dt.isna().sum()

Age                        0
Sleep Duration             0
Quality of Sleep           0
Physical Activity Level    0
Stress Level               0
BMI Category               0
Heart Rate                 0
Daily Steps                0
Sleep Disorder             0
systolic_bp                0
diastolic_bp               0
dtype: int64

In [17]:
# Display the DataFrame once more before separating features from the target.
dt

Unnamed: 0,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,systolic_bp,diastolic_bp
0,27,6.1,6,42,6,0,77,4200,0,126,83
1,28,6.2,6,60,8,1,75,10000,0,125,80
2,28,6.2,6,60,8,1,75,10000,0,125,80
3,28,5.9,4,30,8,2,85,3000,1,140,90
4,28,5.9,4,30,8,2,85,3000,1,140,90
...,...,...,...,...,...,...,...,...,...,...,...
369,59,8.1,9,75,3,0,68,7000,1,140,95
370,59,8.0,9,75,3,0,68,7000,1,140,95
371,59,8.1,9,75,3,0,68,7000,1,140,95
372,59,8.1,9,75,3,0,68,7000,1,140,95


In [10]:
# Target vector: the encoded sleep-disorder class.
Y = dt["Sleep Disorder"]
# Feature matrix: every column except the target.
X = dt.drop("Sleep Disorder", axis=1)

In [12]:
# Display the feature matrix (all columns of dt except "Sleep Disorder").
X

Unnamed: 0,Age,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,systolic_bp,diastolic_bp
0,27,6.1,6,42,6,0,77,4200,126,83
1,28,6.2,6,60,8,1,75,10000,125,80
2,28,6.2,6,60,8,1,75,10000,125,80
3,28,5.9,4,30,8,2,85,3000,140,90
4,28,5.9,4,30,8,2,85,3000,140,90
...,...,...,...,...,...,...,...,...,...,...
369,59,8.1,9,75,3,0,68,7000,140,95
370,59,8.0,9,75,3,0,68,7000,140,95
371,59,8.1,9,75,3,0,68,7000,140,95
372,59,8.1,9,75,3,0,68,7000,140,95


In [13]:
# Display the target Series (the "Sleep Disorder" column of dt).
Y

0      0
1      0
2      0
3      1
4      1
      ..
369    1
370    1
371    1
372    1
373    1
Name: Sleep Disorder, Length: 374, dtype: int64

In [14]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [17]:
# Build a 100-tree random forest with a fixed seed and train it on the training
# split (fit returns the estimator itself, so construction and fitting are chained).
RF_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the classes of the held-out rows.
y_pred = RF_model.predict(X_test)

# Report the fraction of held-out rows classified correctly.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.88


In [18]:
def output(x):
    """Translate a predicted class code into its display label.

    0 -> 'normal', 1 -> "Sleep Apnea", 2 -> "Insomnia".
    Any other value yields None.
    """
    # The position in the tuple is the class code.
    labels = ('normal', "Sleep Apnea", "Insomnia")
    for code, label in enumerate(labels):
        if x == code:
            return label
    return None

In [19]:
# One hand-built sample row with the same feature columns, in the same order,
# as the training matrix X; BMI Category uses the integer encoding (2 = "Obese").
new_data = pd.DataFrame([
    {
        'Age': 28,
        'Sleep Duration': 5.9,
        'Quality of Sleep': 4,
        'Physical Activity Level': 30,
        'Stress Level': 8,
        'BMI Category': 2,
        'Heart Rate': 85,
        'Daily Steps': 3000,
        'systolic_bp': 125,
        'diastolic_bp': 80,
    }
])

In [20]:
# Classify the hand-built sample, then print the label for its single row.
prediction = RF_model.predict(new_data)
print(output(prediction[0]))

normal


In [20]:
# Persist the trained forest to disk; joblib.dump returns the list of files written.
# NOTE(review): the file name is spelled "RandomFores.sav" — keep any loader in sync with it.
joblib.dump(value=RF_model, filename="RandomFores.sav")

['RandomFores.sav']