# import libraries

In [1]:
import numpy as np
import pandas as pd
import sklearn as sk
from sklearn.linear_model import LinearRegression as lr
from sklearn.model_selection import train_test_split as tts
%matplotlib inline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler,MinMaxScaler
import joblib

# Load Data

In [9]:
dt = pd.read_csv("Sleep_health_and_lifestyle_dataset.csv", header = 0)
dt

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,370,Female,59,Nurse,8.1,9,75,3,Overweight,140/95,68,7000,Sleep Apnea
370,371,Female,59,Nurse,8.0,9,75,3,Overweight,140/95,68,7000,Sleep Apnea
371,372,Female,59,Nurse,8.1,9,75,3,Overweight,140/95,68,7000,Sleep Apnea
372,373,Female,59,Nurse,8.1,9,75,3,Overweight,140/95,68,7000,Sleep Apnea


In [10]:
dt.columns

Index(['Person ID', 'Gender', 'Age', 'Occupation', 'Sleep Duration',
       'Quality of Sleep', 'Physical Activity Level', 'Stress Level',
       'BMI Category', 'Blood Pressure', 'Heart Rate', 'Daily Steps',
       'Sleep Disorder'],
      dtype='object')

# Drop useless features

In [None]:
# Assuming 'data' is your DataFrame and 'columns_to_drop' contains the names of columns to be dropped
columns_to_drop = ['Person ID','Gender','Occupation','Blood Pressure']  # Replace these with your actual column names

# Dropping columns from the DataFrame
dt.drop(columns=columns_to_drop, inplace=True)  # inplace=True modifies the DataFrame in place


# Handle string to numeric values

In [13]:
def handle_BMI(x):
    if x == "Overweight": return 0
    elif x == "Normal" : return 1
    elif x == "Obese" : return 2
    elif x == "Normal Weight": return 3
    
def handle_Disorder(x):    
    if pd.isnull(x)  : return 0 # if its null value so i assume it's 'normal'
    elif x == "Sleep Apnea": return 1
    elif x == "Insomnia" : return 2


In [14]:
dt['BMI Category'] = dt['BMI Category'].apply(handle_BMI)
dt['Sleep Disorder'] = dt['Sleep Disorder'].apply(handle_Disorder)

In [15]:
dt.isnull().sum()

Age                        0
Sleep Duration             0
Quality of Sleep           0
Physical Activity Level    0
Stress Level               0
BMI Category               0
Heart Rate                 0
Daily Steps                0
Sleep Disorder             0
dtype: int64

# split the data to training and testing

In [19]:
X = dt.drop(columns=["Sleep Disorder"])
Y = dt["Sleep Disorder"]

In [20]:
X_train, X_test, Y_train, Y_test = tts(X, Y, test_size = 0.2, random_state = 5)
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(299, 8)
(75, 8)
(299,)
(75,)


# Train the Model

In [21]:
model = LogisticRegression()

In [22]:
model.fit(X_train,Y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Test the Model

In [24]:
model.score(X_test,Y_test)

0.8266666666666667

# How to get prediction

In [29]:
def output(x):    
    if x == 0  : return 'normal'
    elif x == 1: return "Sleep Apnea"
    elif x == 2: return "Insomnia" 


**Example**

In [35]:
# Example value, replace with the actual value
new_data = pd.DataFrame({
    'Age': [28],  
    'Sleep Duration': [5.9],  
    'Quality of Sleep': [4],  
    'Physical Activity Level': [30],  
    'Stress Level': [8], 
    'BMI Category': [2], 
    'Heart Rate': [80],  
    'Daily Steps': [1000]
})

In [36]:
prediction = model.predict(new_data)
print (output (prediction[0]))

Insomnia


# Save the Model

In [37]:
joblib.dump(model,"model.sav")

['model.sav']