In [1]:
import pandas as pd


# Replace 'your_file.csv' with your actual file name
data = pd.read_csv('Sleep_health_and_lifestyle_dataset.csv')

# Check the first rows of your dataset
data.head()

# Check the first rows of your dataset
data.head()

Unnamed: 0,Person ID,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Blood Pressure,Heart Rate,Daily Steps,Sleep Disorder
0,1,Male,27,Software Engineer,6.1,6,42,6,Overweight,126/83,77,4200,
1,2,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
2,3,Male,28,Doctor,6.2,6,60,8,Normal,125/80,75,10000,
3,4,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea
4,5,Male,28,Sales Representative,5.9,4,30,8,Obese,140/90,85,3000,Sleep Apnea


In [2]:
# Basic information
data.info()

# Basic statistics
data.describe()

# Check if any missing values
data.isnull().sum()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 374 entries, 0 to 373
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Person ID                374 non-null    int64  
 1   Gender                   374 non-null    object 
 2   Age                      374 non-null    int64  
 3   Occupation               374 non-null    object 
 4   Sleep Duration           374 non-null    float64
 5   Quality of Sleep         374 non-null    int64  
 6   Physical Activity Level  374 non-null    int64  
 7   Stress Level             374 non-null    int64  
 8   BMI Category             374 non-null    object 
 9   Blood Pressure           374 non-null    object 
 10  Heart Rate               374 non-null    int64  
 11  Daily Steps              374 non-null    int64  
 12  Sleep Disorder           155 non-null    object 
dtypes: float64(1), int64(7), object(5)
memory usage: 38.1+ KB


Person ID                    0
Gender                       0
Age                          0
Occupation                   0
Sleep Duration               0
Quality of Sleep             0
Physical Activity Level      0
Stress Level                 0
BMI Category                 0
Blood Pressure               0
Heart Rate                   0
Daily Steps                  0
Sleep Disorder             219
dtype: int64

In [3]:
# Fix missing values in 'Sleep Disorder'
data['Sleep Disorder'].fillna('None', inplace=True)

# Verify fix
data['Sleep Disorder'].value_counts()


Sleep Disorder
None           219
Sleep Apnea     78
Insomnia        77
Name: count, dtype: int64

In [4]:
# Create binary target column 'Sleep_Disorder_Binary'
data['Sleep_Disorder_Binary'] = data['Sleep Disorder'].apply(lambda x: 0 if x == 'None' else 1)

# Verify clearly
data[['Sleep Disorder', 'Sleep_Disorder_Binary']].head(10)


Unnamed: 0,Sleep Disorder,Sleep_Disorder_Binary
0,,0
1,,0
2,,0
3,Sleep Apnea,1
4,Sleep Apnea,1
5,Insomnia,1
6,Insomnia,1
7,,0
8,,0
9,,0


In [5]:
# Selecting relevant features (X) and target (y)
features = ['Age', 'Sleep Duration', 'Quality of Sleep',
            'Physical Activity Level', 'Stress Level',
            'Heart Rate', 'Daily Steps']

X = data[features]
y = data['Sleep_Disorder_Binary']

In [6]:
pip install scikit-learn


You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.8/bin/python3.8 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [7]:
!python3 -m pip install --user scikit-learn

You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.8/bin/python3 -m pip install --upgrade pip' command.[0m


In [8]:
# Split data again clearly
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train your model clearly
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Check accuracy clearly
from sklearn.metrics import accuracy_score, classification_report
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy*100:.2f}%")
print(classification_report(y_test, y_pred))

Model Accuracy: 96.00%
              precision    recall  f1-score   support

           0       0.95      0.98      0.97        43
           1       0.97      0.94      0.95        32

    accuracy                           0.96        75
   macro avg       0.96      0.96      0.96        75
weighted avg       0.96      0.96      0.96        75



In [9]:
import joblib

# Define the correct feature set (only the ones used during training)
feature_columns = ['Age', 'Sleep Duration', 'Quality of Sleep',
                   'Physical Activity Level', 'Stress Level',
                   'Heart Rate', 'Daily Steps']

# Save the trained model along with feature names
joblib.dump((model, feature_columns), "helixy_sleep_disorder_model.pkl")

print("Model and feature columns saved successfully!")


Model and feature columns saved successfully!


In [10]:

import os

# Define the correct project path (Update if needed)
project_path = os.path.expanduser("~/Documents/helixy-sleep-disorder")

# Create the folder if it doesn't exist
if not os.path.exists(project_path):
    os.makedirs(project_path)
    print(f"📂 Created project folder at: {project_path}")
else:
    print(f"✅ Project folder already exists at: {project_path}")

# Move to the correct directory
os.chdir(project_path)
print("📂 Now working in:", os.getcwd())


✅ Project folder already exists at: /Users/sandeshneupane/Documents/helixy-sleep-disorder
📂 Now working in: /Users/sandeshneupane/Documents/helixy-sleep-disorder


In [12]:
import os

# Print current working directory
print("📂 Current directory:", os.getcwd())

# List all files in the current directory
print("📄 Files in this directory:", os.listdir())


📂 Current directory: /Users/sandeshneupane/Documents/helixy-sleep-disorder
📄 Files in this directory: []


In [13]:
import os

# Search for the model file in your entire home directory
for root, dirs, files in os.walk(os.path.expanduser("~")):
    if "helixy_sleep_disorder_model.pkl" in files:
        print("Model file found at:", os.path.join(root, "helixy_sleep_disorder_model.pkl"))


Model file found at: /Users/sandeshneupane/helixy_sleep_disorder_model.pkl


In [16]:
import shutil

# Define source path (where the model is now)
source_path = "/Users/sandeshneupane/helixy_sleep_disorder_model.pkl"

# Define destination path (where we want to move it)
destination_path = "/Users/sandeshneupane/Documents/helixy-sleep-disorder/helixy_sleep_disorder_model.pkl"

# Move the file
shutil.move(source_path, destination_path)

print("Model file successfully moved to the project folder!")


FileNotFoundError: [Errno 2] No such file or directory: '/Users/sandeshneupane/helixy_sleep_disorder_model.pkl'

In [17]:
import os

# List files inside the project folder
print("📄 Files in project folder:", os.listdir("/Users/sandeshneupane/Documents/helixy-sleep-disorder"))


📄 Files in project folder: ['helixy_sleep_disorder_model.pkl']


In [19]:
import os

# Remove the old GitHub remote link (if it exists)
os.system("git remote remove origin")

# Add GitHub remote with the new token
GITHUB_USERNAME = "Trishna1212"
GITHUB_PAT = "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"

# Add the remote repository using token authentication
os.system(f"git remote add origin https://{GITHUB_USERNAME}:{GITHUB_PAT}@github.com/Trishna1212/helixy-sleep-disorder.git")

print("GitHub authentication updated successfully!")


GitHub authentication updated successfully!


In [20]:
# Push to GitHub
os.system("git push -u origin main")

print("Model successfully pushed to GitHub!")


Model successfully pushed to GitHub!


remote: Repository not found.
fatal: repository 'https://github.com/Trishna1212/helixy-sleep-disorder.git/' not found


In [22]:
import os

# Remove the old incorrect remote link
os.system("git remote remove origin")

# Add the correct repository URL (Replace with the correct username if needed)
GITHUB_USERNAME = "Trishna1212"
GITHUB_PAT = "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"

# Add the correct GitHub remote link
os.system(f"git remote add origin https://{GITHUB_USERNAME}:{GITHUB_PAT}@github.com/{GITHUB_USERNAME}/helixy-sleep-disorder.git")

print("GitHub remote repository updated successfully!")


GitHub remote repository updated successfully!


In [23]:
# Push to GitHub
os.system("git push -u origin main")

print("Model successfully pushed to GitHub!")


branch 'main' set up to track 'origin/main'.
Model successfully pushed to GitHub!


remote: This repository moved. Please use the new location:        
remote:   https://github.com/Trishna1212/Helixy-sleep-disorder.git        
To https://github.com/Trishna1212/helixy-sleep-disorder.git
 * [new branch]      main -> main


In [24]:
import os

# Define GitHub repository details
GITHUB_USERNAME = "Trishna1212"
REPO_NAME = "Helixy-sleep-disorder"
MODEL_FILENAME = "helixy_sleep_disorder_model.pkl"

# Clone the repository (if not already cloned)
if not os.path.exists(REPO_NAME):
    os.system(f"git clone https://github.com/{GITHUB_USERNAME}/{REPO_NAME}.git")
    print("Repository cloned successfully!")

# Move into the project directory
os.chdir(REPO_NAME)

# Verify if the model file exists
if os.path.exists(MODEL_FILENAME):
    print("Model file found in GitHub repository!")
else:
    print("Model file not found. Please check GitHub repository.")


Cloning into 'Helixy-sleep-disorder'...


Repository cloned successfully!
Model file found in GitHub repository!


In [25]:
import joblib

# Load the model along with feature names
model, feature_columns = joblib.load("helixy_sleep_disorder_model.pkl")

print("Model loaded successfully!")
print("Expected Features:", feature_columns)


Model loaded successfully!
Expected Features: ['Age', 'Sleep Duration', 'Quality of Sleep', 'Physical Activity Level', 'Stress Level', 'Heart Rate', 'Daily Steps']
