<a href="https://colab.research.google.com/github/Varsha7330/classification/blob/main/Assignment7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**About Dataset**                                                          
Dataset Overview:                                                         
The Sleep Health and Lifestyle Dataset provides detailed insights into the sleep patterns, daily habits, and lifestyle factors of individuals. This synthetic dataset comprises 400 rows and 13 columns, covering essential metrics such as sleep duration, sleep quality, physical activity levels, stress, BMI category, cardiovascular health, and the presence of sleep disorders.

In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [5]:
# Load the dataset
file_path = 'sleep_health_lifestyle_dataset.csv'
data = pd.read_csv("/content/sleep_health_lifestyle_dataset.csv")
print(file_path)
print(data)

sleep_health_lifestyle_dataset.csv
     Person ID  Gender  Age     Occupation  Sleep Duration (hours)  \
0            1    Male   29   Manual Labor                     7.4   
1            2  Female   43        Retired                     4.2   
2            3    Male   44        Retired                     6.1   
3            4    Male   29  Office Worker                     8.3   
4            5    Male   67        Retired                     9.1   
..         ...     ...  ...            ...                     ...   
395        396  Female   36        Student                     4.5   
396        397  Female   45   Manual Labor                     6.0   
397        398  Female   30        Student                     5.3   
398        399  Female   41        Retired                    11.0   
399        400    Male   37        Retired                     5.8   

     Quality of Sleep (scale: 1-10)  Physical Activity Level (minutes/day)  \
0                               7.0           

In [4]:
# Drop unnecessary columns and preprocess categorical features
data_cleaned = data.drop(columns=["Person ID"])

In [6]:
# Handle 'Blood Pressure' by splitting into systolic and diastolic
data_cleaned[['Systolic', 'Diastolic']] = data_cleaned['Blood Pressure (systolic/diastolic)'].str.split('/', expand=True).astype(float)
data_cleaned = data_cleaned.drop(columns=["Blood Pressure (systolic/diastolic)"])

In [7]:
# Label encode categorical columns
categorical_cols = ['Gender', 'Occupation', 'BMI Category', 'Sleep Disorder']
label_encoders = {col: LabelEncoder() for col in categorical_cols}
for col in categorical_cols:
    data_cleaned[col] = label_encoders[col].fit_transform(data_cleaned[col])

In [8]:
# Separate features and target variable
X = data_cleaned.drop(columns=["Sleep Disorder"])
y = data_cleaned["Sleep Disorder"]

# Standardize numeric features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [9]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [10]:
# Initialize models
models = {
    "Logistic Regression": LogisticRegression(random_state=42),
    "Naïve Bayes": GaussianNB(),
    "KNN": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "SVC": SVC(random_state=42)
}

In [13]:
# Train and evaluate each model
results = {}
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    results[model_name] = {
        "Accuracy": report["accuracy"],
        "Precision": report["weighted avg"]["precision"],
        "Recall": report["weighted avg"]["recall"],
        "F1-Score": report["weighted avg"]["f1-score"]
    }
# Display results
results_df = pd.DataFrame(results).T
print(results_df)

                     Accuracy  Precision    Recall  F1-Score
Logistic Regression  0.700000   0.732423  0.700000  0.584528
Naïve Bayes          0.700000   0.732423  0.700000  0.584528
KNN                  0.650000   0.566195  0.650000  0.589242
Decision Tree        0.508333   0.488011  0.508333  0.497475
SVC                  0.691667   0.478403  0.691667  0.565599


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
