In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

In [None]:
# Load the survey data and split the combined "systolic/diastolic" blood-pressure
# string into two integer feature columns.
df = pd.read_csv('Sleep_health_and_lifestyle_dataset.csv')
df[['Systolic_BP', 'Diastolic_BP']] = df['Blood Pressure'].str.split('/', expand=True).astype(int)
# Drop the columns that are not used as model features (the raw blood-pressure
# string is redundant once it has been split above).
df = df.drop(['Person ID', 'Occupation', 'Blood Pressure'], axis=1)
print("Missing values:\n", df.isnull().sum())

# NOTE(review): recent pandas versions include the literal string "None" in
# read_csv's default NA markers, so a "None" (= no sleep disorder) label is loaded
# as NaN and would later be encoded as a NaN class. Presumably that is what any
# missing 'Sleep Disorder' value reported above means -- confirm against the CSV.
# This is a no-op when the column has no missing values.
df['Sleep Disorder'] = df['Sleep Disorder'].fillna('None')

In [None]:
# Integer-encode the categorical columns in place with one shared encoder.
# `le` is refitted on every pass, so when this cell finishes it only holds the
# classes of the last column processed ('Sleep Disorder').
le = LabelEncoder()
for column in ('Gender', 'BMI Category', 'Sleep Disorder'):
    df[column] = le.fit_transform(df[column])

In [None]:
# Features are every remaining column except the target.
X = df.drop('Sleep Disorder', axis=1)
y = df['Sleep Disorder']

# Split BEFORE scaling: fitting the scaler on the full dataset would leak the
# test rows' mean/variance into training. The row assignment is the same as
# splitting the scaled matrix, because it depends only on the number of rows
# and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Learn the scaling statistics from the training rows only, then apply them to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training-set statistics; kept so that any
# code referring to `X_scaled` keeps working.
X_scaled = scaler.transform(X)

In [None]:
# Train a 3-nearest-neighbours classifier on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Score the model on the held-out test split.
y_pred = knn.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("\nAccuracy Score:", test_accuracy)
print("\nClassification Report:\n", test_report)

In [None]:
# Confusion matrix of true vs. predicted encoded labels on the test split.
cm = confusion_matrix(y_test, y_pred)

# Draw it as an annotated heatmap on an explicitly created figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
plt.show()

In [None]:
# One example respondent to classify. The keys are listed in the same order as
# the feature columns the scaler was fitted on.
sample_input = {
    'Gender': 'Male',
    'Age': 35,
    'Sleep Duration': 7.5,
    'Quality of Sleep': 8,
    'Physical Activity Level': 60,
    'Stress Level': 5,
    'BMI Category': 'Normal',
    'Heart Rate': 70,
    'Daily Steps': 8000,
    'Systolic_BP': 120,
    'Diastolic_BP': 80
}

sample_df = pd.DataFrame([sample_input])

# On a top-to-bottom run `le` was last fitted on the 'Sleep Disorder' target, so
# its classes_ are the target labels. Read them without refitting `le`:
# refitting it on the one-row sample would replace them with the sample's own
# values and make the label lookup below wrong.
class_names = le.classes_

# The sample must be encoded with the SAME category -> integer mapping used for
# training. Fitting an encoder on the single sample row would map every value
# to 0 (e.g. 'Male' -> 0, although training encoded it as 1). The shared
# training encoder did not keep the Gender/BMI mappings, so rebuild them from
# the original string columns of the training file. transform() raises
# ValueError for a category that never appeared in training.
categorical_columns = ['Gender', 'BMI Category']
training_categories = pd.read_csv('Sleep_health_and_lifestyle_dataset.csv', usecols=categorical_columns)
for column in categorical_columns:
    column_encoder = LabelEncoder().fit(training_categories[column])
    sample_df[column] = column_encoder.transform(sample_df[column])

# Apply the already-fitted scaler (transform only, never refit at inference).
sample_scaled = scaler.transform(sample_df)

prediction = knn.predict(sample_scaled)
prediction_proba = knn.predict_proba(sample_scaled)

# predict_proba columns follow knn.classes_ (the encoded labels seen during
# fit), which need not cover every label, so map those codes back to names.
proba_labels = class_names[knn.classes_]

print("\nPredicted Sleep Disorder:", class_names[prediction[0]])
print("Prediction Probabilities:", dict(zip(proba_labels, prediction_proba[0])))