In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the cleaned dataset.
data = pd.read_csv('df_clean.csv')

# Preprocessing: remove irrelevant columns, then separate features and target.
data_clean = data.drop(columns=['Unnamed: 0', 'Person ID'])
X = data_clean.drop('Sleep Disorder', axis=1)
y = data_clean['Sleep Disorder']

# Split BEFORE scaling: fitting the scaler on the full matrix would let the
# mean/variance of the held-out rows leak into the training features.
# The same random_state on the same number of rows yields the same row
# partition as splitting the scaled matrix would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Standardize: learn the statistics from the training rows only, then apply
# that same fitted transform to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the training statistics. The name is kept
# because the original script defined it (later cells may reference it --
# TODO confirm); it is not used for fitting or evaluation below.
X_scaled = scaler.transform(X)

# Gaussian Naive Bayes classifier with default settings.
nb_model = GaussianNB()

# Train the model.
nb_model.fit(X_train, y_train)

# Predict on the held-out test data.
y_pred = nb_model.predict(X_test)

# Evaluate the model. conf_matrix is computed but not printed here.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print(f"Test Accuracy: {accuracy:.2f}")

print("Classification Report:")
print(class_report)


Test Accuracy: 0.95
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.95      0.95        55
           1       0.93      0.95      0.94        39

    accuracy                           0.95        94
   macro avg       0.94      0.95      0.95        94
weighted avg       0.95      0.95      0.95        94

