In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import classification_report, confusion_matrix


# Train and evaluate a Bernoulli Naive Bayes disease classifier.
#
# Pipeline: load the CSV, drop diseases with too few rows, integer-encode the
# target, make a stratified train/test split, convert the features to sparse
# matrices, fit the model, and print a per-disease classification report.

data = pd.read_csv("Disease and symptoms dataset.csv")


# Keep only diseases that occur at least `min_samples` times; the stratified
# split below cannot place a class with a single row on both sides.
min_samples = 5
class_counts = data["diseases"].value_counts()
valid_classes = class_counts[class_counts >= min_samples].index
filtered_data = data[data["diseases"].isin(valid_classes)]


# factorize() returns integer codes 0..n-1 and the matching unique names, so
# disease_names[code] recovers the label for any code in y.
y, disease_names = filtered_data["diseases"].factorize()
X = filtered_data.drop(columns="diseases").astype(np.float32)


X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=42
)


# The model is fitted on and queried with CSR matrices built from the frames.
X_train_sparse = csr_matrix(X_train)
X_test_sparse = csr_matrix(X_test)


model = BernoulliNB()
model.fit(X_train_sparse, y_train)


y_pred = model.predict(X_test_sparse)


# Pass `labels` explicitly so every row of the report is tied to its factorize
# code. Without it, scikit-learn derives the label set from y_test/y_pred; if
# a class is absent from both, the number of labels no longer matches
# `target_names` and the call fails (or the names are attached to the wrong
# rows).
report_labels = np.arange(len(disease_names))

print("\nClassification Report:")
print(classification_report(
    y_test, y_pred,
    labels=report_labels,
    target_names=disease_names,
    zero_division=0
))




Classification Report:
                                                          precision    recall  f1-score   support

                                          panic disorder       0.86      0.99      0.92       182
                                        vocal cord polyp       1.00      0.61      0.76        18
                                          cryptorchidism       0.00      0.00      0.00         1
                        poisoning due to ethylene glycol       0.00      0.00      0.00         5
                                      atrophic vaginitis       0.91      0.77      0.84        66
                                    fracture of the hand       1.00      0.41      0.58        22
                          cellulitis or abscess of mouth       1.00      0.93      0.96        28
                                  eye alignment disorder       0.92      0.96      0.94        91
                          headache after lumbar puncture       1.00      0.85      0.92      