In [None]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


In [None]:
# Read the disease/symptom CSV (expected in the notebook's working directory)
# into a DataFrame and report its (rows, columns) shape.
data = pd.read_csv(
    filepath_or_buffer="Final_Augmented_dataset_Diseases_and_Symptoms.csv"
)
print("shape of dataset:", data.shape)

# Last expression of the cell: rich preview of the first rows.
data.head()

In [None]:
# Display the column labels of the loaded frame; the "diseases" column is the
# target and every other column is used as a feature in the next cell.
data.columns

In [None]:
# Feature matrix: every column except the label column.
X = data.drop(columns="diseases")
# Target vector: the disease label of each row.
y = data["diseases"]

# Quick sanity report of what was just built.
print("Feature Shape:", X.shape)
print("Target Shape:", y.shape)
print("Unique Diseases:", y.nunique())

In [None]:
# Remove diseases that appear in only ONE ROW of the dataset (not "one symptom"):
# the stratified train/test split performed later needs at least two rows per class.
# Counts are taken from `data` rather than from the mutable `y`, so re-running this
# cell always starts from the raw labels and gives the same result.
count = data["diseases"].value_counts()
valid_classes = count[count > 1].index
cleaned_data = data[data["diseases"].isin(valid_classes)]

# Redefine X and y from the filtered rows only.
X = cleaned_data.drop("diseases", axis=1)
y = cleaned_data["diseases"]
print("Dataset Shape After Cleaning:", cleaned_data.shape)
# Report the NUMBER of remaining classes (comparable with the "Unique Diseases"
# count printed before cleaning) instead of dumping the whole label array.
print("Remaining Unique Diseases:", y.nunique())


In [None]:
# Hold out 20% of the rows for testing. `stratify=y` asks for the class
# proportions to be preserved in both parts; `random_state` fixes the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Row counts of each part.
print("Training Data:", len(X_train))
print("Testing data:", len(X_test))

In [None]:
# Fit a multinomial Naive Bayes classifier on the training split.
# The fitted estimator is the cell's last expression, so its repr is displayed.
model = MultinomialNB()
model.fit(X=X_train, y=y_train)

In [None]:
# Predicted disease label for every row of the held-out test features.
prediction = model.predict(X=X_test)

In [None]:
# Fraction of test rows whose predicted label equals the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=prediction)
print("Accuracy:", accuracy)

In [None]:
# Per-class precision / recall / F1 table, printed under a heading line.
report_text = classification_report(y_true=y_test, y_pred=prediction)
print("\nClassification Report:", report_text, sep="\n")

In [None]:
# Confusion matrix (rows = true class, columns = predicted class).
# The label order is fixed explicitly (sorted union of true and predicted labels)
# so that the tick labels used below are guaranteed to line up with the matrix.
class_names = sorted(set(y_test) | set(prediction))
cm = confusion_matrix(y_test, prediction, labels=class_names)

# Only the top-left corner of the matrix is drawn (the first classes in sorted
# order); the title says so explicitly so the plot is not mistaken for the
# full matrix.
N_CLASSES_SHOWN = 20
shown_classes = class_names[:N_CLASSES_SHOWN]

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    cm[:N_CLASSES_SHOWN, :N_CLASSES_SHOWN],
    annot=False,
    cmap="Blues",
    cbar=True,
    xticklabels=shown_classes,  # name the classes instead of showing 0..19
    yticklabels=shown_classes,
    ax=ax,
)
ax.set_title(
    f"Confusion Matrix (first {len(shown_classes)} of {len(class_names)} classes)"
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
fig.tight_layout()  # leave room for the class-name tick labels
plt.show()

In [None]:
# Single-bar chart of the test-set accuracy. The score is recomputed here, so
# this cell only needs `y_test` and `prediction` to exist.
fig, ax = plt.subplots()
ax.bar(["Naive Bayes"], [accuracy_score(y_test, prediction)])
ax.set_ylabel("Accuracy")
ax.set_title("Model Accuracy")
plt.show()

In [None]:
import joblib as jb
import ipywidgets as wid
from IPython.display import display

In [None]:
# Persist the fitted classifier to disk; the call's return value is the cell output.
jb.dump(value=model, filename="Predict_disease.joblib")

In [None]:
# Interactive demo: the user types comma-separated symptom names and the saved
# model predicts a disease.
#
# The model is reloaded from the file written by this notebook. joblib files are
# pickle-based, so only load files you produced yourself.
model = jb.load("Predict_disease.joblib")

# Only the column names are needed to build an input vector, so read just the CSV
# header (nrows=0). This also avoids rebinding the notebook-level `data` / `X`
# variables to the uncleaned frame, which would leave `X` out of sync with the
# cleaned `y` if earlier cells were re-run afterwards.
csv_header = pd.read_csv(
    "Final_Augmented_dataset_Diseases_and_Symptoms.csv", nrows=0
)
all_symptoms = csv_header.columns.drop("diseases").tolist()

symptoms_box = wid.Text(
    value="",
    placeholder="Enter symptoms separated by ,",
    description="Symptoms:",
    disabled=False,
    # Commit the value on Enter / focus loss instead of on every keystroke,
    # so a prediction is not attempted for each partially typed word.
    continuous_update=False,
)
output = wid.Output()


def on_submit(change):
    """Predict a disease from the symptom text committed in ``symptoms_box``.

    Parameters
    ----------
    change : dict
        Trait-change notification from ``observe``; ``change["new"]`` holds the
        new text of the box (comma-separated symptom names, case-insensitive).

    The result, or a message explaining why no prediction was made, is printed
    into the ``output`` widget. Nothing is returned.
    """
    with output:
        output.clear_output()
        user_text = change["new"].strip()

        # Prevent empty input
        if not user_text:
            print(" Please enter at least one symptom.")
            return

        # Normalise the requested symptoms: trimmed, lower-cased, de-duplicated.
        requested = {s.strip().lower() for s in user_text.split(",") if s.strip()}
        known = {symptom.lower() for symptom in all_symptoms}

        # Tell the user about names that match no feature column instead of
        # silently dropping them.
        unknown = sorted(requested - known)
        if unknown:
            print("Unrecognized symptoms (ignored):", ", ".join(unknown))

        # With no recognised symptom the feature vector would be all zeros and
        # the "prediction" would not reflect the input at all, so refuse.
        if not (requested & known):
            print("No known symptoms entered; cannot make a prediction.")
            return

        # One-row frame with the training column layout: 1 = symptom present.
        input_vector = pd.DataFrame(
            [[1 if symptom.lower() in requested else 0 for symptom in all_symptoms]],
            columns=all_symptoms,
        )

        # Predict disease
        result = model.predict(input_vector)
        print("Predicted Disease:", result[0])


# The handler runs whenever the committed value changes; with
# continuous_update=False that happens on Enter or when the box loses focus.
symptoms_box.observe(on_submit, names="value")
display(symptoms_box, output)