In [2]:
import pandas as pd

# Load both datasets
df_symptoms = pd.read_csv('disease_and_symptoms.csv')
df_precautions = pd.read_csv('disease_precaution.csv')

# Show samples
print(df_symptoms.head())
print(df_precautions.head())


            Disease   Symptom_1              Symptom_2              Symptom_3  \
0  Fungal infection     itching              skin_rash   nodal_skin_eruptions   
1  Fungal infection   skin_rash   nodal_skin_eruptions    dischromic _patches   
2  Fungal infection     itching   nodal_skin_eruptions    dischromic _patches   
3  Fungal infection     itching              skin_rash    dischromic _patches   
4  Fungal infection     itching              skin_rash   nodal_skin_eruptions   

              Symptom_4 Symptom_5 Symptom_6 Symptom_7 Symptom_8 Symptom_9  \
0   dischromic _patches       NaN       NaN       NaN       NaN       NaN   
1                   NaN       NaN       NaN       NaN       NaN       NaN   
2                   NaN       NaN       NaN       NaN       NaN       NaN   
3                   NaN       NaN       NaN       NaN       NaN       NaN   
4                   NaN       NaN       NaN       NaN       NaN       NaN   

  Symptom_10 Symptom_11 Symptom_12 Symptom_13 Symp

In [3]:
# Fill NaN
df_symptoms.fillna('None', inplace=True)

# Get all unique symptoms
all_symptoms = pd.unique(df_symptoms.iloc[:, 1:].values.ravel())
all_symptoms = [symptom for symptom in all_symptoms if symptom != 'None']

# Convert each row into binary symptom vector
def encode_symptoms(row):
    symptoms = row[1:].values
    return pd.Series([1 if s in symptoms else 0 for s in all_symptoms], index=all_symptoms)

X = df_symptoms.apply(encode_symptoms, axis=1)
y = df_symptoms['Disease']

In [4]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

le = LabelEncoder()
y_encoded = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


In [5]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

model = MultinomialNB()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=le.classes_))


Accuracy: 1.0
                                         precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        18
                                   AIDS       1.00      1.00      1.00        30
                                   Acne       1.00      1.00      1.00        24
                    Alcoholic hepatitis       1.00      1.00      1.00        25
                                Allergy       1.00      1.00      1.00        24
                              Arthritis       1.00      1.00      1.00        23
                       Bronchial Asthma       1.00      1.00      1.00        33
                   Cervical spondylosis       1.00      1.00      1.00        23
                            Chicken pox       1.00      1.00      1.00        21
                    Chronic cholestasis       1.00      1.00      1.00        15
                            Common Cold       1.00      1.00      1.00        23
             

In [6]:
# Simulated symptom input
input_symptoms = ['itching', 'fatigue', 'vomiting']

# Create input vector
input_vector = [1 if symptom in input_symptoms else 0 for symptom in all_symptoms]
input_vector = pd.DataFrame([input_vector], columns=all_symptoms)

# Predict disease
predicted = model.predict(input_vector)
disease_name = le.inverse_transform(predicted)[0]

print("Predicted Disease:", disease_name)

# Show precautions
precautions = df_precautions[df_precautions['Disease'] == disease_name]
print("\nRecommended Precautions:")
print(precautions.iloc[0, 1:].values)


Predicted Disease: Fungal infection

Recommended Precautions:
['bath twice' 'use detol or neem in bathing water'
 'keep infected area dry' 'use clean cloths']
