In [110]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score


In [111]:
tr = pd.read_csv("/content/Training (1).csv")
te = pd.read_csv("/content/Testing (1).csv")

In [112]:
print(tr.columns)

Index(['itching', 'skin_rash', 'nodal_skin_eruptions', 'continuous_sneezing',
       'shivering', 'chills', 'joint_pain', 'stomach_pain', 'acidity',
       'ulcers_on_tongue',
       ...
       'blackheads', 'scurring', 'skin_peeling', 'silver_like_dusting',
       'small_dents_in_nails', 'inflammatory_nails', 'blister',
       'red_sore_around_nose', 'yellow_crust_ooze', 'prognosis'],
      dtype='object', length=133)


In [113]:
print("Basic Information of Training Dataset:")
print(tr.info())
print("\nBasic Information of Testing Dataset:")
print(te.info())

Basic Information of Training Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4920 entries, 0 to 4919
Columns: 133 entries, itching to prognosis
dtypes: int64(132), object(1)
memory usage: 5.0+ MB
None

Basic Information of Testing Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Columns: 133 entries, itching to prognosis
dtypes: int64(132), object(1)
memory usage: 42.7+ KB
None


In [114]:
print("First 5 rows of the dataset")
tr.head()

First 5 rows of the dataset


Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [115]:
symptoms = tr.columns[1:-1].tolist()
disease = tr['prognosis'].unique().tolist()

disease_mapping = {d: i for i, d in enumerate(disease)}
print(disease_mapping)

symptoms = np.array(symptoms)
disease = np.array(disease)

{'Fungal infection': 0, 'Allergy': 1, 'GERD': 2, 'Chronic cholestasis': 3, 'Drug Reaction': 4, 'Peptic ulcer diseae': 5, 'AIDS': 6, 'Diabetes ': 7, 'Gastroenteritis': 8, 'Bronchial Asthma': 9, 'Hypertension ': 10, 'Migraine': 11, 'Cervical spondylosis': 12, 'Paralysis (brain hemorrhage)': 13, 'Jaundice': 14, 'Malaria': 15, 'Chicken pox': 16, 'Dengue': 17, 'Typhoid': 18, 'hepatitis A': 19, 'Hepatitis B': 20, 'Hepatitis C': 21, 'Hepatitis D': 22, 'Hepatitis E': 23, 'Alcoholic hepatitis': 24, 'Tuberculosis': 25, 'Common Cold': 26, 'Pneumonia': 27, 'Dimorphic hemmorhoids(piles)': 28, 'Heart attack': 29, 'Varicose veins': 30, 'Hypothyroidism': 31, 'Hyperthyroidism': 32, 'Hypoglycemia': 33, 'Osteoarthristis': 34, 'Arthritis': 35, '(vertigo) Paroymsal  Positional Vertigo': 36, 'Acne': 37, 'Urinary tract infection': 38, 'Psoriasis': 39, 'Impetigo': 40}


In [116]:
tr.replace({'prognosis': disease_mapping}, inplace=True)
te.replace({'prognosis': disease_mapping}, inplace=True)

X_train = tr[symptoms]
y_train = tr[["prognosis"]]

X_test = te[symptoms]
y_test = te[["prognosis"]]

print(tr['prognosis'])

0        0
1        0
2        0
3        0
4        0
        ..
4915    36
4916    37
4917    38
4918    39
4919    40
Name: prognosis, Length: 4920, dtype: int64


In [131]:
class BernoulliNaiveBayes:
    def __init__(self):
        self.class_probs = None
        self.feature_probs = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.classes, class_counts = np.unique(y, return_counts=True)
        n_classes = len(self.classes)

        self.class_probs = class_counts / n_samples # Prior probability of each class

        self.feature_probs = np.zeros((n_classes, n_features)) # Probability of each feature being 1 given each class
        for i, c in enumerate(self.classes):
            class_mask = (y == c)
            class_samples = X[class_mask]
            self.feature_probs[i, :] = (np.sum(class_samples, axis=0) + 1) / (np.sum(class_mask) + 2) 

    def predict_proba(self, X): # Predict probability of each class for given input

        # Use log probabilities to handle numerical stability when working with small probabilities.
        class_probs_log = np.log(self.class_probs)
        feature_probs_log = np.log(self.feature_probs)

        X_array = np.array(X)

        # Compute log likelihoods of input data under each class
        log_likelihoods = X_array @ feature_probs_log.T + (1 - X_array) @ np.log(1 - feature_probs_log.T)

        # Compute posterior probabilities by adding log likelihoods to prior class probabilities
        log_probs = log_likelihoods + class_probs_log - np.max(log_likelihoods + class_probs_log, axis=1, keepdims=True)

        probs = np.exp(log_probs)

        # Normalize probabilities to ensure they sum to 1
        probs /= np.sum(probs, axis=1, keepdims=True)

        return [np.max(probs), self.classes[np.argmax(probs, axis=1)] ]


In [133]:
nb = BernoulliNaiveBayes()
nb.fit(X_train, np.ravel(y_train))

# Accuracy calculation
y_true = np.ravel(y_test)
y_pred = nb.predict_proba(X_test)
accuracy = accuracy_score(y_true, y_pred[1])
print(accuracy)

1.0


In [134]:
print("Available Symptoms:", ", ".join(symptoms))

user_input = input("Enter symptoms separated by commas (e.g., symptom1, symptom2): ")
user_input_symptoms = [symptom.strip() for symptom in user_input.split(',')]

symptoms2 = [0] * len(symptoms)
for i in range(0, len(symptoms)):
    for j in user_input_symptoms:
        if (j == symptoms[i]):
            symptoms2[i] = 1

iptest = [symptoms2]
predict = nb.predict_proba(iptest)

max_prob = predict[0]
disease_name = disease[predict[1]]

print(f"Identified disease is: {disease_name}")
print(f"Probability of Disease : {max_prob:.2%}")

Available Symptoms: skin_rash, nodal_skin_eruptions, continuous_sneezing, shivering, chills, joint_pain, stomach_pain, acidity, ulcers_on_tongue, muscle_wasting, vomiting, burning_micturition, spotting_ urination, fatigue, weight_gain, anxiety, cold_hands_and_feets, mood_swings, weight_loss, restlessness, lethargy, patches_in_throat, irregular_sugar_level, cough, high_fever, sunken_eyes, breathlessness, sweating, dehydration, indigestion, headache, yellowish_skin, dark_urine, nausea, loss_of_appetite, pain_behind_the_eyes, back_pain, constipation, abdominal_pain, diarrhoea, mild_fever, yellow_urine, yellowing_of_eyes, acute_liver_failure, fluid_overload, swelling_of_stomach, swelled_lymph_nodes, malaise, blurred_and_distorted_vision, phlegm, throat_irritation, redness_of_eyes, sinus_pressure, runny_nose, congestion, chest_pain, weakness_in_limbs, fast_heart_rate, pain_during_bowel_movements, pain_in_anal_region, bloody_stool, irritation_in_anus, neck_pain, dizziness, cramps, bruising, 