# dataset loading

In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("/content/Training.csv")


In [3]:
# Remove duplicate rows
df = df.drop_duplicates()

In [4]:
df.head(4) # 1 means yes , 0 means no

Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [5]:
df.shape # rows , cols

(304, 133)

In [6]:
df["prognosis"].unique()

array(['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
       'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
       'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
       'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
       'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
       'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
       'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
       'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
       'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
       'Osteoarthristis', 'Arthritis',
       '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
       'Urinary tract infection', 'Psoriasis', 'Impetigo'], dtype=object)

In [7]:
df["prognosis"].unique().shape

(41,)

# split dataset for train and testing

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [9]:
# selecting lebel and features

x = df.drop("prognosis" , axis = 1)
y = df["prognosis"]

In [10]:
y

Unnamed: 0,prognosis
0,Fungal infection
1,Fungal infection
2,Fungal infection
3,Fungal infection
4,Fungal infection
...,...
402,Impetigo
403,Impetigo
405,Impetigo
406,Impetigo


In [11]:
# converting string to np array

label = LabelEncoder()

label.fit(y)
y_array = label.transform(y)

# y_array

In [12]:
# split data into 70:30
x_train , x_test , y_train , y_test =  train_test_split(x , y_array , test_size= 0.3, random_state= 42)

In [13]:
print(f"x_train--{x_train.shape}")
print(f"y_test--{x_test.shape}")
print(f"y_train--{y_train.shape}")
print(f"y_test--{y_test.shape}")

x_train--(212, 132)
y_test--(92, 132)
y_train--(212,)
y_test--(92,)


# model training

In [14]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import VotingClassifier

from sklearn.metrics import accuracy_score , mean_squared_error , precision_score , f1_score , recall_score

In [15]:
# KNeighborsClassifier:
# accuracy: 92.39%
# RMSE: 1.964356294609996

# GaussianNB
# accuracy: 100.00%
# RMSE: 0.0

# SVC:
# accuracy: 95.65%
# RMSE: 2.776062272199677

# LogisticRegression
# accuracy: 100.00%
# RMSE: 0.0

# I have used this models but to get better result I have used two models combinations:
# GaussianNB and LogisticRegression

In [16]:
# GaussianNB + LogisticRegression

gnb = GaussianNB()
lr = LogisticRegression(random_state= 42)

voting_clf = VotingClassifier(estimators=[('gnb', gnb), ('lr', lr)], voting='soft')

voting_clf.fit(x_train, y_train)

y_pred = voting_clf.predict(x_test)

accuracy = accuracy_score(y_test , y_pred)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f"accuracy: {accuracy * 100:.2f}%")

print(f"RMSE: {rmse}")

precision = precision_score(y_test, y_pred, average='micro')
print(f"Precision: {precision:.2f}")

f1 = f1_score(y_test, y_pred, average='micro')
print(f"F1 Score: {f1:.2f}")

recall = recall_score(y_test, y_pred, average='micro')
print(f"Recall: {recall:.2f}")

accuracy: 100.00%
RMSE: 0.0
Precision: 1.00
F1 Score: 1.00
Recall: 1.00


In [17]:
my_model = voting_clf

In [18]:
import pickle
pickle.dump(my_model,open('model.pkl','wb'))

In [19]:
my_model = pickle.load(open('model.pkl','rb'))

In [20]:
x_test.iloc[0].values.reshape(1 , -1)

print("model prediction label: " , my_model.predict(x_test.iloc[0].values.reshape(1 , -1)))
print("original label: " , y_test[0])

model prediction label:  [22]
original label:  22




# recommendation and prediction

# load datasets

In [21]:
symtoms_df = pd.read_csv("/content/symtoms_df.csv")
symtoms_df.head(4)

Unnamed: 0.1,Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4
0,0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches
1,1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,
2,2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,
3,3,Fungal infection,itching,skin_rash,dischromic _patches,


In [22]:
precautions_df = pd.read_csv("/content/precautions_df.csv")
precautions_df.head(4)

Unnamed: 0.1,Unnamed: 0,Disease,Precaution_1,Precaution_2,Precaution_3,Precaution_4
0,0,Drug Reaction,stop irritation,consult nearest hospital,stop taking drug,follow up
1,1,Malaria,Consult nearest hospital,avoid oily food,avoid non veg food,keep mosquitos out
2,2,Allergy,apply calamine,cover area with bandage,,use ice to compress itching
3,3,Hypothyroidism,reduce stress,exercise,eat healthy,get proper sleep


In [23]:
workout_df = pd.read_csv("/content/workout_df.csv")
workout_df.head(4)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,disease,workout
0,0,0,Fungal infection,Avoid sugary foods
1,1,1,Fungal infection,Consume probiotics
2,2,2,Fungal infection,Increase intake of garlic
3,3,3,Fungal infection,Include yogurt in diet


In [24]:
description_df = pd.read_csv("/content/description.csv")
description_df.head(4)

Unnamed: 0,Disease,Description
0,Fungal infection,Fungal infection is a common skin condition ca...
1,Allergy,Allergy is an immune system reaction to a subs...
2,GERD,GERD (Gastroesophageal Reflux Disease) is a di...
3,Chronic cholestasis,Chronic cholestasis is a condition where bile ...


In [25]:
medications_df = pd.read_csv("/content/medications.csv")
medications_df.head(4)

Unnamed: 0,Disease,Medication
0,Fungal infection,"['Antifungal Cream', 'Fluconazole', 'Terbinafi..."
1,Allergy,"['Antihistamines', 'Decongestants', 'Epinephri..."
2,GERD,"['Proton Pump Inhibitors (PPIs)', 'H2 Blockers..."
3,Chronic cholestasis,"['Ursodeoxycholic acid', 'Cholestyramine', 'Me..."


In [26]:
diets_df = pd.read_csv("/content/diets.csv")
diets_df.head(4)

Unnamed: 0,Disease,Diet
0,Fungal infection,"['Antifungal Diet', 'Probiotics', 'Garlic', 'C..."
1,Allergy,"['Elimination Diet', 'Omega-3-rich foods', 'Vi..."
2,GERD,"['Low-Acid Diet', 'Fiber-rich foods', 'Ginger'..."
3,Chronic cholestasis,"['Low-Fat Diet', 'High-Fiber Diet', 'Lean prot..."


In [27]:
doctor_df = pd.read_csv("/content/Doctor.csv")
doctor_df.head(4)

Unnamed: 0,Gender,Age,Disease,DoctorID,Specialization
0,M,58.0,Fungal infection,D42,Dermatologist
1,F,27.0,Allergy,D34,Immunologist
2,M,30.0,GERD,D77,Gastroenterologist
3,M,23.0,Chronic cholestasis,D109,Hepatologist


In [28]:
unique_values = []
unique_values2 = []

disease_dict = {}

for item in y_array:
    if item not in unique_values:
        unique_values.append(item)

for i in df["prognosis"].unique() :
    if i not in unique_values2:
        unique_values2.append(i)

for i in range (0 , len(unique_values2)) :
    disease_dict.update({unique_values[i] : unique_values2[i]})

# 41 disease
print(disease_dict)

{np.int64(15): 'Fungal infection', np.int64(4): 'Allergy', np.int64(16): 'GERD', np.int64(9): 'Chronic cholestasis', np.int64(14): 'Drug Reaction', np.int64(33): 'Peptic ulcer diseae', np.int64(1): 'AIDS', np.int64(12): 'Diabetes ', np.int64(17): 'Gastroenteritis', np.int64(6): 'Bronchial Asthma', np.int64(23): 'Hypertension ', np.int64(30): 'Migraine', np.int64(7): 'Cervical spondylosis', np.int64(32): 'Paralysis (brain hemorrhage)', np.int64(28): 'Jaundice', np.int64(29): 'Malaria', np.int64(8): 'Chicken pox', np.int64(11): 'Dengue', np.int64(37): 'Typhoid', np.int64(40): 'hepatitis A', np.int64(19): 'Hepatitis B', np.int64(20): 'Hepatitis C', np.int64(21): 'Hepatitis D', np.int64(22): 'Hepatitis E', np.int64(3): 'Alcoholic hepatitis', np.int64(36): 'Tuberculosis', np.int64(10): 'Common Cold', np.int64(34): 'Pneumonia', np.int64(13): 'Dimorphic hemmorhoids(piles)', np.int64(18): 'Heart attack', np.int64(39): 'Varicose veins', np.int64(26): 'Hypothyroidism', np.int64(24): 'Hyperthyroi

In [29]:
unique_values3 = []
unique_values4 = []

symptoms_dict = {}

for item in df.columns:
    if item not in unique_values3:
        if item != "prognosis":
            unique_values3.append(item)

for i in range (0 , len(df.columns)) :
    if i not in unique_values4:
        unique_values4.append(i)

for i in range(len(df.columns)-1) :
    symptoms_dict.update({unique_values3[i] : unique_values4[i]})

# 132 cols
print(symptoms_dict)

{'itching': 0, 'skin_rash': 1, 'nodal_skin_eruptions': 2, 'continuous_sneezing': 3, 'shivering': 4, 'chills': 5, 'joint_pain': 6, 'stomach_pain': 7, 'acidity': 8, 'ulcers_on_tongue': 9, 'muscle_wasting': 10, 'vomiting': 11, 'burning_micturition': 12, 'spotting_ urination': 13, 'fatigue': 14, 'weight_gain': 15, 'anxiety': 16, 'cold_hands_and_feets': 17, 'mood_swings': 18, 'weight_loss': 19, 'restlessness': 20, 'lethargy': 21, 'patches_in_throat': 22, 'irregular_sugar_level': 23, 'cough': 24, 'high_fever': 25, 'sunken_eyes': 26, 'breathlessness': 27, 'sweating': 28, 'dehydration': 29, 'indigestion': 30, 'headache': 31, 'yellowish_skin': 32, 'dark_urine': 33, 'nausea': 34, 'loss_of_appetite': 35, 'pain_behind_the_eyes': 36, 'back_pain': 37, 'constipation': 38, 'abdominal_pain': 39, 'diarrhoea': 40, 'mild_fever': 41, 'yellow_urine': 42, 'yellowing_of_eyes': 43, 'acute_liver_failure': 44, 'fluid_overload': 45, 'swelling_of_stomach': 46, 'swelled_lymph_nodes': 47, 'malaise': 48, 'blurred_and

# predict disease

In [30]:
# create a funtion to predict disease by using symtoms

def get_prediction(symptoms) :
    input_vector = np.zeros(len(symptoms_dict))

    for item in symptoms:
        input_vector[symptoms_dict[item]] = 1

    return disease_dict[my_model.predict([input_vector])[0]]

In [31]:
print("seperated your symptoms by using comma(,)")
# symptoms = input("Enter your symptoms: ")
symptoms = "skin_rash , itching"
symptoms

seperated your symptoms by using comma(,)


'skin_rash , itching'

In [32]:
symptoms = symptoms.lower()

# split by comma
patient_symptoms = [s.strip() for s in symptoms.split(',')]

patient_symptoms = [sym.strip() for sym in patient_symptoms]
# patient_symptoms

In [33]:
predicted_disease = get_prediction(patient_symptoms)



In [34]:
print("predicted_disease: " , predicted_disease)

predicted_disease:  Fungal infection


In [35]:
# create an helper funtion to show all the realated info of that disease
# return required columns from each datasets

def helper(disease) :

    # description

    desc = description_df[description_df["Disease"] == disease]["Description"]
    # desc

    # showing description in proper format
    desc = " ".join([i for i in desc])

#     # precaution

    precaution = precautions_df[precautions_df["Disease"] == disease][
        ["Precaution_1" , "Precaution_2" , "Precaution_3" , "Precaution_4"]
    ]
    precaution = [i for i in precaution.values]

    # symptoms

    symptoms = symtoms_df[symtoms_df["Disease"] == disease][
        ["Symptom_1" , "Symptom_2" , "Symptom_3" , "Symptom_4"]
    ]
    symptoms = [i for i in symptoms.values]

#     # medication

    medicine = medications_df[medications_df["Disease"] == disease]["Medication"]
    medicine = [i for i in medicine.values]

#     # diet

    diet = diets_df[diets_df["Disease"] == disease]["Diet"]
    diet = [i for i in diet.values]

#     # workout

    workout = workout_df[workout_df["disease"] == disease]["workout"]

#     doctor

    doctor = doctor_df[doctor_df["Disease"] == disease]

#     doctor_id =
#     Doctor ID


    return desc , precaution , medicine , diet , workout , symptoms , doctor

In [36]:
desc , precaution , medicine , diet , workout , symptoms , doctor = helper(predicted_disease)

In [37]:
print("Description:")
print(desc)

Description:
Fungal infection is a common skin condition caused by fungi.


In [38]:
print("Precautions:")

j = 1

for i in precaution[0]:
    print(j , ": " , i)
    j += 1

Precautions:
1 :  bath twice
2 :  use detol or neem in bathing water
3 :  keep infected area dry
4 :  use clean cloths


In [39]:
print("Related symptoms:")

j = 1

for i in symptoms[0]:
    print(j , " : " , i)
    j += 1

Related symptoms:
1  :  itching
2  :   skin_rash
3  :   nodal_skin_eruptions
4  :   dischromic _patches


In [40]:
print("Medications:")

for i in medicine:
    print(i)

Medications:
['Antifungal Cream', 'Fluconazole', 'Terbinafine', 'Clotrimazole', 'Ketoconazole']


In [41]:
print("Diets:")

for i in diet:
    print(i)

Diets:
['Antifungal Diet', 'Probiotics', 'Garlic', 'Coconut oil', 'Turmeric']


In [42]:
print("Workouts:")

j = 1;

for i in workout:
    print(j , ": " , i)
    j += 1

Workouts:
1 :  Avoid sugary foods
2 :  Consume probiotics
3 :  Increase intake of garlic
4 :  Include yogurt in diet
5 :  Limit processed foods
6 :  Stay hydrated
7 :  Consume green tea
8 :  Eat foods rich in zinc
9 :  Include turmeric in diet
10 :  Eat fruits and vegetables


In [43]:
for i in doctor.Specialization:
    print("Specialization:" , i)

for i in doctor.Gender:
    print("Gender:" , i)

for i in doctor.DoctorID:
    print("DoctorID:" , i)

Specialization: Dermatologist 
Gender: M
DoctorID: D42
