# Importing Libs & Dataset

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score,accuracy_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.neighbors import KNeighborsClassifier
from joblib import dump

import pathlib

In [2]:
df= pd.read_csv('dataset.csv')
df

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,(vertigo) Paroymsal Positional Vertigo,vomiting,headache,nausea,spinning_movements,loss_of_balance,unsteadiness,,,,,,,,,,,
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,,,,,,,,,,,,,
4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,,,,,,,,,,,,,
4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,,,,,,,,,,,


In [3]:
df['Disease'].value_counts()

Disease
Fungal infection                           120
Hepatitis C                                120
Hepatitis E                                120
Alcoholic hepatitis                        120
Tuberculosis                               120
Common Cold                                120
Pneumonia                                  120
Dimorphic hemmorhoids(piles)               120
Heart attack                               120
Varicose veins                             120
Hypothyroidism                             120
Hyperthyroidism                            120
Hypoglycemia                               120
Osteoarthristis                            120
Arthritis                                  120
(vertigo) Paroymsal  Positional Vertigo    120
Acne                                       120
Urinary tract infection                    120
Psoriasis                                  120
Hepatitis D                                120
Hepatitis B                                120
Aller

# Remove NaN Values

In [4]:
df.fillna(0, inplace=True)

# Splitting b/w Train & Test Sets

In [93]:
x_train,x_test,y_train,y_test=train_test_split(df.drop(columns=['Disease']), df['Disease'], test_size=0.1, )

In [94]:
x_train

Unnamed: 0,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
3794,joint_pain,vomiting,fatigue,yellowish_skin,dark_urine,nausea,loss_of_appetite,abdominal_pain,yellowing_of_eyes,0,0,0,0,0,0,0,0
4174,vomiting,fatigue,anxiety,sweating,headache,nausea,blurred_and_distorted_vision,excessive_hunger,drying_and_tingling_lips,slurred_speech,irritability,palpitations,0,0,0,0,0
2868,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,0,0,0,0,0,0,0,0,0,0,0
1529,vomiting,breathlessness,chest_pain,0,0,0,0,0,0,0,0,0,0,0,0,0,0
379,skin_rash,blackheads,scurring,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,itching,lethargy,yellowish_skin,dark_urine,loss_of_appetite,abdominal_pain,yellow_urine,yellowing_of_eyes,malaise,receiving_blood_transfusion,receiving_unsterile_injections,0,0,0,0,0,0
2552,headache,chest_pain,dizziness,loss_of_balance,lack_of_concentration,0,0,0,0,0,0,0,0,0,0,0,0
4144,itching,vomiting,yellowish_skin,nausea,loss_of_appetite,abdominal_pain,yellowing_of_eyes,0,0,0,0,0,0,0,0,0,0
2345,vomiting,breathlessness,sweating,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Data Cleaning (Without Transformers)

In [29]:
class SymtomsCleaner(BaseEstimator, TransformerMixin):
    def fit(self,X,y=None):
        return self
    def transform (self,X,y=None):
        lis=[]
        for i in list(X.values.tolist()):
            l=[]
            for j in i:
                if(j!=0):
                    l.append(j)
                else:
                    continue
            lis.append(l)
        return lis

In [96]:
x_cleaned=SymtomsCleaner().fit_transform(x_train)
x_cleaned

[[' joint_pain',
  ' vomiting',
  ' fatigue',
  ' yellowish_skin',
  ' dark_urine',
  ' nausea',
  ' loss_of_appetite',
  ' abdominal_pain',
  ' yellowing_of_eyes'],
 [' vomiting',
  ' fatigue',
  ' anxiety',
  ' sweating',
  ' headache',
  ' nausea',
  ' blurred_and_distorted_vision',
  ' excessive_hunger',
  ' drying_and_tingling_lips',
  ' slurred_speech',
  ' irritability',
  ' palpitations'],
 [' skin_rash',
  ' joint_pain',
  ' skin_peeling',
  ' silver_like_dusting',
  ' small_dents_in_nails',
  ' inflammatory_nails'],
 [' vomiting', ' breathlessness', ' chest_pain'],
 [' skin_rash', ' blackheads', ' scurring'],
 [' vomiting', ' sunken_eyes', ' dehydration', ' diarrhoea'],
 [' vomiting',
  ' headache',
  ' weakness_of_one_body_side',
  ' altered_sensorium'],
 [' skin_rash', ' pus_filled_pimples', ' blackheads'],
 [' sunken_eyes', ' dehydration', ' diarrhoea'],
 [' skin_rash', ' pus_filled_pimples', ' blackheads', ' scurring'],
 [' joint_pain',
  ' skin_peeling',
  ' silver_like_

In [97]:
mlb=MultiLabelBinarizer(classes=[' abdominal_pain', ' abnormal_menstruation', ' acidity',
       ' acute_liver_failure', ' altered_sensorium', ' anxiety',
       ' back_pain', ' belly_pain', ' blackheads', ' bladder_discomfort',
       ' blister', ' blood_in_sputum', ' bloody_stool',
       ' blurred_and_distorted_vision', ' breathlessness',
       ' brittle_nails', ' bruising', ' burning_micturition',
       ' chest_pain', ' chills', ' cold_hands_and_feets', ' coma',
       ' congestion', ' constipation', ' continuous_feel_of_urine',
       ' continuous_sneezing', ' cough', ' cramps', ' dark_urine',
       ' dehydration', ' depression', ' diarrhoea',
       ' dischromic _patches', ' distention_of_abdomen', ' dizziness',
       ' drying_and_tingling_lips', ' enlarged_thyroid',
       ' excessive_hunger', ' extra_marital_contacts', ' family_history',
       ' fast_heart_rate', ' fatigue', ' fluid_overload',
       ' foul_smell_of urine', ' headache', ' high_fever',
       ' hip_joint_pain', ' history_of_alcohol_consumption',
       ' increased_appetite', ' indigestion', ' inflammatory_nails',
       ' internal_itching', ' irregular_sugar_level', ' irritability',
       ' irritation_in_anus', ' joint_pain', ' knee_pain',
       ' lack_of_concentration', ' lethargy', ' loss_of_appetite',
       ' loss_of_balance', ' loss_of_smell', ' malaise', ' mild_fever',
       ' mood_swings', ' movement_stiffness', ' mucoid_sputum',
       ' muscle_pain', ' muscle_wasting', ' muscle_weakness', ' nausea',
       ' neck_pain', ' nodal_skin_eruptions', ' obesity',
       ' pain_behind_the_eyes', ' pain_during_bowel_movements',
       ' pain_in_anal_region', ' painful_walking', ' palpitations',
       ' passage_of_gases', ' patches_in_throat', ' phlegm', ' polyuria',
       ' prominent_veins_on_calf', ' puffy_face_and_eyes',
       ' pus_filled_pimples', ' receiving_blood_transfusion',
       ' receiving_unsterile_injections', ' red_sore_around_nose',
       ' red_spots_over_body', ' redness_of_eyes', ' restlessness',
       ' runny_nose', ' rusty_sputum', ' scurring', ' shivering',
       ' silver_like_dusting', ' sinus_pressure', ' skin_peeling',
       ' skin_rash', ' slurred_speech', ' small_dents_in_nails',
       ' spinning_movements', ' spotting_ urination', ' stiff_neck',
       ' stomach_bleeding', ' stomach_pain', ' sunken_eyes', ' sweating',
       ' swelled_lymph_nodes', ' swelling_joints', ' swelling_of_stomach',
       ' swollen_blood_vessels', ' swollen_extremeties', ' swollen_legs',
       ' throat_irritation', ' toxic_look_(typhos)', ' ulcers_on_tongue',
       ' unsteadiness', ' visual_disturbances', ' vomiting',
       ' watering_from_eyes', ' weakness_in_limbs',
       ' weakness_of_one_body_side', ' weight_gain', ' weight_loss',
       ' yellow_crust_ooze', ' yellow_urine', ' yellowing_of_eyes',
       ' yellowish_skin', 'itching'])
x_final=mlb.fit_transform(x_cleaned)
mlb.classes_

array([' abdominal_pain', ' abnormal_menstruation', ' acidity',
       ' acute_liver_failure', ' altered_sensorium', ' anxiety',
       ' back_pain', ' belly_pain', ' blackheads', ' bladder_discomfort',
       ' blister', ' blood_in_sputum', ' bloody_stool',
       ' blurred_and_distorted_vision', ' breathlessness',
       ' brittle_nails', ' bruising', ' burning_micturition',
       ' chest_pain', ' chills', ' cold_hands_and_feets', ' coma',
       ' congestion', ' constipation', ' continuous_feel_of_urine',
       ' continuous_sneezing', ' cough', ' cramps', ' dark_urine',
       ' dehydration', ' depression', ' diarrhoea',
       ' dischromic _patches', ' distention_of_abdomen', ' dizziness',
       ' drying_and_tingling_lips', ' enlarged_thyroid',
       ' excessive_hunger', ' extra_marital_contacts', ' family_history',
       ' fast_heart_rate', ' fatigue', ' fluid_overload',
       ' foul_smell_of urine', ' headache', ' high_fever',
       ' hip_joint_pain', ' history_of_alcohol_

In [98]:
df

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Fungal infection,itching,skin_rash,dischromic _patches,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,(vertigo) Paroymsal Positional Vertigo,vomiting,headache,nausea,spinning_movements,loss_of_balance,unsteadiness,0,0,0,0,0,0,0,0,0,0,0
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,0,0,0,0,0,0,0,0,0,0,0,0,0
4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,0,0,0,0,0,0,0,0,0,0,0,0,0
4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,0,0,0,0,0,0,0,0,0,0,0


In [99]:
def y_transform(y):
    return np.array(y)

In [100]:
y_final=y_transform(y_train)

In [121]:
x_test_cleaned=SymtomsCleaner().fit_transform(x_test)
x_test_cleaned.append([' cough',' headache',' high_fever',' nausea',' shivering'])
x_test_cleaned[-1]

[' cough', ' headache', ' high_fever', ' nausea', ' shivering']

In [122]:

x_test_final=mlb.fit_transform(x_test_cleaned)


In [123]:
y_test_final=y_transform(y_test)

# Training the Model

In [124]:
rnf=RandomForestClassifier(n_jobs=-1)
rnf.fit(x_final,y_final)

In [105]:
cross_val_score(rnf,x_final,y_final, cv=3, scoring='accuracy')

array([1., 1., 1.])

In [106]:
knn=KNeighborsClassifier()
knn.fit(x_final,y_final)

In [107]:
sgd=SGDClassifier(n_jobs=-1)
sgd.fit(x_final,y_final)

In [108]:
cross_val_score(sgd,x_final,y_final, cv=5, scoring='accuracy')

array([1., 1., 1., 1., 1.])

In [109]:
cvp=cross_val_predict(sgd,x_final,y_final,)

In [110]:
confusion_matrix(cvp,y_final)

array([[106,   0,   0, ...,   0,   0,   0],
       [  0, 108,   0, ...,   0,   0,   0],
       [  0,   0, 110, ...,   0,   0,   0],
       ...,
       [  0,   0,   0, ..., 109,   0,   0],
       [  0,   0,   0, ...,   0, 106,   0],
       [  0,   0,   0, ...,   0,   0, 110]])

In [111]:
precision_score(cvp,y_final,average='macro')

1.0

In [112]:
recall_score(cvp,y_final,average='macro')

1.0

In [47]:
accuracy_score(knn.predict(x_test_final),y_test_final)

ValueError: X has 135 features, but KNeighborsClassifier is expecting 131 features as input.

In [113]:
knn.predict(x_test_final)

array(['Hepatitis B', 'Typhoid', 'Dengue', 'Psoriasis', 'Malaria',
       'Hepatitis D', 'Hypoglycemia', 'AIDS', 'AIDS', 'Hepatitis B',
       'Heart attack', 'AIDS', 'GERD', 'Common Cold', 'Heart attack',
       'Tuberculosis', 'Jaundice', 'Diabetes ', 'Arthritis',
       'Peptic ulcer diseae', 'Chronic cholestasis', 'Fungal infection',
       'AIDS', 'Common Cold', 'AIDS', 'Psoriasis', 'Drug Reaction',
       'Bronchial Asthma', 'Dengue', 'Malaria', 'Chicken pox',
       'Gastroenteritis', 'hepatitis A', 'AIDS', 'Hepatitis D',
       'Heart attack', 'Jaundice', 'Alcoholic hepatitis', 'Allergy',
       'hepatitis A', 'Hepatitis C', 'Gastroenteritis', 'Osteoarthristis',
       'Hepatitis E', 'Dengue', 'Common Cold', 'Heart attack', 'Migraine',
       'Dimorphic hemmorhoids(piles)', 'Paralysis (brain hemorrhage)',
       'Varicose veins', 'Alcoholic hepatitis', 'Arthritis',
       'Gastroenteritis', 'Pneumonia', 'Dengue', 'Hyperthyroidism',
       'Hepatitis D', 'Pneumonia', 'Bronchial 

In [114]:
dump(rnf, pathlib.Path("symptoms-disease_model.joblib"))

['symptoms-disease_model.joblib']

In [115]:
mlb.classes_

array([' abdominal_pain', ' abnormal_menstruation', ' acidity',
       ' acute_liver_failure', ' altered_sensorium', ' anxiety',
       ' back_pain', ' belly_pain', ' blackheads', ' bladder_discomfort',
       ' blister', ' blood_in_sputum', ' bloody_stool',
       ' blurred_and_distorted_vision', ' breathlessness',
       ' brittle_nails', ' bruising', ' burning_micturition',
       ' chest_pain', ' chills', ' cold_hands_and_feets', ' coma',
       ' congestion', ' constipation', ' continuous_feel_of_urine',
       ' continuous_sneezing', ' cough', ' cramps', ' dark_urine',
       ' dehydration', ' depression', ' diarrhoea',
       ' dischromic _patches', ' distention_of_abdomen', ' dizziness',
       ' drying_and_tingling_lips', ' enlarged_thyroid',
       ' excessive_hunger', ' extra_marital_contacts', ' family_history',
       ' fast_heart_rate', ' fatigue', ' fluid_overload',
       ' foul_smell_of urine', ' headache', ' high_fever',
       ' hip_joint_pain', ' history_of_alcohol_

In [125]:
rnf.predict([x_test_final[-1]])

array(['Allergy'], dtype=object)

In [5]:
df_nod=df.drop_duplicates()

In [139]:
df_nod=SymtomsCleaner().fit_transform(df_nod)

In [18]:
df_dict=df_nod.groupby('Disease').agg({'Symptom_1': list,'Symptom_2': list,'Symptom_3': list, 'Symptom_4': list, 'Symptom_5': list, 'Symptom_6': list, 'Symptom_7': list, 'Symptom_8': list, 'Symptom_9': list, 'Symptom_10': list,'Symptom_11': list, 'Symptom_12': list, 'Symptom_13': list, 'Symptom_14': list, 'Symptom_15': list, 'Symptom_16': list, 'Symptom_17': list  })

In [35]:
df_nod.loc[df['Disease']=='AIDS', 'Symptom_1']

60        muscle_wasting
61     patches_in_throat
62        muscle_wasting
63        muscle_wasting
64        muscle_wasting
Name: Symptom_1, dtype: object

In [32]:
df_dict_vals=SymtomsCleaner().fit_transform(df_dict)

In [40]:
li=[]
for i in df_dict_vals:
    l=[]
    for j in i:
        for k in j:
            l.append(k)
    li.append(l)

In [58]:
l=[list(set(i)) for i in li]

In [59]:
dict_vals=[]
for i in l:
    fl=[]
    for j in i:
        if(j!=0):
            fl.append(j)
    dict_vals.append(fl)

In [70]:
dictionary=dict(zip(list(df_dict.index),dict_vals))

In [71]:
dictionary

{'(vertigo) Paroymsal  Positional Vertigo': [' nausea',
  ' loss_of_balance',
  ' unsteadiness',
  ' spinning_movements',
  ' vomiting',
  ' headache'],
 'AIDS': [' high_fever',
  ' patches_in_throat',
  ' extra_marital_contacts',
  ' muscle_wasting'],
 'Acne': [' skin_rash', ' scurring', ' pus_filled_pimples', ' blackheads'],
 'Alcoholic hepatitis': [' distention_of_abdomen',
  ' history_of_alcohol_consumption',
  ' fluid_overload',
  ' yellowish_skin',
  ' swelling_of_stomach',
  ' vomiting',
  ' abdominal_pain'],
 'Allergy': [' shivering',
  ' watering_from_eyes',
  ' continuous_sneezing',
  ' chills'],
 'Arthritis': [' movement_stiffness',
  ' painful_walking',
  ' swelling_joints',
  ' muscle_weakness',
  ' stiff_neck'],
 'Bronchial Asthma': [' high_fever',
  ' family_history',
  ' fatigue',
  ' cough',
  ' mucoid_sputum',
  ' breathlessness'],
 'Cervical spondylosis': [' back_pain',
  ' neck_pain',
  ' loss_of_balance',
  ' dizziness',
  ' weakness_in_limbs'],
 'Chicken pox': [' 

In [73]:
dump(dictionary, pathlib.Path("data_dict.joblib"))

['data_dict.joblib']