# Importing Libs & Dataset

In [40]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score,accuracy_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.neighbors import KNeighborsClassifier
from joblib import dump

import pathlib

In [41]:
df= pd.read_csv('dataset.csv')
df

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,(vertigo) Paroymsal Positional Vertigo,vomiting,headache,nausea,spinning_movements,loss_of_balance,unsteadiness,,,,,,,,,,,
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,,,,,,,,,,,,,
4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,,,,,,,,,,,,,
4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,,,,,,,,,,,


In [42]:
df['Disease'].value_counts()

Disease
Fungal infection                           120
Hepatitis C                                120
Hepatitis E                                120
Alcoholic hepatitis                        120
Tuberculosis                               120
Common Cold                                120
Pneumonia                                  120
Dimorphic hemmorhoids(piles)               120
Heart attack                               120
Varicose veins                             120
Hypothyroidism                             120
Hyperthyroidism                            120
Hypoglycemia                               120
Osteoarthristis                            120
Arthritis                                  120
(vertigo) Paroymsal  Positional Vertigo    120
Acne                                       120
Urinary tract infection                    120
Psoriasis                                  120
Hepatitis D                                120
Hepatitis B                                120
Aller

# Remove NaN Values

In [43]:
df.fillna(0, inplace=True)

# Splitting b/w Train & Test Sets

In [44]:
x_train,x_test,y_train,y_test=train_test_split(df.drop(columns=['Disease']), df['Disease'], test_size=0.1, )

In [45]:
x_train

Unnamed: 0,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
2536,muscle_weakness,stiff_neck,swelling_joints,movement_stiffness,painful_walking,0,0,0,0,0,0,0,0,0,0,0,0
276,chills,fatigue,cough,breathlessness,sweating,malaise,phlegm,chest_pain,fast_heart_rate,rusty_sputum,0,0,0,0,0,0,0
579,itching,skin_rash,fatigue,lethargy,high_fever,headache,loss_of_appetite,mild_fever,malaise,red_spots_over_body,0,0,0,0,0,0,0
3704,itching,vomiting,fatigue,weight_loss,high_fever,yellowish_skin,dark_urine,abdominal_pain,0,0,0,0,0,0,0,0,0
2036,skin_rash,joint_pain,skin_peeling,silver_like_dusting,inflammatory_nails,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2991,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,0,0,0,0,0,0,0,0,0,0,0
3743,back_pain,weakness_in_limbs,neck_pain,dizziness,loss_of_balance,0,0,0,0,0,0,0,0,0,0,0,0
3965,vomiting,breathlessness,sweating,chest_pain,0,0,0,0,0,0,0,0,0,0,0,0,0
2119,muscle_wasting,patches_in_throat,high_fever,0,0,0,0,0,0,0,0,0,0,0,0,0,0


# Data Cleaning (Without Transformers)

In [46]:
class SymtomsCleaner(BaseEstimator, TransformerMixin):
    def fit(self,X,y=None):
        return self
    def transform (self,X,y=None):
        lis=[]
        for i in list(X.values.tolist()):
            l=[]
            for j in i:
                if(j!=0):
                    l.append(j)
                else:
                    continue
            lis.append(l)
        return lis

In [47]:
x_cleaned=SymtomsCleaner().fit_transform(x_train)
x_cleaned

[[' muscle_weakness',
  ' stiff_neck',
  ' swelling_joints',
  ' movement_stiffness',
  ' painful_walking'],
 [' chills',
  ' fatigue',
  ' cough',
  ' breathlessness',
  ' sweating',
  ' malaise',
  ' phlegm',
  ' chest_pain',
  ' fast_heart_rate',
  ' rusty_sputum'],
 ['itching',
  ' skin_rash',
  ' fatigue',
  ' lethargy',
  ' high_fever',
  ' headache',
  ' loss_of_appetite',
  ' mild_fever',
  ' malaise',
  ' red_spots_over_body'],
 ['itching',
  ' vomiting',
  ' fatigue',
  ' weight_loss',
  ' high_fever',
  ' yellowish_skin',
  ' dark_urine',
  ' abdominal_pain'],
 [' skin_rash',
  ' joint_pain',
  ' skin_peeling',
  ' silver_like_dusting',
  ' inflammatory_nails'],
 ['itching',
  ' vomiting',
  ' yellowish_skin',
  ' nausea',
  ' loss_of_appetite',
  ' abdominal_pain',
  ' yellowing_of_eyes'],
 [' vomiting',
  ' indigestion',
  ' loss_of_appetite',
  ' abdominal_pain',
  ' passage_of_gases',
  ' internal_itching'],
 [' muscle_weakness',
  ' stiff_neck',
  ' movement_stiffness',

In [48]:
mlb=MultiLabelBinarizer()
x_final=mlb.fit_transform(x_cleaned)
mlb_1=set(mlb.classes_)

In [49]:
df

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Fungal infection,itching,skin_rash,dischromic _patches,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,(vertigo) Paroymsal Positional Vertigo,vomiting,headache,nausea,spinning_movements,loss_of_balance,unsteadiness,0,0,0,0,0,0,0,0,0,0,0
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,0,0,0,0,0,0,0,0,0,0,0,0,0
4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,0,0,0,0,0,0,0,0,0,0,0,0,0
4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,0,0,0,0,0,0,0,0,0,0,0


In [50]:
def y_transform(y):
    return np.array(y)

In [51]:
y_final=y_transform(y_train)

In [52]:
x_test_cleaned=SymtomsCleaner().fit_transform(x_test)


In [53]:
mlb1=MultiLabelBinarizer()
x_test_final=mlb1.fit_transform(x_test_cleaned)
len(set(mlb1.classes_).intersection(set(mlb.classes_)))

131

In [54]:
y_test_final=y_transform(y_test)

# Training the Model

In [55]:
rnf=RandomForestClassifier(n_jobs=-1)
rnf.fit(x_final,y_final)

In [56]:
cross_val_score(rnf,x_final,y_final, cv=3, scoring='accuracy')

array([1., 1., 1.])

In [57]:
knn=KNeighborsClassifier()
knn.fit(x_final,y_final)

In [58]:
sgd=SGDClassifier(n_jobs=-1)
sgd.fit(x_final,y_final)

In [59]:
cross_val_score(sgd,x_final,y_final, cv=5, scoring='accuracy')

array([1., 1., 1., 1., 1.])

In [60]:
cvp=cross_val_predict(sgd,x_final,y_final,)

In [61]:
confusion_matrix(cvp,y_final)

array([[111,   0,   0, ...,   0,   0,   0],
       [  0, 109,   0, ...,   0,   0,   0],
       [  0,   0, 112, ...,   0,   0,   0],
       ...,
       [  0,   0,   0, ..., 107,   0,   0],
       [  0,   0,   0, ...,   0, 110,   0],
       [  0,   0,   0, ...,   0,   0, 110]])

In [62]:
precision_score(cvp,y_final,average='macro')

1.0

In [63]:
recall_score(cvp,y_final,average='macro')

1.0

In [64]:
accuracy_score(knn.predict(x_test_final),y_test_final)

1.0

In [65]:
knn.predict(x_test_final)

array(['Hepatitis C', 'Dimorphic hemmorhoids(piles)', 'Fungal infection',
       'Hepatitis E', 'Chicken pox', 'Cervical spondylosis', 'Acne',
       'Paralysis (brain hemorrhage)', 'Varicose veins', 'Tuberculosis',
       'Dimorphic hemmorhoids(piles)', 'AIDS',
       'Paralysis (brain hemorrhage)', 'Chronic cholestasis',
       'Hepatitis B', 'Jaundice', 'Hepatitis C', 'Osteoarthristis',
       'Bronchial Asthma', 'hepatitis A', 'Arthritis', 'hepatitis A',
       'Heart attack', 'Dimorphic hemmorhoids(piles)',
       'Cervical spondylosis', 'Hepatitis B', 'AIDS', 'Hypoglycemia',
       'Peptic ulcer diseae', 'Bronchial Asthma', 'Psoriasis',
       'hepatitis A', 'Tuberculosis', 'GERD',
       'Dimorphic hemmorhoids(piles)', 'GERD', 'Pneumonia', 'Hepatitis B',
       'Migraine', '(vertigo) Paroymsal  Positional Vertigo',
       'Paralysis (brain hemorrhage)', 'AIDS', 'Chicken pox', 'Psoriasis',
       'Arthritis', 'Dimorphic hemmorhoids(piles)', 'Hypertension ',
       'Drug Reaction'

In [66]:
dump(rnf, pathlib.Path("symptoms-disease_model.joblib"))

['symptoms-disease_model.joblib']

In [115]:
mlb.classes_

array([' abdominal_pain', ' abnormal_menstruation', ' acidity',
       ' acute_liver_failure', ' altered_sensorium', ' anxiety',
       ' back_pain', ' belly_pain', ' blackheads', ' bladder_discomfort',
       ' blister', ' blood_in_sputum', ' bloody_stool',
       ' blurred_and_distorted_vision', ' breathlessness',
       ' brittle_nails', ' bruising', ' burning_micturition',
       ' chest_pain', ' chills', ' cold_hands_and_feets', ' coma',
       ' congestion', ' constipation', ' continuous_feel_of_urine',
       ' continuous_sneezing', ' cough', ' cramps', ' dark_urine',
       ' dehydration', ' depression', ' diarrhoea',
       ' dischromic _patches', ' distention_of_abdomen', ' dizziness',
       ' drying_and_tingling_lips', ' enlarged_thyroid',
       ' excessive_hunger', ' extra_marital_contacts', ' family_history',
       ' fast_heart_rate', ' fatigue', ' fluid_overload',
       ' foul_smell_of urine', ' headache', ' high_fever',
       ' hip_joint_pain', ' history_of_alcohol_

In [125]:
rnf.predict([x_test_final[-1]])

array(['Allergy'], dtype=object)