# Importing Libs & Dataset

In [37]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score,accuracy_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

In [38]:
df= pd.read_csv('dataset.csv')
df

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,(vertigo) Paroymsal Positional Vertigo,vomiting,headache,nausea,spinning_movements,loss_of_balance,unsteadiness,,,,,,,,,,,
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,,,,,,,,,,,,,
4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,,,,,,,,,,,,,
4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,,,,,,,,,,,


In [39]:
df['Disease'].value_counts()

Disease
Fungal infection                           120
Hepatitis C                                120
Hepatitis E                                120
Alcoholic hepatitis                        120
Tuberculosis                               120
Common Cold                                120
Pneumonia                                  120
Dimorphic hemmorhoids(piles)               120
Heart attack                               120
Varicose veins                             120
Hypothyroidism                             120
Hyperthyroidism                            120
Hypoglycemia                               120
Osteoarthristis                            120
Arthritis                                  120
(vertigo) Paroymsal  Positional Vertigo    120
Acne                                       120
Urinary tract infection                    120
Psoriasis                                  120
Hepatitis D                                120
Hepatitis B                                120
Aller

# Remove NaN Values

In [40]:
df.fillna(0, inplace=True)

# Splitting b/w Train & Test Sets

In [41]:
x_train,x_test,y_train,y_test=train_test_split(df.drop(columns=['Disease']), df['Disease'], test_size=0.2, )

In [42]:
x_train

Unnamed: 0,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
4216,joint_pain,neck_pain,knee_pain,hip_joint_pain,swelling_joints,painful_walking,0,0,0,0,0,0,0,0,0,0,0
3940,itching,skin_rash,stomach_pain,burning_micturition,spotting_ urination,0,0,0,0,0,0,0,0,0,0,0,0
4540,fatigue,cramps,bruising,obesity,swollen_legs,swollen_blood_vessels,prominent_veins_on_calf,0,0,0,0,0,0,0,0,0,0
3911,itching,skin_rash,fatigue,lethargy,high_fever,headache,loss_of_appetite,mild_fever,swelled_lymph_nodes,malaise,red_spots_over_body,0,0,0,0,0,0
4319,itching,vomiting,fatigue,weight_loss,high_fever,yellowish_skin,dark_urine,abdominal_pain,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4071,back_pain,weakness_in_limbs,neck_pain,dizziness,loss_of_balance,0,0,0,0,0,0,0,0,0,0,0,0
2211,skin_rash,fatigue,lethargy,high_fever,headache,loss_of_appetite,mild_fever,swelled_lymph_nodes,malaise,red_spots_over_body,0,0,0,0,0,0,0
2394,joint_pain,neck_pain,hip_joint_pain,swelling_joints,painful_walking,0,0,0,0,0,0,0,0,0,0,0,0
4906,chills,fatigue,cough,high_fever,breathlessness,sweating,malaise,phlegm,chest_pain,fast_heart_rate,rusty_sputum,0,0,0,0,0,0


# Data Cleaning (With Transformers)

In [51]:
class SymtomsCleaner(BaseEstimator, TransformerMixin):
    def fit(self,X,y=None):
        return self
    def transform (self,X,y=None):
        lis=[]
        for i in list(X.values.tolist()):
            l=[]
            for j in i:
                if(j!=0):
                    l.append(j)
                else:
                    continue
            lis.append(l)
        return lis

In [52]:
class MyMlb(BaseEstimator, TransformerMixin):
    def fit(self,X,y=None):
        return self
    def transform(self, X, y=None):
        return MultiLabelBinarizer().fit_transform(X)

In [65]:
pipe=Pipeline([
    ('symptom_cleaning',SymtomsCleaner()),
    ('mlb',MyMlb())
])
x_final=pipe.fit_transform(x_train)

In [66]:
def y_transform(y):
    return np.array(y)

In [77]:
y_final=y_transform(y_train)

In [81]:
x_test_final=pipe.fit_transform(x_test)

In [82]:
y_test_final=y_transform(y_test)

# Training the Model

In [68]:
rnf=RandomForestClassifier(n_jobs=-1)
rnf.fit(x_final,y_final)

In [69]:
cross_val_score(rnf,x_final,y_final, cv=3, scoring='accuracy')

array([1., 1., 1.])

In [70]:
sgd=SGDClassifier(n_jobs=-1)
sgd.fit(x_final,y_final)

In [71]:
cross_val_score(sgd,x_final,y_final, cv=2, scoring='accuracy')

array([1., 1.])

In [72]:
cvp=cross_val_predict(sgd,x_final,y_final,)

In [73]:
confusion_matrix(cvp,y_final)

array([[100,   0,   0, ...,   0,   0,   0],
       [  0,  95,   0, ...,   0,   0,   0],
       [  0,   0,  90, ...,   0,   0,   0],
       ...,
       [  0,   0,   0, ...,  95,   0,   0],
       [  0,   0,   0, ...,   0,  98,   0],
       [  0,   0,   0, ...,   0,   0,  98]])

In [74]:
precision_score(cvp,y_final,average='micro')

1.0

In [75]:
recall_score(cvp,y_final,average='micro')

1.0

In [84]:
accuracy_score(sgd.predict(x_test_final),y_test_final)

1.0