# Importing Libs & Dataset

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score,accuracy_score
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.neighbors import KNeighborsClassifier
from joblib import dump
import pathlib

In [2]:
df= pd.read_csv('dataset.csv')
df

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,(vertigo) Paroymsal Positional Vertigo,vomiting,headache,nausea,spinning_movements,loss_of_balance,unsteadiness,,,,,,,,,,,
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,,,,,,,,,,,,,
4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,,,,,,,,,,,,,
4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,,,,,,,,,,,


In [3]:
df['Disease'].value_counts()

Disease
Fungal infection                           120
Hepatitis C                                120
Hepatitis E                                120
Alcoholic hepatitis                        120
Tuberculosis                               120
Common Cold                                120
Pneumonia                                  120
Dimorphic hemmorhoids(piles)               120
Heart attack                               120
Varicose veins                             120
Hypothyroidism                             120
Hyperthyroidism                            120
Hypoglycemia                               120
Osteoarthristis                            120
Arthritis                                  120
(vertigo) Paroymsal  Positional Vertigo    120
Acne                                       120
Urinary tract infection                    120
Psoriasis                                  120
Hepatitis D                                120
Hepatitis B                                120
Aller

# Remove NaN Values

In [4]:
df.fillna(0, inplace=True)

# Splitting b/w Train & Test Sets

In [5]:
x_train,x_test,y_train,y_test=train_test_split(df.drop(columns=['Disease']), df['Disease'], test_size=0.3, )

In [6]:
x_train

Unnamed: 0,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
3250,acidity,indigestion,headache,blurred_and_distorted_vision,excessive_hunger,stiff_neck,depression,irritability,visual_disturbances,0,0,0,0,0,0,0,0
1674,itching,vomiting,yellowish_skin,loss_of_appetite,abdominal_pain,yellowing_of_eyes,0,0,0,0,0,0,0,0,0,0,0
4732,skin_rash,chills,joint_pain,vomiting,fatigue,high_fever,headache,nausea,loss_of_appetite,pain_behind_the_eyes,back_pain,malaise,muscle_pain,red_spots_over_body,0,0,0
2725,joint_pain,vomiting,yellowish_skin,dark_urine,nausea,loss_of_appetite,abdominal_pain,diarrhoea,mild_fever,yellowing_of_eyes,muscle_pain,0,0,0,0,0,0
3876,joint_pain,vomiting,fatigue,yellowish_skin,dark_urine,nausea,loss_of_appetite,abdominal_pain,yellowing_of_eyes,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3461,skin_rash,chills,joint_pain,vomiting,fatigue,high_fever,headache,nausea,loss_of_appetite,pain_behind_the_eyes,back_pain,malaise,muscle_pain,red_spots_over_body,0,0,0
4025,fatigue,weight_loss,restlessness,lethargy,irregular_sugar_level,blurred_and_distorted_vision,obesity,excessive_hunger,increased_appetite,polyuria,0,0,0,0,0,0,0
119,acidity,indigestion,headache,blurred_and_distorted_vision,excessive_hunger,stiff_neck,depression,irritability,0,0,0,0,0,0,0,0,0
1899,chills,vomiting,fatigue,weight_loss,cough,high_fever,breathlessness,loss_of_appetite,mild_fever,yellowing_of_eyes,swelled_lymph_nodes,malaise,phlegm,chest_pain,blood_in_sputum,0,0


# Data Cleaning (Without Transformers)

In [7]:
class SymtomsCleaner(BaseEstimator, TransformerMixin):
    def fit(self,X,y=None):
        return self
    def transform (self,X,y=None):
        lis=[]
        for i in list(X.values.tolist()):
            l=[]
            for j in i:
                if(j!=0):
                    l.append(j)
                else:
                    continue
            lis.append(l)
        return lis

In [11]:
x_cleaned=SymtomsCleaner().fit_transform(x_train)
y_train

3250               Migraine
1674    Chronic cholestasis
4732                 Dengue
2725            hepatitis A
3876            Hepatitis D
               ...         
3461                 Dengue
4025              Diabetes 
119                Migraine
1899           Tuberculosis
475                    AIDS
Name: Disease, Length: 3444, dtype: object

In [9]:
mlb=MultiLabelBinarizer()
x_final=mlb.fit_transform(x_cleaned)
mlb.classes_

array([' abdominal_pain', ' abnormal_menstruation', ' acidity',
       ' acute_liver_failure', ' altered_sensorium', ' anxiety',
       ' back_pain', ' belly_pain', ' blackheads', ' bladder_discomfort',
       ' blister', ' blood_in_sputum', ' bloody_stool',
       ' blurred_and_distorted_vision', ' breathlessness',
       ' brittle_nails', ' bruising', ' burning_micturition',
       ' chest_pain', ' chills', ' cold_hands_and_feets', ' coma',
       ' congestion', ' constipation', ' continuous_feel_of_urine',
       ' continuous_sneezing', ' cough', ' cramps', ' dark_urine',
       ' dehydration', ' depression', ' diarrhoea',
       ' dischromic _patches', ' distention_of_abdomen', ' dizziness',
       ' drying_and_tingling_lips', ' enlarged_thyroid',
       ' excessive_hunger', ' extra_marital_contacts', ' family_history',
       ' fast_heart_rate', ' fatigue', ' fluid_overload',
       ' foul_smell_of urine', ' headache', ' high_fever',
       ' hip_joint_pain', ' history_of_alcohol_

In [12]:
x_final[0]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [54]:
def y_transform(y):
    return np.array(y)

In [55]:
y_final=y_transform(y_train)

In [56]:
x_test_final=MyPipe().fit_transform(x_test)

In [57]:
y_test_final=y_transform(y_test)

# Training the Model

In [58]:
rnf=RandomForestClassifier(n_jobs=-1)
rnf.fit(x_final,y_final)

In [59]:
cross_val_score(rnf,x_final,y_final, cv=3, scoring='accuracy')

array([1., 1., 1.])

In [60]:
knn=KNeighborsClassifier()
knn.fit(x_final,y_final)

In [61]:
sgd=SGDClassifier(n_jobs=-1)
sgd.fit(x_final,y_final)

In [62]:


cross_val_score(sgd,x_final,y_final, cv=5, scoring='accuracy')

array([1., 1., 1., 1., 1.])

In [63]:
cvp=cross_val_predict(sgd,x_final,y_final,)

In [64]:
confusion_matrix(cvp,y_final)

array([[78,  0,  0, ...,  0,  0,  0],
       [ 0, 89,  0, ...,  0,  0,  0],
       [ 0,  0, 77, ...,  0,  0,  0],
       ...,
       [ 0,  0,  0, ..., 73,  0,  0],
       [ 0,  0,  0, ...,  0, 89,  0],
       [ 0,  0,  0, ...,  0,  0, 77]])

In [65]:
precision_score(cvp,y_final,average='macro')

1.0

In [66]:
recall_score(cvp,y_final,average='macro')

1.0

In [67]:
accuracy_score(knn.predict(x_test_final),y_test_final)

1.0

In [68]:
knn.predict(x_test_final)

array(['Hepatitis E', 'Paralysis (brain hemorrhage)', 'Diabetes ', ...,
       'Jaundice', 'Osteoarthristis', 'Allergy'], dtype=object)

In [69]:
dump(knn, pathlib.Path("symptoms-disease_model.joblib"))

['symptoms-disease_model.joblib']

In [70]:
mlb.classes_

array([' abdominal_pain', ' abnormal_menstruation', ' acidity',
       ' acute_liver_failure', ' altered_sensorium', ' anxiety',
       ' back_pain', ' belly_pain', ' blackheads', ' bladder_discomfort',
       ' blister', ' blood_in_sputum', ' bloody_stool',
       ' blurred_and_distorted_vision', ' breathlessness',
       ' brittle_nails', ' bruising', ' burning_micturition',
       ' chest_pain', ' chills', ' cold_hands_and_feets', ' coma',
       ' congestion', ' constipation', ' continuous_feel_of_urine',
       ' continuous_sneezing', ' cough', ' cramps', ' dark_urine',
       ' dehydration', ' depression', ' diarrhoea',
       ' dischromic _patches', ' distention_of_abdomen', ' dizziness',
       ' drying_and_tingling_lips', ' enlarged_thyroid',
       ' excessive_hunger', ' extra_marital_contacts', ' family_history',
       ' fast_heart_rate', ' fatigue', ' fluid_overload',
       ' foul_smell_of urine', ' headache', ' high_fever',
       ' hip_joint_pain', ' history_of_alcohol_