# Importing Libs & Dataset

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import confusion_matrix, precision_score, recall_score,accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from joblib import dump
import pathlib
from transformers import MyPipe

In [2]:
df= pd.read_csv('dataset.csv')
df

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4915,(vertigo) Paroymsal Positional Vertigo,vomiting,headache,nausea,spinning_movements,loss_of_balance,unsteadiness,,,,,,,,,,,
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,,,,,,,,,,,,,
4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,,,,,,,,,,,,,
4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,,,,,,,,,,,


In [3]:
df['Disease'].value_counts()

Disease
Fungal infection                           120
Hepatitis C                                120
Hepatitis E                                120
Alcoholic hepatitis                        120
Tuberculosis                               120
Common Cold                                120
Pneumonia                                  120
Dimorphic hemmorhoids(piles)               120
Heart attack                               120
Varicose veins                             120
Hypothyroidism                             120
Hyperthyroidism                            120
Hypoglycemia                               120
Osteoarthristis                            120
Arthritis                                  120
(vertigo) Paroymsal  Positional Vertigo    120
Acne                                       120
Urinary tract infection                    120
Psoriasis                                  120
Hepatitis D                                120
Hepatitis B                                120
Aller

# Remove NaN Values

In [4]:
df.fillna(0, inplace=True)

# Splitting b/w Train & Test Sets

In [5]:
x_train,x_test,y_train,y_test=train_test_split(df.drop(columns=['Disease']), df['Disease'], test_size=0.3, )

In [6]:
x_train

Unnamed: 0,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
3193,vomiting,headache,nausea,spinning_movements,loss_of_balance,unsteadiness,0,0,0,0,0,0,0,0,0,0,0
2536,muscle_weakness,stiff_neck,swelling_joints,movement_stiffness,painful_walking,0,0,0,0,0,0,0,0,0,0,0,0
2448,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,0,0,0,0,0,0,0,0,0,0,0
1506,chills,fatigue,cough,breathlessness,sweating,malaise,phlegm,chest_pain,fast_heart_rate,rusty_sputum,0,0,0,0,0,0,0
595,chills,vomiting,fatigue,high_fever,headache,nausea,constipation,abdominal_pain,diarrhoea,belly_pain,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4674,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,0,0,0,0,0,0,0,0,0,0,0,0,0
1663,stomach_pain,acidity,ulcers_on_tongue,cough,chest_pain,0,0,0,0,0,0,0,0,0,0,0,0
2667,stomach_pain,acidity,ulcers_on_tongue,vomiting,cough,chest_pain,0,0,0,0,0,0,0,0,0,0,0
2251,itching,fatigue,lethargy,yellowish_skin,dark_urine,loss_of_appetite,abdominal_pain,yellow_urine,yellowing_of_eyes,malaise,receiving_blood_transfusion,receiving_unsterile_injections,0,0,0,0,0


# Data Cleaning (With Transformers)

In [13]:

x_final=MyPipe().fit_transform(x_train)
x_final

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 1, ..., 0, 0, 0],
       [1, 0, 0, ..., 1, 1, 1],
       [0, 0, 0, ..., 0, 0, 0]])

In [8]:
def y_transform(y):
    return np.array(y)

In [9]:
y_final=y_transform(y_train)

In [11]:
x_test_final=MyPipe().fit_transform(x_test)

In [12]:
y_test_final=y_transform(y_test)

# Training the Model

In [14]:
rnf=RandomForestClassifier(n_jobs=-1)
rnf.fit(x_final,y_final)

In [15]:
cross_val_score(rnf,x_final,y_final, cv=3, scoring='accuracy')

array([1., 1., 1.])

In [16]:
knn=KNeighborsClassifier()
knn.fit(x_final,y_final)

In [17]:
sgd=SGDClassifier(n_jobs=-1)
sgd.fit(x_final,y_final)

In [18]:


cross_val_score(sgd,x_final,y_final, cv=5, scoring='accuracy')

array([1., 1., 1., 1., 1.])

In [19]:
cvp=cross_val_predict(sgd,x_final,y_final,)

In [20]:
confusion_matrix(cvp,y_final)

array([[85,  0,  0, ...,  0,  0,  0],
       [ 0, 91,  0, ...,  0,  0,  0],
       [ 0,  0, 77, ...,  0,  0,  0],
       ...,
       [ 0,  0,  0, ..., 89,  0,  0],
       [ 0,  0,  0, ...,  0, 86,  0],
       [ 0,  0,  0, ...,  0,  0, 76]])

In [21]:
precision_score(cvp,y_final,average='macro')

1.0

In [22]:
recall_score(cvp,y_final,average='macro')

1.0

In [23]:
accuracy_score(knn.predict(x_test_final),y_test_final)

1.0

In [24]:
knn.predict(x_test_final)

array(['Hepatitis D', 'Malaria', 'Heart attack', ..., 'Fungal infection',
       'hepatitis A', 'Dengue'], dtype=object)

In [25]:
dump(knn, pathlib.Path("symptoms-disease_model.joblib"))

['symptoms-disease_model.joblib']