In [1]:
import pandas as pd
import numpy as np
import pickle
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Load the raw disease/symptom table from a path relative to the working directory.
# The previews further down show a 'Disease' column followed by Symptom_1..Symptom_17.
data = pd.read_csv("Data/dataset.csv")

In [9]:
# Symptom vocabulary: 131 names, with 'itching' placed last.
# Several entries contain an embedded space ('dischromic _patches',
# 'foul_smell_of urine', 'spotting_ urination'); the same spellings appear in the
# displayed dataset rows, so they are kept verbatim rather than "cleaned up".
# NOTE(review): no visible cell reads this list -- confirm where it is consumed.
AllSymptoms = ['abdominal_pain',
 'abnormal_menstruation',
 'acidity',
 'acute_liver_failure',
 'altered_sensorium',
 'anxiety',
 'back_pain',
 'belly_pain',
 'blackheads',
 'bladder_discomfort',
 'blister',
 'blood_in_sputum',
 'bloody_stool',
 'blurred_and_distorted_vision',
 'breathlessness',
 'brittle_nails',
 'bruising',
 'burning_micturition',
 'chest_pain',
 'chills',
 'cold_hands_and_feets',
 'coma',
 'congestion',
 'constipation',
 'continuous_feel_of_urine',
 'continuous_sneezing',
 'cough',
 'cramps',
 'dark_urine',
 'dehydration',
 'depression',
 'diarrhoea',
 'dischromic _patches',
 'distention_of_abdomen',
 'dizziness',
 'drying_and_tingling_lips',
 'enlarged_thyroid',
 'excessive_hunger',
 'extra_marital_contacts',
 'family_history',
 'fast_heart_rate',
 'fatigue',
 'fluid_overload',
 'foul_smell_of urine',
 'headache',
 'high_fever',
 'hip_joint_pain',
 'history_of_alcohol_consumption',
 'increased_appetite',
 'indigestion',
 'inflammatory_nails',
 'internal_itching',
 'irregular_sugar_level',
 'irritability',
 'irritation_in_anus',
 'joint_pain',
 'knee_pain',
 'lack_of_concentration',
 'lethargy',
 'loss_of_appetite',
 'loss_of_balance',
 'loss_of_smell',
 'malaise',
 'mild_fever',
 'mood_swings',
 'movement_stiffness',
 'mucoid_sputum',
 'muscle_pain',
 'muscle_wasting',
 'muscle_weakness',
 'nausea',
 'neck_pain',
 'nodal_skin_eruptions',
 'obesity',
 'pain_behind_the_eyes',
 'pain_during_bowel_movements',
 'pain_in_anal_region',
 'painful_walking',
 'palpitations',
 'passage_of_gases',
 'patches_in_throat',
 'phlegm',
 'polyuria',
 'prominent_veins_on_calf',
 'puffy_face_and_eyes',
 'pus_filled_pimples',
 'receiving_blood_transfusion',
 'receiving_unsterile_injections',
 'red_sore_around_nose',
 'red_spots_over_body',
 'redness_of_eyes',
 'restlessness',
 'runny_nose',
 'rusty_sputum',
 'scurring',
 'shivering',
 'silver_like_dusting',
 'sinus_pressure',
 'skin_peeling',
 'skin_rash',
 'slurred_speech',
 'small_dents_in_nails',
 'spinning_movements',
 'spotting_ urination',
 'stiff_neck',
 'stomach_bleeding',
 'stomach_pain',
 'sunken_eyes',
 'sweating',
 'swelled_lymph_nodes',
 'swelling_joints',
 'swelling_of_stomach',
 'swollen_blood_vessels',
 'swollen_extremeties',
 'swollen_legs',
 'throat_irritation',
 'toxic_look_(typhos)',
 'ulcers_on_tongue',
 'unsteadiness',
 'visual_disturbances',
 'vomiting',
 'watering_from_eyes',
 'weakness_in_limbs',
 'weakness_of_one_body_side',
 'weight_gain',
 'weight_loss',
 'yellow_crust_ooze',
 'yellow_urine',
 'yellowing_of_eyes',
 'yellowish_skin',
 'itching']

In [10]:
# The 41 disease labels.
# Spellings are deliberately left untouched, including trailing spaces
# ('Diabetes ', 'Hypertension '), a double space in the vertigo entry, mixed
# capitalisation ('hepatitis A') and typos ('Peptic ulcer diseae').
# NOTE(review): presumably these must match the dataset's 'Disease' values
# character-for-character -- confirm before normalising any of them.
Disease = ['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis',
           'Drug Reaction', 'Peptic ulcer diseae', 'AIDS', 'Diabetes ',
           'Gastroenteritis', 'Bronchial Asthma', 'Hypertension ', 'Migraine',
           'Cervical spondylosis', 'Paralysis (brain hemorrhage)', 'Jaundice',
           'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A',
           'Hepatitis B', 'Hepatitis C', 'Hepatitis D', 'Hepatitis E',
           'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia',
           'Dimorphic hemmorhoids(piles)', 'Heart attack', 'Varicose veins',
           'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia',
           'Osteoarthristis', 'Arthritis',
           '(vertigo) Paroymsal  Positional Vertigo', 'Acne',
           'Urinary tract infection', 'Psoriasis', 'Impetigo']

In [7]:
def create_model(data, model_path='model.pkl', random_state=1):
    """Train a random-forest disease classifier from a wide symptom table and pickle it.

    The table is reshaped into one indicator column per distinct symptom
    (1 if the row lists that symptom, 0 otherwise), split 70/30 into train and
    test rows, fitted, scored on the held-out rows, and saved to disk.

    Parameters
    ----------
    data : pandas.DataFrame
        First column is skipped as the label column and every remaining column
        is treated as a symptom slot holding a symptom name or NaN. The label is
        read from ``data['Disease']``.
    model_path : str, default 'model.pkl'
        Where the fitted classifier is pickled.
    random_state : int or None, default 1
        Seed used for both the train/test split and the forest, so that a
        re-run reproduces the printed accuracy. Pass None for a random run.

    Returns
    -------
    None
        The test accuracy is printed and the model is written to ``model_path``.
    """
    symptom_cols = list(data.columns[1:])
    long_form = pd.melt(data.reset_index(), id_vars=['index'], value_vars=symptom_cols)

    # Normalise symptom names BEFORE pivoting. Stripping the column labels
    # afterwards (as was done previously) leaves e.g. ' itching' and 'itching'
    # as two separate feature columns that merely share a name.
    long_form['value'] = long_form['value'].map(
        lambda name: name.strip() if isinstance(name, str) else name
    )
    long_form['add'] = 1
    features = pd.pivot_table(long_form, values='add', index='index', columns='value')

    # The pivot is indexed by the original row labels, so the label column
    # aligns with data['Disease'] on insertion.
    features.insert(0, 'label', data['Disease'])
    features = features.fillna(0)

    train = features.sample(frac=0.7, random_state=random_state)
    test = features.drop(index=train.index)
    x_train, y_train = train.drop('label', axis=1), train['label']
    x_test, y_test = test.drop('label', axis=1), test['label']

    # Seed the forest as well; an unseeded forest gives a different model and
    # accuracy on every run.
    rfc = RandomForestClassifier(random_state=random_state)
    rfc.fit(x_train, y_train)
    print(accuracy_score(y_test, rfc.predict(x_test)))

    # Model save
    with open(model_path, 'wb') as f:
        pickle.dump(rfc, f)

def Add_Disease(newDisease):
    """Interactively append a new disease and its symptoms to the saved dataset.

    Reads "Updated_data.csv". If the disease is already present (compared
    ignoring case and surrounding whitespace) the table is returned unchanged.
    Otherwise symptoms are read from stdin, one per line, until "end" is typed
    or every symptom column has a value; unused symptom columns are left NaN.

    Parameters
    ----------
    newDisease : str
        Name of the disease to add. It is stored stripped and lower-cased.

    Returns
    -------
    pandas.DataFrame
        The dataset, with one extra row when a disease was added. Nothing is
        written to disk here; saving is left to the caller.
    """
    data = pd.read_csv("Updated_data.csv")
    # When the CSV was written with its index, that index comes back as an
    # 'Unnamed: 0' column. Tolerate files saved without it.
    data = data.drop(columns=['Unnamed: 0'], errors='ignore')

    newDiseaseLwr = newDisease.strip().lower()
    # Strip as well as lower-case: stored labels such as 'Diabetes ' carry
    # trailing spaces that would otherwise defeat the duplicate check.
    known = {str(name).strip().lower() for name in data['Disease'].dropna().unique()}
    if newDiseaseLwr in known:
        print("Disease already available in Dataset..")
        return data

    # One slot per symptom column (every column except 'Disease').
    max_symptoms = data.shape[1] - 1

    print("Enter Symptoms ")
    print("enter \"end\" when end Symptoms after last Symptoms entered" )
    Symptoms = []
    while len(Symptoms) < max_symptoms:
        print("Symptom ", len(Symptoms))
        Symptom = input().strip()
        if Symptom == 'end':
            break
        if Symptom:  # ignore blank lines instead of storing an empty symptom
            Symptoms.append(Symptom)

    # Pad with NaN so the row has a value for every symptom column.
    row = [newDiseaseLwr] + Symptoms + [np.nan] * (max_symptoms - len(Symptoms))

    # Append after the last row of *this* frame. read_csv gives a 0..n-1 index,
    # so len(data.index) is the next free label. (Using a notebook-level `df`
    # here fails on a fresh kernel and can overwrite an existing row.)
    data.loc[len(data.index)] = row
    print("Disease add successfully")
    return data

# Ask for a disease name, add it (with interactively entered symptoms) to the
# saved dataset, write the result back to the same CSV, then train and pickle
# a model on the updated table.
nds = input("Enter new Disease Name :")
df = Add_Disease(nds)
# to_csv also writes the index; on the next load it shows up as the
# 'Unnamed: 0' column that Add_Disease removes.
df.to_csv("Updated_data.csv")
create_model(df)

In [3]:
# Preview the first rows of the loaded table.
data.head()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,


In [4]:
# (rows, columns) of the loaded table.
data.shape

(4920, 18)

In [5]:
# Create the working copy that Add_Disease reads and the later cells overwrite.
# The index is written too, which is why the file gains an 'Unnamed: 0' column
# when it is read back.
data.to_csv("Updated_data.csv")

In [86]:
# Reload the working copy and discard the index column that to_csv saved,
# then check that the shape matches the original table.
data1 = pd.read_csv("Updated_data.csv")
del data1['Unnamed: 0']
data1.shape

(4920, 18)

In [87]:
# df is a second name for the same DataFrame object as data1 (no copy is made).
df = data1

In [93]:
def Add_Disease(newDisease):
    """Interactively append a new disease and its symptoms to the saved dataset.

    Reads "Updated_data.csv". If the disease is already present (compared
    ignoring case and surrounding whitespace) the table is returned unchanged.
    Otherwise symptoms are read from stdin, one per line, until "end" is typed
    or every symptom column has a value; unused symptom columns are left NaN.

    Parameters
    ----------
    newDisease : str
        Name of the disease to add. It is stored stripped and lower-cased.

    Returns
    -------
    pandas.DataFrame
        The dataset, with one extra row when a disease was added. Nothing is
        written to disk here; saving is left to the caller.
    """
    data = pd.read_csv("Updated_data.csv")
    # When the CSV was written with its index, that index comes back as an
    # 'Unnamed: 0' column. Tolerate files saved without it.
    data = data.drop(columns=['Unnamed: 0'], errors='ignore')

    newDiseaseLwr = newDisease.strip().lower()
    # Strip as well as lower-case: stored labels such as 'Diabetes ' carry
    # trailing spaces that would otherwise defeat the duplicate check.
    known = {str(name).strip().lower() for name in data['Disease'].dropna().unique()}
    if newDiseaseLwr in known:
        print("Disease already available in Dataset..")
        return data

    # One slot per symptom column (every column except 'Disease').
    max_symptoms = data.shape[1] - 1

    print("Enter Symptoms ")
    print("enter \"end\" when end Symptoms after last Symptoms entered" )
    Symptoms = []
    while len(Symptoms) < max_symptoms:
        print("Symptom ", len(Symptoms))
        Symptom = input().strip()
        if Symptom == 'end':
            break
        if Symptom:  # ignore blank lines instead of storing an empty symptom
            Symptoms.append(Symptom)

    # Pad with NaN so the row has a value for every symptom column.
    row = [newDiseaseLwr] + Symptoms + [np.nan] * (max_symptoms - len(Symptoms))

    # Append new row at last to dataset. read_csv gives a 0..n-1 index, so
    # len(data.index) is the next free label. (Using a notebook-level `df`
    # here fails on a fresh kernel and can overwrite an existing row.)
    data.loc[len(data.index)] = row
    print("Disease add successfully")
    return data

In [95]:
# Ask for a disease name, add it (with interactively entered symptoms) to the
# saved dataset, write the result back to the same CSV, then train and pickle
# a model on the updated table.
nds = input("Enter new Disease Name :")
df = Add_Disease(nds)
# to_csv also writes the index; on the next load it shows up as the
# 'Unnamed: 0' column that Add_Disease removes.
df.to_csv("Updated_data.csv")
create_model(df)

Enter new Disease Name :sagar
Enter Symptoms 
enter "end" when end Symptoms after last Symptoms entered
Symptom  0
qq
Symptom  1
ww
Symptom  2
ee
Symptom  3
rr
Symptom  4
tt
Symptom  5
end
Disease add successfully


<bound method NDFrame.head of                       Disease             Symptom_1              Symptom_2  \
0            Fungal infection               itching              skin_rash   
1            Fungal infection             skin_rash   nodal_skin_eruptions   
2            Fungal infection               itching   nodal_skin_eruptions   
3            Fungal infection               itching              skin_rash   
4            Fungal infection               itching              skin_rash   
...                       ...                   ...                    ...   
4916                     Acne             skin_rash     pus_filled_pimples   
4917  Urinary tract infection   burning_micturition     bladder_discomfort   
4918                Psoriasis             skin_rash             joint_pain   
4919                 Impetigo             skin_rash             high_fever   
4920                    sagar                    qq                     ww   

                  Symptom_3      

In [96]:
# Show the last rows to check that the new disease row was appended.
df.tail()

Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
4916,Acne,skin_rash,pus_filled_pimples,blackheads,scurring,,,,,,,,,,,,,
4917,Urinary tract infection,burning_micturition,bladder_discomfort,foul_smell_of urine,continuous_feel_of_urine,,,,,,,,,,,,,
4918,Psoriasis,skin_rash,joint_pain,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,,,,,,,,,,,
4919,Impetigo,skin_rash,high_fever,blister,red_sore_around_nose,yellow_crust_ooze,,,,,,,,,,,,
4920,sagar,qq,ww,ee,rr,tt,,,,,,,,,,,,
