# Symptom-Based Disease Prediction and Its Treatments
A Bangkit Final Project proposed by B21-CAP0170

Many diseases need to be detected at an early stage so that a treatment plan can be identified early on and the patient can maintain a good quality of life. Early detection of a disease and a precise diagnosis allow for quicker action, save precious time, and help prevent complications and rapid worsening. Health practitioners have conducted surveys and collected data on patients, their diseases, and their symptoms, which allows a patient's disease to be distinguished by its common symptoms. This dataset can therefore be used to train a model that predicts the disease based on the symptoms.

## Importing Libraries

In [None]:
# Importing Libraries
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import tensorflow_decision_forests as tfdf
import tensorflow as tf

# sys_pipes is used further down as a context manager around model.fit().
# Prefer wurlitzer; fall back to the Colab helper only when wurlitzer is not
# installed. Catching ImportError (rather than everything) keeps unrelated
# failures, such as KeyboardInterrupt, from being silently swallowed.
try:
    from wurlitzer import sys_pipes
except ImportError:
    from colabtools.googlelog import CaptureLog as sys_pipes

In [None]:
# Load the three CSV files that make up the dataset
df = pd.read_csv("Dataset/dataset.csv")
df_description = pd.read_csv("Dataset/symptom_description.csv")
# Missing precaution entries are replaced with 0 right after loading
df_precaution = pd.read_csv("Dataset/symptom_precaution.csv").fillna(0)

# Show the descriptions as a list with one dictionary per row
df_description.to_dict(orient='records')

In [None]:
# Filter DataFrame to disease only (delete all symptom columns).
# Every column after the first one is dropped in a single call, so the cell
# no longer depends on the dataset having exactly 17 symptom columns and does
# not build 17 intermediate DataFrames.
disease_only = df.drop(columns=df.columns[1:])

disease_only.head()

In [None]:
# Collect every distinct disease name into a plain Python list
disease_list = pd.unique(disease_only['Disease']).tolist()
print(f"Labels of diseases: {disease_list}")

In [None]:
# Number of distinct diseases (length of disease_list) in the dataset
len(disease_list)

In [None]:
# Keep only the symptom columns by dropping the first (disease) column
first_column = df.columns[0]
symptoms_only = df.drop(columns=[first_column])

# Optional export, left disabled:
# df.to_csv('file.csv', index=False)

symptoms_only.head()

In [None]:
# Put all unique symptoms in a list, ordered by first appearance when the
# symptom columns are read one after another, top to bottom.
# dict.fromkeys de-duplicates while preserving insertion order, which avoids
# the linear `not in list` scan for every cell of the table, and iterating
# symptoms_only.columns removes the hard-coded Symptom_1..Symptom_17 names.
symptoms = list(dict.fromkeys(
    symptom
    for column in symptoms_only.columns
    # dropna() skips the empty cells of rows with fewer symptoms
    for symptom in symptoms_only[column].dropna()
))

print(f"Featured symptoms: {symptoms}")

In [None]:
# Number of distinct symptoms (length of the symptoms list) in the dataset
len(symptoms)

In [None]:
# Names of every column after the first one, as a plain list,
# e.g. ['Symptom_1', 'Symptom_2', ..., 'Symptom_17'].
# Slicing df.columns directly avoids copying the DataFrame with iloc just to
# read its column names.
cols = df.columns[1:].tolist()
print(cols)

In [None]:
# Reshape from wide to long: each column listed in 'cols' is unpivoted into
# (variable, value) rows keyed by the original row index, so tmp has
# len(cols) times as many rows as the original DataFrame.
tmp = df.reset_index().melt(id_vars=['index'], value_vars=cols)

# Constant marker column: 1 for every (row, symptom) pair
tmp['add_value'] = 1
tmp.head()

In [None]:
pd.set_option('display.max_columns', 150)

# Pivot back to one row per original record, with one column per distinct
# value found in the symptom columns
diseases = tmp.pivot_table(values='add_value', index='index', columns='value')

# Put the disease name in front as the 'label' column
diseases.insert(0, 'label', df['Disease'])

# Symptoms a record does not have are NaN after the pivot; store them as 0
diseases = diseases.fillna(0)

# Every column except the label is a symptom feature
symptoms_list = [column for column in diseases if column != 'label']

diseases.head()

# TensorFlow Decision Forests Algorithm

In [None]:
"""
Our dataset classification label is represented as a string.
Keras expected classification labels to be integers.
"""

# Assign the label column name to a variable
label = "label"

# Converting string to a unique integers for each diseases
diseases[label] = diseases[label].map(disease_list.index)
diseases.head()

In [None]:
def _split(train_fraction):
    """Return (train, test): a seeded random sample and the remaining rows."""
    train_part = diseases.sample(frac=train_fraction, random_state=1)
    return train_part, diseases.drop(index=train_part.index)


# 70/30, 80/20 and 90/10 train/test splits
train_7, test_7 = _split(0.7)
train_8, test_8 = _split(0.8)
train_9, test_9 = _split(0.9)

# The 70/30 split is the one used from here on
train, test = train_7, test_7

In [None]:
# Shape (rows, columns) of the training split
train.shape

In [None]:
# Shape (rows, columns) of the test split
test.shape

In [None]:
# Convert the pandas DataFrames to TensorFlow datasets.
# `label` holds the name of the target column ("label"); the remaining
# columns are passed along as input features.
train_tfdf = tfdf.keras.pd_dataframe_to_tf_dataset(train, label=label)
test_tfdf = tfdf.keras.pd_dataframe_to_tf_dataset(test, label=label)

In [None]:
# Random forest model with its default settings
model = tfdf.keras.RandomForestModel()

# Optional: report accuracy when the model is evaluated
model.compile(metrics=["accuracy"])

# Fit on the training dataset inside the sys_pipes() context manager
with sys_pipes():
    model.fit(x=train_tfdf)

In [None]:
# Evaluate on the held-out test dataset; metrics come back as a dict
evaluation = model.evaluate(test_tfdf, return_dict=True)
print()

# One "name: value" line per metric, rounded to four decimals
for metric_name, metric_value in evaluation.items():
    print(f"{metric_name}: {metric_value:.4f}")

In [None]:
# Print the summary of the trained model
model.summary()

In [None]:
# Save the model to local directory
# model.save('saved_model/my_model')

# TESTING THE MODEL FROM TENSORFLOW DECISION FORESTS

In [1]:
# Importing Libraries
import numpy as np
import pandas as pd 
import tensorflow_decision_forests as tfdf
import tensorflow as tf
from tensorflow import keras

In [2]:
# Disease names indexed by class id: the predicted class index is looked up
# in this list, so its order must match the integer labels the model was
# trained with. Note that the list is NOT alphabetically sorted.
# NOTE(review): presumably the same order as the disease_list built in the
# training section — confirm before editing or re-sorting.
disease_list = ['Fungal infection', 'Allergy', 'GERD', 'Chronic cholestasis', 'Drug Reaction', 'Peptic ulcer disease', 
                'AIDS', 'Diabetes', 'Gastroenteritis', 'Bronchial Asthma', 'Hypertension', 'Migraine', 'Cervical spondylosis', 
                'Brain hemorrhage', 'Jaundice', 'Malaria', 'Chicken pox', 'Dengue', 'Typhoid', 'hepatitis A', 'Hepatitis B', 
                'Hepatitis C', 'Hepatitis D', 'Hepatitis E', 'Alcoholic hepatitis', 'Tuberculosis', 'Common Cold', 'Pneumonia', 
                'Dimorphic hemorrhoids', 'Heart attack', 'Varicose veins', 'Hypothyroidism', 'Hyperthyroidism', 'Hypoglycemia', 
                'Osteoarthristis', 'Arthritis', 'Benign paroxysmal positional vertigo', 'Acne', 'Urinary tract infection', 
                'Psoriasis', 'Impetigo']

# Symptom feature names in alphabetical order; they are used below as the
# column names of the model input DataFrame.
symptoms_list = ['abdominal_pain', 'abnormal_menstruation', 'acidity', 'acute_liver_failure', 'altered_sensorium', 
                 'anxiety','back_pain', 'belly_pain', 'blackheads', 'bladder_discomfort', 'blister', 'blood_in_sputum', 
                 'bloody_stool','blurred_and_distorted_vision', 'breathlessness', 'brittle_nails', 'bruising', 
                 'burning_micturition', 'chest_pain', 'chills', 'cold_hands_and_feets', 'coma', 'congestion', 
                 'constipation', 'continuous_feel_of_urine', 'continuous_sneezing', 'cough', 'cramps', 'dark_urine', 
                 'dehydration', 'depression', 'diarrhoea', 'dischromic_patches', 'distention_of_abdomen', 'dizziness', 
                 'drying_and_tingling_lips', 'enlarged_thyroid', 'excessive_hunger', 'extra_marital_contacts', 
                 'family_history', 'fast_heart_rate', 'fatigue', 'fluid_overload', 'foul_smell_of_urine', 'headache', 
                 'high_fever', 'hip_joint_pain', 'history_of_alcohol_consumption', 'increased_appetite', 'indigestion', 
                 'inflammatory_nails', 'internal_itching', 'irregular_sugar_level', 'irritability', 'irritation_in_anus', 
                 'itching', 'joint_pain', 'knee_pain', 'lack_of_concentration', 'lethargy', 'loss_of_appetite', 
                 'loss_of_balance', 'loss_of_smell', 'malaise', 'mild_fever', 'mood_swings', 'movement_stiffness', 
                 'mucoid_sputum', 'muscle_pain', 'muscle_wasting', 'muscle_weakness', 'nausea', 'neck_pain', 
                 'nodal_skin_eruptions', 'obesity', 'pain_behind_the_eyes', 'pain_during_bowel_movements', 
                 'pain_in_anal_region', 'painful_walking', 'palpitations', 'passage_of_gases', 'patches_in_throat', 'phlegm', 
                 'polyuria', 'prominent_veins_on_calf', 'puffy_face_and_eyes', 'pus_filled_pimples', 
                 'receiving_blood_transfusion', 'receiving_unsterile_injections', 'red_sore_around_nose', 
                 'red_spots_over_body', 'redness_of_eyes', 'restlessness', 'runny_nose', 'rusty_sputum', 'scurring', 
                 'shivering', 'silver_like_dusting', 'sinus_pressure', 'skin_peeling', 'skin_rash', 'slurred_speech', 
                 'small_dents_in_nails', 'spinning_movements', 'spotting_urination', 'stiff_neck', 'stomach_bleeding', 
                 'stomach_pain', 'sunken_eyes', 'sweating', 'swelled_lymph_nodes', 'swelling_joints', 'swelling_of_stomach', 
                 'swollen_blood_vessels', 'swollen_extremeties', 'swollen_legs', 'throat_irritation', 'toxic_look', 
                 'ulcers_on_tongue', 'unsteadiness', 'visual_disturbances', 'vomiting', 'watering_from_eyes', 
                 'weakness_in_limbs', 'weakness_of_one_body_side', 'weight_gain', 'weight_loss', 'yellow_crust_ooze', 
                 'yellow_urine', 'yellowing_of_eyes', 'yellowish_skin']

In [3]:
# Load the saved TensorFlow Decision Forests model from a local directory.
# NOTE(review): the model.save('saved_model/my_model') call in the training
# section is commented out — confirm the directory exists before running.
model_path = 'saved_model/my_model'
imported = keras.models.load_model(model_path)

In [4]:
# User inputs will be like this
user_inputs = {
    "symptom_1": "itching",
    "symptom_2": "nodal_skin_eruptions",
    "symptom_3": "dischromic_patches",
    "symptom_4": "skin_rash"
}

# Start from an all-zero feature vector: one float per known symptom
model_inputs = [0.0] * len(symptoms_list)

# Set the position of every reported symptom to 1.0.
# The string '0' is skipped (presumably a placeholder for an unused slot).
for symptom in user_inputs.values():
    if symptom == '0':
        continue
    try:
        symptom_index = symptoms_list.index(symptom)
    except ValueError:
        # Same exception type as before, but the message now names the input
        raise ValueError(f"Unknown symptom: {symptom!r}") from None
    model_inputs[symptom_index] = 1.0

# Build a one-row DataFrame whose columns are the symptom names, then convert
# it to a TensorFlow dataset (label=None: there is no target at inference).
df_inputs = pd.DataFrame([model_inputs], columns=symptoms_list)
model_inputs = tfdf.keras.pd_dataframe_to_tf_dataset(df_inputs, label=None)

In [5]:
# Run the loaded model on the input dataset
prediction = imported.predict(model_inputs)
# The input DataFrame has a single row, so only the first result is kept
predicted = prediction[0]

# predicted holds one probability per disease, at the same position as the
# disease in disease_list
print(predicted)

[0.9933325  0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.00666667 0.         0.         0.        ]


In [6]:
# 1. Index of the most probable disease. np.argmax returns the first index
#    of the maximum, which is what max() followed by np.where(...)[0][0]
#    produced, without a second pass and a float equality comparison.
disease_index = np.argmax(predicted)

# 2. The highest probability itself
highest_probability = predicted[disease_index]

# 3. Find the disease based on the disease index
predicted_disease = disease_list[disease_index]

# 4. The probability as a percentage
probability = highest_probability*100

print(f'Probability: {probability} %')
print(f'Predicted disease: {predicted_disease}\n')

Probability: 99.33325052261353 %
Predicted disease: Fungal infection

