In [10]:
import warnings

import joblib
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_text

# Load the dataset from a CSV in the current working directory (relative path).
# Later cells use the last column as the disease label and all other columns
# as symptom features.
data = pd.read_csv("Filtered_Childhood_Dataset.csv")

In [11]:
# Data preparation
# Features: every column except the last (symptoms); missing values become 0.
X = data.iloc[:, :-1].fillna(0)

# Target: the last column (diseases), encoded as integer class ids.
# The fitted encoder is kept so predictions can be decoded back to names.
le = LabelEncoder()
y = le.fit_transform(data.iloc[:, -1])

display(X)

Unnamed: 0,pain chest,shortness of breath,dizziness,asthenia,fall,syncope,vertigo,sweat sweating increased,palpitation,nausea,...,hypertonicity,flatulence,gasping for breath,feces in rectum,prodrome,hypoproteinemia,abdomen acute,air fluid level,catching breath,immobile
0,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
332,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
333,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
334,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Train/test split: hold out 20% of the rows for evaluation, with a fixed seed
# so the same rows land in each partition on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [13]:
# Model: decision tree with a fixed seed, fitted on the training partition.
# fit() returns the estimator itself, so the call can be chained.
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
model

In [14]:
# Score the fitted tree on the held-out partition.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model accuracy: {accuracy:.2f}")

Model accuracy: 0.72


In [15]:
# Render the fitted tree as indented text rules, labelled with the feature
# column names instead of positional indices.
tree_rules = export_text(model, feature_names=list(X.columns))
print("\nDecision Tree Rules:", tree_rules, sep="\n")


Decision Tree Rules:
|--- hunger <= 0.50
|   |--- asthenia <= 0.50
|   |   |--- tenesmus <= 0.50
|   |   |   |--- ataxia <= 0.50
|   |   |   |   |--- asymptomatic <= 0.50
|   |   |   |   |   |--- projectile vomiting <= 0.50
|   |   |   |   |   |   |--- hyponatremia <= 0.50
|   |   |   |   |   |   |   |--- symptom aggravating factors <= 0.50
|   |   |   |   |   |   |   |   |--- indifferent mood <= 0.50
|   |   |   |   |   |   |   |   |   |--- hypothermia, natural <= 0.50
|   |   |   |   |   |   |   |   |   |   |--- sleepy <= 0.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 27
|   |   |   |   |   |   |   |   |   |   |--- sleepy >  0.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: 5
|   |   |   |   |   |   |   |   |   |--- hypothermia, natural >  0.50
|   |   |   |   |   |   |   |   |   |   |--- class: 11
|   |   |   |   |   |   |   |   |--- indifferent mood >  0.50
|   |   |   |   |   |   |   |   |   |--- class: 2
|   |   |   |   |   |   |   |--

In [16]:
# Print each disease name next to its integer class id; the id is the
# position of the name in the encoder's classes_ array.
print("Disease to number mapping:")
for number, disease in enumerate(le.classes_):
    print(f"{disease}: {number}")

Disease to number mapping:
anemia: 0
asthma: 1
bacteremia: 2
bronchitis: 3
colitis: 4
epilepsy: 5
failure kidney: 6
gastroenteritis: 7
glaucoma: 8
hepatitis: 9
hepatitis B: 10
hypoglycemia: 11
influenza: 12
ischemia: 13
kidney disease: 14
lymphoma: 15
pneumonia: 16
ulcer peptic: 17


In [17]:
def predict_disease(symptoms, clf=None, label_encoder=None, feature_names=None):
    """Predict the disease label for a set of reported symptoms.

    Builds a single-row frame with one zero-filled column per known symptom,
    sets the reported symptoms to 1, runs the classifier on it and decodes the
    predicted class id back to its label.

    Parameters
    ----------
    symptoms : str or iterable of str
        Symptom names, spelled exactly like the feature columns. A bare string
        is treated as ONE symptom (it is not iterated character by character).
    clf : object with ``predict``, optional
        Classifier to use; defaults to the notebook-level ``model``.
    label_encoder : object with ``inverse_transform``, optional
        Decoder for predicted class ids; defaults to the notebook-level ``le``.
    feature_names : sequence of str, optional
        Feature column names, in training order; defaults to ``X.columns``.

    Returns
    -------
    The decoded label of the single prediction.

    Warns
    -----
    UserWarning
        If any symptom is not a known feature column. Such symptoms are
        ignored, as before, but no longer silently.
    """
    # Fall back to the notebook globals only when no override is supplied.
    clf = model if clf is None else clf
    label_encoder = le if label_encoder is None else label_encoder
    columns = X.columns if feature_names is None else pd.Index(feature_names)

    # A lone string would otherwise be walked one character at a time, match
    # nothing, and yield a prediction for an all-zero row.
    if isinstance(symptoms, str):
        symptoms = [symptoms]
    else:
        symptoms = list(symptoms)

    unknown = [name for name in symptoms if name not in columns]
    if unknown:
        warnings.warn(
            f"Ignoring unrecognized symptom(s): {unknown}",
            UserWarning,
            stacklevel=2,
        )

    input_data = pd.DataFrame(0, index=[0], columns=columns)
    for name in symptoms:
        if name in columns:
            input_data[name] = 1

    prediction = clf.predict(input_data)
    return label_encoder.inverse_transform(prediction)[0]

In [18]:
# Try the helper on a small hand-picked set of symptom names.
test_symptoms = [
    'pain chest',
    'shortness of breath',
    'immobile',
]
print(f"Predicted disease for {test_symptoms}: {predict_disease(test_symptoms)}")

Predicted disease for ['pain chest', 'shortness of breath', 'immobile']: ischemia


In [19]:
# List the feature column names: every column of the dataset except the last
# one, which the data-preparation cell uses as the disease label.
print(data.columns[:-1])

Index(['pain chest', 'shortness of breath', 'dizziness', 'asthenia', 'fall',
       'syncope', 'vertigo', 'sweat sweating increased', 'palpitation',
       'nausea',
       ...
       'hypertonicity', 'flatulence', 'gasping for breath', 'feces in rectum',
       'prodrome', 'hypoproteinemia', 'abdomen acute', 'air fluid level',
       'catching breath', 'immobile'],
      dtype='object', length=325)


In [20]:
# Persist the fitted tree together with its label encoder; both files are
# needed to turn a later prediction back into a disease name.
joblib.dump(value=model, filename='childhood_disease_model.pkl')
joblib.dump(value=le, filename='label_encoder.pkl')

['label_encoder.pkl']