In [64]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, KBinsDiscretizer
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import HillClimbSearch, BayesianEstimator
from pgmpy.inference import VariableElimination    

In [66]:
df_ckd = df = pd.read_csv('../data/ckd_clean.csv')

In [68]:
target = 'Class'

In [70]:
df_encoded = df.copy()
categorical_cols = df.select_dtypes(include='object').columns

for col in categorical_cols:
    df_encoded[col] = LabelEncoder().fit_transform(df[col].astype(str))

In [72]:
numerical_cols = df_encoded.select_dtypes(include = ['int64', 'float64']).drop(columns = [target]).columns
discretizer = KBinsDiscretizer(n_bins = 3, encode = 'ordinal', strategy = 'uniform')
df_encoded[numerical_cols] = discretizer.fit_transform(df_encoded[numerical_cols])
df_encoded = df_encoded.astype(int)
df_encoded.head()



Unnamed: 0,Age,Blood Pressure,Specific Gravity,Albumin,Sugar,Red Blood Cells,Pus Cell,Pus Cell clumps,Bacteria,Blood Glucose Random,...,Packed Cell Volume,White Blood Cell Count,Red Blood Cell Count,Hypertension,Diabetes Mellitus,Coronary Artery Disease,Appetite,Pedal Edema,Anemia,Class
0,1,1,0,2,0,1,0,1,0,0,...,1,0,0,1,0,0,1,1,1,1
1,1,2,2,1,0,0,0,1,0,0,...,1,1,0,1,1,0,1,0,1,1
2,2,1,0,2,0,0,0,1,0,2,...,1,0,0,1,1,0,1,1,0,1
3,2,1,0,2,1,1,0,1,1,0,...,0,0,0,1,1,1,1,1,0,1
4,2,1,1,1,0,0,0,0,0,0,...,1,0,0,1,1,1,1,1,1,1


In [74]:
hc = HillClimbSearch(df_encoded)
best_model = hc.estimate()
print("Learned edges", best_model.edges())

INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'Age': 'N', 'Blood Pressure': 'N', 'Specific Gravity': 'N', 'Albumin': 'N', 'Sugar': 'N', 'Red Blood Cells': 'N', 'Pus Cell': 'N', 'Pus Cell clumps': 'N', 'Bacteria': 'N', 'Blood Glucose Random': 'N', 'Blood Urea': 'N', 'Serum Creatinine': 'N', 'Sodium': 'N', 'Potassium': 'N', 'Hemoglobin': 'N', 'Packed Cell Volume': 'N', 'White Blood Cell Count': 'N', 'Red Blood Cell Count': 'N', 'Hypertension': 'N', 'Diabetes Mellitus': 'N', 'Coronary Artery Disease': 'N', 'Appetite': 'N', 'Pedal Edema': 'N', 'Anemia': 'N', 'Class': 'N'}
INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'Age': 'N', 'Blood Pressure': 'N', 'Specific Gravity': 'N', 'Albumin': 'N', 'Sugar': 'N', 'Red Blood Cells': 'N', 'Pus Cell': 'N', 'Pus Cell clumps': 'N', 'Bacteria': 'N', 'Blood Glucose Random': 'N', 'Blood Urea': 'N', 'Serum Creatinine': 'N', 'Sodium': 'N',

  0%|          | 0/1000000 [00:00<?, ?it/s]

Learned edges [('Specific Gravity', 'Bacteria'), ('Serum Creatinine', 'Blood Urea'), ('Hemoglobin', 'Packed Cell Volume'), ('Hemoglobin', 'Class'), ('Hemoglobin', 'Pus Cell'), ('Packed Cell Volume', 'Anemia'), ('Red Blood Cell Count', 'Serum Creatinine'), ('Red Blood Cell Count', 'Pus Cell clumps'), ('Hypertension', 'Coronary Artery Disease'), ('Hypertension', 'Sugar'), ('Hypertension', 'Age'), ('Class', 'Albumin'), ('Class', 'Specific Gravity'), ('Class', 'Hypertension'), ('Class', 'Red Blood Cell Count'), ('Class', 'Diabetes Mellitus'), ('Class', 'Pedal Edema'), ('Class', 'Appetite'), ('Class', 'Sodium'), ('Class', 'Blood Glucose Random'), ('Class', 'Red Blood Cells'), ('Class', 'Blood Pressure'), ('Class', 'White Blood Cell Count')]


In [76]:
model = DiscreteBayesianNetwork(best_model.edges())
model.fit(df_encoded, estimator = BayesianEstimator, prior_type = 'BDeu')

INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'Age': 'N', 'Blood Pressure': 'N', 'Specific Gravity': 'N', 'Albumin': 'N', 'Sugar': 'N', 'Red Blood Cells': 'N', 'Pus Cell': 'N', 'Pus Cell clumps': 'N', 'Bacteria': 'N', 'Blood Glucose Random': 'N', 'Blood Urea': 'N', 'Serum Creatinine': 'N', 'Sodium': 'N', 'Potassium': 'N', 'Hemoglobin': 'N', 'Packed Cell Volume': 'N', 'White Blood Cell Count': 'N', 'Red Blood Cell Count': 'N', 'Hypertension': 'N', 'Diabetes Mellitus': 'N', 'Coronary Artery Disease': 'N', 'Appetite': 'N', 'Pedal Edema': 'N', 'Anemia': 'N', 'Class': 'N'}


<pgmpy.models.DiscreteBayesianNetwork.DiscreteBayesianNetwork at 0x144418d5820>

In [92]:
inference = VariableElimination(model)
result = inference.query(variables=['Class'], evidence={
    'Blood Glucose Random': 2,
    'Albumin': 1,
    'Appetite': 0,
    'Hypertension': 1,
    'Packed Cell Volume': 2,
    'Age': 2,
})
print("Probability of disease given symptoms:")
print(result)

Probability of disease given symptoms:
+----------+--------------+
| Class    |   phi(Class) |
| Class(0) |       0.0016 |
+----------+--------------+
| Class(1) |       0.9984 |
+----------+--------------+


In [103]:
print(model.get_cpds('Class'))

+------------+---------------------+-----+----------------------+
| Hemoglobin | Hemoglobin(0)       | ... | Hemoglobin(2)        |
+------------+---------------------+-----+----------------------+
| Class(0)   | 0.09615384615384616 | ... | 0.976123595505618    |
+------------+---------------------+-----+----------------------+
| Class(1)   | 0.9038461538461539  | ... | 0.023876404494382025 |
+------------+---------------------+-----+----------------------+


In [105]:
for cpd in model.get_cpds():
    print(cpd)


+---------------------+----------------------+---------------------+
| Class               | Class(0)             | Class(1)            |
+---------------------+----------------------+---------------------+
| Specific Gravity(0) | 0.007092198581560284 | 0.5897435897435898  |
+---------------------+----------------------+---------------------+
| Specific Gravity(1) | 0.007092198581560284 | 0.2380952380952381  |
+---------------------+----------------------+---------------------+
| Specific Gravity(2) | 0.9858156028368794   | 0.17216117216117216 |
+---------------------+----------------------+---------------------+
+------------------+-----+----------------------+
| Specific Gravity | ... | Specific Gravity(2)  |
+------------------+-----+----------------------+
| Bacteria(0)      | ... | 0.9932614555256065   |
+------------------+-----+----------------------+
| Bacteria(1)      | ... | 0.006738544474393532 |
+------------------+-----+----------------------+
+----------------------+-----