In [17]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import KBinsDiscretizer
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator, HillClimbSearch
from pgmpy.inference import VariableElimination

In [19]:
# Load the diabetes dataset (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='../data/diabetes.csv')

In [21]:
# Per-column count of missing (NaN/None) entries; a column of zeros means
# nothing needs imputing before discretization.
df.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [23]:
# Bin every predictor into 3 equal-width ordinal levels (0, 1, 2). The last
# column of `df` is excluded from binning and carried over unchanged.
discretizer = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy='uniform')
feature_columns = df.columns[:-1]

# Work on a copy so the raw frame `df` stays intact for later cells.
discrete_data = df.copy()
discrete_data[feature_columns] = discretizer.fit_transform(df[feature_columns])

# The discretizer returns floats; cast the whole frame to integer state codes.
discrete_data = discrete_data.astype(int)
discrete_data.head()



Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,1,2,1,1,0,1,0,1,1
1,0,1,1,0,0,1,0,0,0
2,1,2,1,0,0,1,0,0,1
3,0,1,1,0,0,1,0,0,0
4,0,2,0,1,0,1,2,0,1


In [31]:
# Network structure: each listed variable is a direct parent of `Outcome`;
# no edges are declared between the parent variables themselves.
outcome_parents = ('Glucose', 'BMI', 'Age', 'Pregnancies')
model = DiscreteBayesianNetwork([(parent, 'Outcome') for parent in outcome_parents])

In [33]:
# Estimate every CPD by maximum likelihood. Only the columns that are nodes of
# the network are passed in: `fit` expects the data columns to correspond to
# the model's variables, and the remaining predictors would otherwise be
# inspected by pgmpy without contributing to any CPD.
model.fit(discrete_data[list(model.nodes())], estimator=MaximumLikelihoodEstimator)

INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'Pregnancies': 'N', 'Glucose': 'N', 'BloodPressure': 'N', 'SkinThickness': 'N', 'Insulin': 'N', 'BMI': 'N', 'DiabetesPedigreeFunction': 'N', 'Age': 'N', 'Outcome': 'N'}


<pgmpy.models.DiscreteBayesianNetwork.DiscreteBayesianNetwork at 0x2524ee131a0>

In [35]:
# Posterior over `Outcome` via variable elimination, conditioning on Glucose
# and BMI both being in bin 2 (the top of the three ordinal bins produced by
# the discretizer). Unobserved parents are marginalized out.
observed = {'Glucose': 2, 'BMI': 2}
inference = VariableElimination(model)
result = inference.query(variables=['Outcome'], evidence=observed)
print("Probability of diabetes given symptoms:", result, sep="\n")

Probability of diabetes given symptoms:
+------------+----------------+
| Outcome    |   phi(Outcome) |
| Outcome(0) |         0.3180 |
+------------+----------------+
| Outcome(1) |         0.6820 |
+------------+----------------+
