In [13]:
import pandas as pd 
from pgmpy.models import DiscreteBayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

In [11]:
df = pd.read_csv('heart_simple.csv')
df.head()


Unnamed: 0,Age,Cholesterol,BP,HeartDisease
0,Old,High,High,Yes
1,Old,High,High,Yes
2,Old,High,Normal,Yes
3,Old,Normal,High,Yes
4,Old,Normal,Normal,No


In [14]:
model = DiscreteBayesianNetwork([
    ('Age','HeartDisease'),
    ('Cholesterol','HeartDisease'),
    ('BP','HeartDisease')
])

In [18]:
print("Training Model.....")
model.fit(df,estimator = MaximumLikelihoodEstimator)
print("Training Complete")


INFO:pgmpy: Datatype (N=numerical, C=Categorical Unordered, O=Categorical Ordered) inferred from data: 
 {'Age': 'C', 'Cholesterol': 'C', 'BP': 'C', 'HeartDisease': 'C'}


Training Model.....
Training Complete


In [20]:
inference = VariableElimination(model)

In [22]:
#prediction based on available evidence
#Case 1 : High risk patient

result1 = inference.query(
    variables = ['HeartDisease'],
    evidence = {'Age':'Old','Cholesterol':'High','BP':'High'}
)
print(result1)

+-------------------+---------------------+
| HeartDisease      |   phi(HeartDisease) |
| HeartDisease(No)  |              0.0000 |
+-------------------+---------------------+
| HeartDisease(Yes) |              1.0000 |
+-------------------+---------------------+


In [24]:
#Case 2 : Low Risk

result2 = inference.query(
    variables = ['HeartDisease'],
    evidence = {'Age':'Young','Cholesterol':'Normal','BP':'Normal'}
)
print(result2)

+-------------------+---------------------+
| HeartDisease      |   phi(HeartDisease) |
| HeartDisease(No)  |              1.0000 |
+-------------------+---------------------+
| HeartDisease(Yes) |              0.0000 |
+-------------------+---------------------+
