In [15]:
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
from sklearn.preprocessing import LabelEncoder
import numpy as np

In [16]:
# Load the dataset from heart.csv; the path is relative, so the file is
# looked up in the current working directory.
df = pd.read_csv('heart.csv')

In [17]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [18]:
# Count missing values per column.
df.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [19]:
# Print column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1025 non-null   int64  
 1   sex       1025 non-null   int64  
 2   cp        1025 non-null   int64  
 3   trestbps  1025 non-null   int64  
 4   chol      1025 non-null   int64  
 5   fbs       1025 non-null   int64  
 6   restecg   1025 non-null   int64  
 7   thalach   1025 non-null   int64  
 8   exang     1025 non-null   int64  
 9   oldpeak   1025 non-null   float64
 10  slope     1025 non-null   int64  
 11  ca        1025 non-null   int64  
 12  thal      1025 non-null   int64  
 13  target    1025 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 112.2 KB


In [21]:
# Discretise age and chol into three equal-width bins each, replacing the
# numeric values with the bin labels used as evidence in later queries.
#
# The dtype guard keeps this cell safe to re-run: once a column holds bin
# labels it is no longer numeric, and calling pd.cut on it again would fail,
# so columns that are already binned are skipped.
bin_labels = {
    'age': ['young', 'middle', 'old'],
    'chol': ['low', 'norm', 'high'],
}
for column, labels in bin_labels.items():
    if pd.api.types.is_numeric_dtype(df[column]):
        df[column] = pd.cut(df[column], bins=3, labels=labels)

In [22]:
# Preview the frame again to check the age and chol columns after binning.
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,middle,1,0,125,low,0,1,168,0,1.0,2,2,3,0
1,middle,1,0,140,low,1,0,155,1,3.1,0,0,3,0
2,old,1,0,145,low,0,1,125,1,2.6,0,0,3,0
3,middle,1,0,148,low,0,1,161,0,0.0,2,1,3,0
4,old,0,0,138,norm,1,1,106,0,1.9,1,3,2,0


In [25]:
# Network structure: age, chol and cp are parents of target, and target is
# in turn the parent of thalach.
target_parents = ('age', 'chol', 'cp')
edges = [(parent, 'target') for parent in target_parents]
edges.append(('target', 'thalach'))
model = BayesianNetwork(edges)

In [26]:
# Show the directed edges of the network as a sanity check on its structure.
model.edges()

OutEdgeView([('age', 'target'), ('target', 'thalach'), ('chol', 'target'), ('cp', 'target')])

In [27]:
# Estimate the conditional probability tables of the network from df using
# maximum likelihood.
# NOTE(review): with plain maximum likelihood, parent combinations that are
# rare in df get CPD entries estimated from very few rows; the 0.0000 / 1.0000
# posterior printed by a later query may be a symptom of that. Consider a
# smoothed estimator — confirm against the data.
model.fit(data=df, estimator=MaximumLikelihoodEstimator)

In [28]:
# Build the variable-elimination inference engine over the fitted model;
# the queries below go through this object.
infer = VariableElimination(model)

In [32]:
# Distribution over target given an 'old' age bin and a 'high' chol bin;
# cp is left unobserved in this query.
result = infer.query(
    variables=['target'],
    evidence=dict(age='old', chol='high'),
)
print(result)

+-----------+---------------+
| target    |   phi(target) |
| target(0) |        0.3615 |
+-----------+---------------+
| target(1) |        0.6385 |
+-----------+---------------+


In [36]:
# Distribution over target given a 'middle' age bin, a 'norm' chol bin and
# cp equal to 2.
result = infer.query(
    variables=['target'],
    evidence=dict(age='middle', chol='norm', cp=2),
)
print(result)

+-----------+---------------+
| target    |   phi(target) |
| target(0) |        0.0000 |
+-----------+---------------+
| target(1) |        1.0000 |
+-----------+---------------+
