In [16]:
import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

# Read every row of the CSV that carries the column names. The `with` block
# guarantees the file handle is closed once the rows are materialised (the
# handle was previously left open), and newline='' is the mode the csv module
# expects so that it can do its own newline handling.
with open('datasets/71.csv', 'r', newline='') as header_file:
    lines = list(csv.reader(header_file))

# The first row holds the attribute (column) names.
attributes = lines[0]
print(attributes)

['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'heartdisease']


In [17]:
# Load the records, labelling the columns with the names in `attributes`,
# and turn every '?' entry into NaN in the same expression.
heartDisease = (
    pd.read_csv('datasets/72.csv', names=attributes)
    .replace('?', np.nan)
)

# Preview the first rows, then list each column's dtype; the empty string
# yields the blank separator line between the two sections.
print(
    'Examples',
    heartDisease.head(),
    '',
    'Attributes and Datatypes',
    heartDisease.dtypes,
    sep='\n',
)

Examples
    age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  slope   ca thal  heartdisease
0  63.0  1.0  1.0     145.0  233.0  1.0      2.0    150.0    0.0      2.3    3.0  0.0  6.0             0
1  67.0  1.0  4.0     160.0  286.0  0.0      2.0    108.0    1.0      1.5    2.0  3.0  3.0             2
2  67.0  1.0  4.0     120.0  229.0  0.0      2.0    129.0    1.0      2.6    2.0  2.0  7.0             1
3  37.0  1.0  3.0     130.0  250.0  0.0      0.0    187.0    0.0      3.5    3.0  0.0  3.0             0
4  41.0  0.0  2.0     130.0  204.0  0.0      2.0    172.0    0.0      1.4    1.0  0.0  3.0             0

Attributes and Datatypes
age             float64
sex             float64
cp              float64
trestbps        float64
chol            float64
fbs             float64
restecg         float64
thalach         float64
exang           float64
oldpeak         float64
slope           float64
ca               object
thal             object
heartdisease      int64

In [27]:
# Network structure: each tuple is a directed edge (parent, child).
model = BayesianModel([
    # parents of trestbps and fbs
    ('age', 'trestbps'),
    ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    # parents of heartdisease
    ('trestbps', 'heartdisease'),
    ('fbs', 'heartdisease'),
    # children of heartdisease
    ('heartdisease', 'restecg'),
    ('heartdisease', 'thalach'),
    ('heartdisease', 'chol'),
])

# Estimate the conditional probability tables from the data frame.
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Build the inference engine on the fitted model.
HeartDisease_infer = VariableElimination(model)

# Query the distribution of heartdisease with fbs observed as 1.
print('Probability of Heart Disease given fbs')
q = HeartDisease_infer.query(
    variables=['heartdisease'],
    evidence={'fbs': 1},
)
print(q)

Finding Elimination Order: : 100%|██████████████████████████████████████████████████████| 7/7 [00:00<00:00, 447.97it/s]
Eliminating: thalach: 100%|█████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 149.35it/s]

Probability of Heart Disease given fbs
+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
+=================+=====================+
| heartdisease(0) |              0.5582 |
+-----------------+---------------------+
| heartdisease(1) |              0.0949 |
+-----------------+---------------------+
| heartdisease(2) |              0.1467 |
+-----------------+---------------------+
| heartdisease(3) |              0.1580 |
+-----------------+---------------------+
| heartdisease(4) |              0.0421 |
+-----------------+---------------------+



