# Dheeraj J C - 1CR17CS036 (Prog 7)

Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using standard Heart Disease Data Set. You can use Java/Python ML library classes/API.

In [1]:
from pgmpy.models import BayesianModel

In [2]:
cancer_model = BayesianModel([("Pollution", "Cancer"),("Smoker", "Cancer"),("Cancer", "Xray"),("Cancer", "Dyspnoea")])

print(cancer_model)




In [3]:
cancer_model.nodes()

NodeView(('Pollution', 'Cancer', 'Smoker', 'Xray', 'Dyspnoea'))

In [4]:
cancer_model.edges()

OutEdgeView([('Pollution', 'Cancer'), ('Cancer', 'Xray'), ('Cancer', 'Dyspnoea'), ('Smoker', 'Cancer')])

In [5]:
cancer_model.get_cpds()

[]

In [6]:
from pgmpy.factors.discrete import TabularCPD

In [7]:
cpd_poll = TabularCPD(variable="Pollution",variable_card=2,values=[[0.9],[0.1]])
cpd_smoke = TabularCPD(variable="Smoker",variable_card=2,values=[[0.3],[0.7]])
cpd_cancer = TabularCPD(variable="Cancer",variable_card=2,values=[[0.03, 0.05, 0.001, 0.02],[0.97, 0.95, 0.999, 0.98]],evidence=["Smoker", "Pollution"],evidence_card=[2, 2])
cpd_xray = TabularCPD(variable="Xray",variable_card=2,values=[[0.9, 0.2],[0.1, 0.8]],evidence=["Cancer"],evidence_card=[2])
cpd_dysp = TabularCPD(variable="Dyspnoea",variable_card=2,values=[[0.65, 0.3],[0.35, 0.7]],evidence=["Cancer"],evidence_card=[2])
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)
cancer_model.check_model()

True

In [8]:
cancer_model.is_active_trail("Pollution", "Smoker")

False

In [9]:
cancer_model.is_active_trail("Pollution", "Smoker", observed=["Cancer"])

True

In [10]:
cancer_model.get_cpds()

[<TabularCPD representing P(Pollution:2) at 0x2c876230490>,
 <TabularCPD representing P(Smoker:2) at 0x2c876230430>,
 <TabularCPD representing P(Cancer:2 | Smoker:2, Pollution:2) at 0x2c8762304f0>,
 <TabularCPD representing P(Xray:2 | Cancer:2) at 0x2c876230460>,
 <TabularCPD representing P(Dyspnoea:2 | Cancer:2) at 0x2c8762304c0>]

In [11]:
print(cancer_model.get_cpds("Pollution"))

+--------------+-----+
| Pollution(0) | 0.9 |
+--------------+-----+
| Pollution(1) | 0.1 |
+--------------+-----+


In [12]:
print(cancer_model.get_cpds("Smoker"))

+-----------+-----+
| Smoker(0) | 0.3 |
+-----------+-----+
| Smoker(1) | 0.7 |
+-----------+-----+


In [13]:
print(cancer_model.get_cpds("Xray"))

+---------+-----------+-----------+
| Cancer  | Cancer(0) | Cancer(1) |
+---------+-----------+-----------+
| Xray(0) | 0.9       | 0.2       |
+---------+-----------+-----------+
| Xray(1) | 0.1       | 0.8       |
+---------+-----------+-----------+


In [14]:
print(cancer_model.get_cpds("Dyspnoea"))

+-------------+-----------+-----------+
| Cancer      | Cancer(0) | Cancer(1) |
+-------------+-----------+-----------+
| Dyspnoea(0) | 0.65      | 0.3       |
+-------------+-----------+-----------+
| Dyspnoea(1) | 0.35      | 0.7       |
+-------------+-----------+-----------+


In [15]:
print(cancer_model.get_cpds("Cancer"))

+-----------+--------------+--------------+--------------+--------------+
| Smoker    | Smoker(0)    | Smoker(0)    | Smoker(1)    | Smoker(1)    |
+-----------+--------------+--------------+--------------+--------------+
| Pollution | Pollution(0) | Pollution(1) | Pollution(0) | Pollution(1) |
+-----------+--------------+--------------+--------------+--------------+
| Cancer(0) | 0.03         | 0.05         | 0.001        | 0.02         |
+-----------+--------------+--------------+--------------+--------------+
| Cancer(1) | 0.97         | 0.95         | 0.999        | 0.98         |
+-----------+--------------+--------------+--------------+--------------+


In [16]:
cancer_model.local_independencies("Xray")

(Xray _|_ Pollution, Dyspnoea, Smoker | Cancer)

In [17]:
cancer_model.local_independencies("Pollution")

(Pollution _|_ Smoker)

In [18]:
cancer_model.local_independencies("Smoker")

(Smoker _|_ Pollution)

In [19]:
cancer_model.local_independencies("Dyspnoea")

(Dyspnoea _|_ Pollution, Smoker, Xray | Cancer)

In [20]:
cancer_model.local_independencies("Cancer")



In [21]:
cancer_model.get_independencies()

(Pollution _|_ Smoker)
(Pollution _|_ Dyspnoea, Xray | Cancer)
(Pollution _|_ Xray | Dyspnoea, Cancer)
(Pollution _|_ Dyspnoea | Cancer, Xray)
(Pollution _|_ Dyspnoea, Xray | Cancer, Smoker)
(Pollution _|_ Xray | Dyspnoea, Cancer, Smoker)
(Pollution _|_ Dyspnoea | Smoker, Cancer, Xray)
(Smoker _|_ Pollution)
(Smoker _|_ Dyspnoea, Xray | Cancer)
(Smoker _|_ Dyspnoea, Xray | Pollution, Cancer)
(Smoker _|_ Xray | Dyspnoea, Cancer)
(Smoker _|_ Dyspnoea | Cancer, Xray)
(Smoker _|_ Xray | Pollution, Dyspnoea, Cancer)
(Smoker _|_ Dyspnoea | Pollution, Cancer, Xray)
(Xray _|_ Pollution, Dyspnoea, Smoker | Cancer)
(Xray _|_ Dyspnoea, Smoker | Pollution, Cancer)
(Xray _|_ Pollution, Smoker | Dyspnoea, Cancer)
(Xray _|_ Pollution, Dyspnoea | Cancer, Smoker)
(Xray _|_ Smoker | Pollution, Dyspnoea, Cancer)
(Xray _|_ Dyspnoea | Pollution, Cancer, Smoker)
(Xray _|_ Pollution | Dyspnoea, Cancer, Smoker)
(Dyspnoea _|_ Pollution, Smoker, Xray | Cancer)
(Dyspnoea _|_ Smoker, Xray | Pollution, Cancer)
(Dy

In [22]:
from pgmpy.inference import VariableElimination

In [23]:
cancer_infr = VariableElimination(cancer_model)

In [24]:
q = cancer_infr.query(variables=["Cancer"], evidence={"Smoker": 1})
print(q)

Finding Elimination Order: : 100%|██████████| 3/3 [00:00<00:00, 191.95it/s]
Eliminating: Xray: 100%|██████████| 3/3 [00:00<00:00, 273.47it/s]

+-----------+---------------+
| Cancer    |   phi(Cancer) |
| Cancer(0) |        0.0029 |
+-----------+---------------+
| Cancer(1) |        0.9971 |
+-----------+---------------+





In [25]:
q = cancer_infr.query(variables=["Cancer"], evidence={"Smoker": 1, "Pollution": 1})
print(q)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 1002.58it/s]
Eliminating: Xray: 100%|██████████| 2/2 [00:00<00:00, 401.23it/s]

+-----------+---------------+
| Cancer    |   phi(Cancer) |
| Cancer(0) |        0.0200 |
+-----------+---------------+
| Cancer(1) |        0.9800 |
+-----------+---------------+





# extra code

In [26]:
import numpy as np
from urllib.request import urlopen
import urllib
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import pandas as pd

In [27]:
data= "http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data"

In [28]:
names = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "heartdisease"]

In [29]:
heartDisease = pd.read_csv(urlopen(data), names=names)
heartDisease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0
3,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0
4,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0


In [30]:
del heartDisease["ca"]
del heartDisease["slope"]
del heartDisease["thal"]
del heartDisease["oldpeak"]

In [31]:
heartDisease = heartDisease.replace("?", np.nan)
heartDisease.dtypes

age              int64
sex              int64
cp               int64
trestbps        object
chol            object
fbs             object
restecg         object
thalach         object
exang           object
heartdisease     int64
dtype: object

In [32]:
heartDisease.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'heartdisease'],
      dtype='object')

In [33]:
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator

In [34]:
model = BayesianModel([("age", "trestbps"),("age", "fbs"),("sex", "trestbps"),("sex", "trestbps"),("exang", "trestbps"),("trestbps", "heartdisease"),("fbs", "heartdisease"),("heartdisease", "restecg"),("heartdisease", "thalach"),("heartdisease", "thalach"),("heartdisease", "chol")])

In [35]:
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
print(model.get_cpds("age"))

+---------+------------+
| age(28) | 0.00383142 |
+---------+------------+
| age(29) | 0.00383142 |
+---------+------------+
| age(30) | 0.00383142 |
+---------+------------+
| age(31) | 0.00766284 |
+---------+------------+
| age(32) | 0.0153257  |
+---------+------------+
| age(33) | 0.00766284 |
+---------+------------+
| age(34) | 0.0153257  |
+---------+------------+
| age(35) | 0.0191571  |
+---------+------------+
| age(36) | 0.0191571  |
+---------+------------+
| age(37) | 0.0306513  |
+---------+------------+
| age(38) | 0.0191571  |
+---------+------------+
| age(39) | 0.0344828  |
+---------+------------+
| age(40) | 0.0191571  |
+---------+------------+
| age(41) | 0.0383142  |
+---------+------------+
| age(42) | 0.0268199  |
+---------+------------+
| age(43) | 0.0421456  |
+---------+------------+
| age(44) | 0.0268199  |
+---------+------------+
| age(45) | 0.0229885  |
+---------+------------+
| age(46) | 0.045977   |
+---------+------------+
| age(47) | 0.0344828  |


In [36]:
print(model.get_cpds("chol"))

+--------------+----------------------+----------------------+
| heartdisease | heartdisease(0)      | heartdisease(1)      |
+--------------+----------------------+----------------------+
| chol(100)    | 0.006134969325153374 | 0.0                  |
+--------------+----------------------+----------------------+
| chol(117)    | 0.0                  | 0.01020408163265306  |
+--------------+----------------------+----------------------+
| chol(129)    | 0.006134969325153374 | 0.0                  |
+--------------+----------------------+----------------------+
| chol(132)    | 0.006134969325153374 | 0.0                  |
+--------------+----------------------+----------------------+
| chol(147)    | 0.012269938650306749 | 0.0                  |
+--------------+----------------------+----------------------+
| chol(156)    | 0.0                  | 0.01020408163265306  |
+--------------+----------------------+----------------------+
| chol(160)    | 0.012269938650306749 | 0.0102040816326

In [37]:
print(model.get_cpds("sex"))

+--------+----------+
| sex(0) | 0.264368 |
+--------+----------+
| sex(1) | 0.735632 |
+--------+----------+


In [38]:
model.get_independencies()

(age _|_ exang, sex)
(age _|_ exang | sex)
(age _|_ sex | exang)
(age _|_ restecg, chol, thalach | heartdisease)
(age _|_ exang, sex | fbs)
(age _|_ chol, thalach | restecg, heartdisease)
(age _|_ restecg, chol, thalach | sex, heartdisease)
(age _|_ exang | fbs, sex)
(age _|_ restecg, chol, thalach | exang, heartdisease)
(age _|_ sex | fbs, exang)
(age _|_ restecg, chol, thalach | trestbps, heartdisease)
(age _|_ restecg, chol, thalach, heartdisease | trestbps, fbs)
(age _|_ restecg, thalach | chol, heartdisease)
(age _|_ restecg, chol | thalach, heartdisease)
(age _|_ restecg, chol, thalach | fbs, heartdisease)
(age _|_ chol, thalach | restecg, sex, heartdisease)
(age _|_ chol, thalach | restecg, exang, heartdisease)
(age _|_ chol, thalach | restecg, trestbps, heartdisease)
(age _|_ chol, thalach, heartdisease | restecg, fbs, trestbps)
(age _|_ thalach | restecg, chol, heartdisease)
(age _|_ chol | restecg, thalach, heartdisease)
(age _|_ chol, thalach | restecg, fbs, heartdisease)
(a

In [39]:
from pgmpy.inference import VariableElimination
heartDisease_infer = VariableElimination(model)
q = heartDisease_infer.query(variables=["heartdisease"],
evidence={"age": 29})

print(q)

Finding Elimination Order: : 100%|██████████| 7/7 [00:00<00:00, 448.09it/s]
Eliminating: exang: 100%|██████████| 7/7 [00:00<00:00, 366.63it/s]

+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              0.6630 |
+-----------------+---------------------+
| heartdisease(1) |              0.3370 |
+-----------------+---------------------+





In [40]:
q = heartDisease_infer.query(variables=["heartdisease"],
evidence={"chol": 100})

print(q)

  warn(
Finding Elimination Order: : 100%|██████████| 7/7 [00:00<?, ?it/s]
Eliminating: exang: 100%|██████████| 7/7 [00:00<00:00, 90.35it/s]

+-----------------+---------------------+
| heartdisease    |   phi(heartdisease) |
| heartdisease(0) |              1.0000 |
+-----------------+---------------------+
| heartdisease(1) |              0.0000 |
+-----------------+---------------------+



