# 7. Problem: Write a program to construct a Bayesian network considering medical data. Use this model to demonstrate the diagnosis of heart patients using the standard Heart Disease Data Set. You can use a Python ML library API.

In [4]:
# Network structure: Pollution and Smoker are the parents of Cancer,
# and Cancer is the parent of both Xray and Dyspnoea.
from pgmpy.models import BayesianModel

cancer_model = BayesianModel(
    [
        ('Pollution', 'Cancer'),
        ('Smoker', 'Cancer'),
        ('Cancer', 'Xray'),
        ('Cancer', 'Dyspnoea'),
    ]
)

In [5]:
# Print the model's string representation.
print(cancer_model)




In [6]:
# List the variables (nodes) of the network.
cancer_model.nodes()

NodeView(('Pollution', 'Cancer', 'Smoker', 'Xray', 'Dyspnoea'))

In [7]:
# List the directed (parent, child) edges of the network.
cancer_model.edges()

OutEdgeView([('Pollution', 'Cancer'), ('Cancer', 'Xray'), ('Cancer', 'Dyspnoea'), ('Smoker', 'Cancer')])

In [8]:
# No CPDs have been attached to the model at this point, so the list is empty.
cancer_model.get_cpds()

[]

In [9]:
# Now defining the parameters: one TabularCPD per node of the network.
# Each column of `values` is a distribution over the variable's two states
# (row 0 = state 0, row 1 = state 1) and sums to 1.
from pgmpy.factors.discrete import TabularCPD

# Root nodes: unconditional distributions (a single column each).
cpd_poll = TabularCPD(variable='Pollution', variable_card=2,
                      values=[[0.9], [0.1]])
cpd_smoke = TabularCPD(variable='Smoker', variable_card=2,
                       values=[[0.3], [0.7]])
# P(Cancer | Smoker, Pollution). Columns follow the order of `evidence`,
# with the last evidence variable varying fastest:
# (Smoker_0, Pollution_0), (Smoker_0, Pollution_1),
# (Smoker_1, Pollution_0), (Smoker_1, Pollution_1).
# NOTE(review): the states are unlabeled indices; which index means
# "present"/"high" is not stated anywhere in this notebook - confirm against
# the source of these numbers.
cpd_cancer = TabularCPD(variable='Cancer', variable_card=2,
                        values=[[0.03, 0.05, 0.001, 0.02],
                                [0.97, 0.95, 0.999, 0.98]],
                        evidence=['Smoker', 'Pollution'],
                        evidence_card=[2, 2])
# P(Xray | Cancer): columns are Cancer_0, Cancer_1.
cpd_xray = TabularCPD(variable='Xray', variable_card=2,
                      values=[[0.9, 0.2], [0.1, 0.8]],
                      evidence=['Cancer'], evidence_card=[2])
# P(Dyspnoea | Cancer): columns are Cancer_0, Cancer_1.
cpd_dysp = TabularCPD(variable='Dyspnoea', variable_card=2,
                      values=[[0.65, 0.3], [0.35, 0.7]],
                      evidence=['Cancer'], evidence_card=[2])

In [10]:
# Associating the parameters with the model structure
# (one CPD for each of the five nodes).
cancer_model.add_cpds(cpd_poll, cpd_smoke, cpd_cancer, cpd_xray, cpd_dysp)

# Checking if the cpds are valid for the model; True is returned here.
cancer_model.check_model()

True

In [11]:
# Doing some simple queries on the network.
# With nothing observed, the only trail between the two parents,
# Pollution -> Cancer <- Smoker, is blocked at the collider Cancer,
# so the result is False.
cancer_model.is_active_trail('Pollution', 'Smoker')

False

In [12]:
# Observing the common child Cancer activates the trail between its
# parents Pollution and Smoker, so the result is True.
cancer_model.is_active_trail('Pollution', 'Smoker', observed=['Cancer'])

True

In [13]:
# After add_cpds, the model holds one CPD per node.
cancer_model.get_cpds()

[<TabularCPD representing P(Pollution:2) at 0x7f9747fea898>,
 <TabularCPD representing P(Smoker:2) at 0x7f9747fea860>,
 <TabularCPD representing P(Cancer:2 | Smoker:2, Pollution:2) at 0x7f9747fea908>,
 <TabularCPD representing P(Xray:2 | Cancer:2) at 0x7f9747fea940>,
 <TabularCPD representing P(Dyspnoea:2 | Cancer:2) at 0x7f9747fea978>]

In [14]:
# Show the unconditional distribution of Pollution.
print(cancer_model.get_cpds('Pollution'))

╒═════════════╤═════╕
│ Pollution_0 │ 0.9 │
├─────────────┼─────┤
│ Pollution_1 │ 0.1 │
╘═════════════╧═════╛


In [15]:
# Show the unconditional distribution of Smoker.
print(cancer_model.get_cpds('Smoker'))

╒══════════╤═════╕
│ Smoker_0 │ 0.3 │
├──────────┼─────┤
│ Smoker_1 │ 0.7 │
╘══════════╧═════╛


In [16]:
# Show P(Xray | Cancer); one column per state of Cancer.
print(cancer_model.get_cpds('Xray'))

╒════════╤══════════╤══════════╕
│ Cancer │ Cancer_0 │ Cancer_1 │
├────────┼──────────┼──────────┤
│ Xray_0 │ 0.9      │ 0.2      │
├────────┼──────────┼──────────┤
│ Xray_1 │ 0.1      │ 0.8      │
╘════════╧══════════╧══════════╛


In [17]:
# Show P(Dyspnoea | Cancer); one column per state of Cancer.
print(cancer_model.get_cpds('Dyspnoea'))

╒════════════╤══════════╤══════════╕
│ Cancer     │ Cancer_0 │ Cancer_1 │
├────────────┼──────────┼──────────┤
│ Dyspnoea_0 │ 0.65     │ 0.3      │
├────────────┼──────────┼──────────┤
│ Dyspnoea_1 │ 0.35     │ 0.7      │
╘════════════╧══════════╧══════════╛


In [18]:
# Show P(Cancer | Smoker, Pollution); one column per combination of the
# parents' states.
print(cancer_model.get_cpds('Cancer'))

╒═══════════╤═════════════╤═════════════╤═════════════╤═════════════╕
│ Smoker    │ Smoker_0    │ Smoker_0    │ Smoker_1    │ Smoker_1    │
├───────────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Pollution │ Pollution_0 │ Pollution_1 │ Pollution_0 │ Pollution_1 │
├───────────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Cancer_0  │ 0.03        │ 0.05        │ 0.001       │ 0.02        │
├───────────┼─────────────┼─────────────┼─────────────┼─────────────┤
│ Cancer_1  │ 0.97        │ 0.95        │ 0.999       │ 0.98        │
╘═══════════╧═════════════╧═════════════╧═════════════╧═════════════╛


In [19]:
# Local independencies of Xray: given its parent Cancer, Xray is
# independent of the remaining variables.
cancer_model.local_independencies('Xray')

(Xray _|_ Pollution, Smoker, Dyspnoea | Cancer)

In [20]:
# Local independencies of the root node Pollution.
# NOTE(review): the recorded output also lists Cancer, Xray and Dyspnoea,
# which are descendants of Pollution in this graph; only the non-descendant
# Smoker would be expected. Possibly a quirk of the pgmpy version used -
# confirm with a current release.
cancer_model.local_independencies('Pollution')

(Pollution _|_ Smoker, Xray, Cancer, Dyspnoea)

In [21]:
# Local independencies of the root node Smoker.
# NOTE(review): the recorded output also lists Cancer, Xray and Dyspnoea,
# which are descendants of Smoker in this graph; only the non-descendant
# Pollution would be expected. Possibly a quirk of the pgmpy version used -
# confirm with a current release.
cancer_model.local_independencies('Smoker')

(Smoker _|_ Pollution, Xray, Cancer, Dyspnoea)

In [22]:
# Local independencies of Dyspnoea: given its parent Cancer, Dyspnoea is
# independent of the remaining variables.
cancer_model.local_independencies('Dyspnoea')

(Dyspnoea _|_ Xray, Smoker, Pollution | Cancer)

In [23]:
# Local independencies of Cancer given its parents Pollution and Smoker.
# NOTE(review): the recorded output lists Xray and Dyspnoea, which are
# children of Cancer and therefore would not be expected here. Possibly a
# quirk of the pgmpy version used - confirm with a current release.
cancer_model.local_independencies('Cancer')

(Cancer _|_ Xray, Dyspnoea | Pollution, Smoker)

In [24]:
# Enumerate the conditional independence statements implied by the graph.
cancer_model.get_independencies()

(Pollution _|_ Smoker)
(Pollution _|_ Xray, Dyspnoea | Cancer)
(Pollution _|_ Xray, Dyspnoea | Cancer, Smoker)
(Pollution _|_ Dyspnoea | Xray, Cancer)
(Pollution _|_ Xray | Cancer, Dyspnoea)
(Pollution _|_ Dyspnoea | Cancer, Xray, Smoker)
(Pollution _|_ Xray | Cancer, Smoker, Dyspnoea)
(Smoker _|_ Pollution)
(Smoker _|_ Xray, Dyspnoea | Cancer)
(Smoker _|_ Xray, Dyspnoea | Pollution, Cancer)
(Smoker _|_ Dyspnoea | Xray, Cancer)
(Smoker _|_ Xray | Cancer, Dyspnoea)
(Smoker _|_ Dyspnoea | Pollution, Xray, Cancer)
(Smoker _|_ Xray | Pollution, Cancer, Dyspnoea)
(Xray _|_ Pollution, Smoker, Dyspnoea | Cancer)
(Xray _|_ Pollution, Dyspnoea | Cancer, Smoker)
(Xray _|_ Smoker, Dyspnoea | Cancer, Pollution)
(Xray _|_ Pollution, Smoker | Cancer, Dyspnoea)
(Xray _|_ Dyspnoea | Cancer, Smoker, Pollution)
(Xray _|_ Pollution | Cancer, Smoker, Dyspnoea)
(Xray _|_ Smoker | Dyspnoea, Cancer, Pollution)
(Dyspnoea _|_ Xray, Smoker, Pollution | Cancer)
(Dyspnoea _|_ Xray, Pollution | Cancer, Smoker)
(Dy

In [25]:
# Doing exact inference using Variable Elimination
from pgmpy.inference import VariableElimination
cancer_infer = VariableElimination(cancer_model)

# Computing P(Cancer | Smoker = state 1); Pollution is unobserved and is
# summed out. The result is indexed by variable name to get the factor.
q = cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1})
print(q['Cancer'])

╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0029 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9971 │
╘══════════╧═══════════════╛


In [26]:
# Computing P(Cancer | Smoker = state 1) again (same query as the previous cell).
q = cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1})
print(q['Cancer'])

╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0029 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9971 │
╘══════════╧═══════════════╛


In [27]:
# Computing P(Cancer | Smoker = state 1, Pollution = state 1). With both
# parents observed this is the matching column of the Cancer CPD.
q = cancer_infer.query(variables=['Cancer'], evidence={'Smoker': 1,'Pollution': 1})
print(q['Cancer'])

╒══════════╤═══════════════╕
│ Cancer   │   phi(Cancer) │
╞══════════╪═══════════════╡
│ Cancer_0 │        0.0200 │
├──────────┼───────────────┤
│ Cancer_1 │        0.9800 │
╘══════════╧═══════════════╛


In [28]:
import numpy as np
from urllib.request import urlopen
import urllib
import matplotlib.pyplot as plt # Visuals
import seaborn as sns 
import sklearn as skl
import pandas as pd

In [29]:
import sys

# NOTE: despite the variable's name, this URL points at the *Hungarian* subset
# of the UCI heart-disease data (processed.hungarian.data). The name is kept
# so that any other cell referring to it keeps working.
Cleveland_data_URL = 'http://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.hungarian.data'

# See a whole array when we output it. `threshold=np.nan` is rejected by
# current NumPy releases (ValueError: threshold must be non-NAN);
# sys.maxsize is the supported way to disable truncation.
np.set_printoptions(threshold=sys.maxsize)

# Column names are supplied explicitly via `names`.
names = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach', 'exang', 'oldpeak',
         'slope', 'ca', 'thal', 'heartdisease']

# Download and parse the data, closing the HTTP response once it has been
# read. Missing values appear as '?' in the loaded frame and are handled in a
# later cell.
with urlopen(Cleveland_data_URL) as response:
    heartDisease = pd.read_csv(response, names=names)
heartDisease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease
0,28,1,2,130,132,0,2,185,0,0.0,?,?,?,0
1,29,1,2,120,243,0,0,160,0,0.0,?,?,?,0
2,29,1,2,140,?,0,0,170,0,0.0,?,?,?,0
3,30,0,1,170,237,0,1,170,0,0.0,?,?,6,0
4,31,0,2,100,219,0,1,150,0,0.0,?,?,?,0


In [30]:
# Remove, in place, the four attributes that the network built below does
# not use.
heartDisease.drop(columns=['ca', 'slope', 'thal', 'oldpeak'], inplace=True)

# Turn the '?' placeholders into NaN, then inspect the column dtypes.
heartDisease = heartDisease.replace('?', np.nan)
heartDisease.dtypes

age              int64
sex              int64
cp               int64
trestbps        object
chol            object
fbs             object
restecg         object
thalach         object
exang           object
heartdisease     int64
dtype: object

In [39]:
# Preview the first rows of the cleaned frame.
heartDisease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,heartdisease
0,28,1,2,130,132.0,0,2,185,0,0
1,29,1,2,120,243.0,0,0,160,0,0
2,29,1,2,140,,0,0,170,0,0
3,30,0,1,170,237.0,0,1,170,0,0
4,31,0,2,100,219.0,0,1,150,0,0


In [31]:
# Columns remaining after the unused attributes were removed.
heartDisease.columns

Index(['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg', 'thalach',
       'exang', 'heartdisease'],
      dtype='object')

In [32]:
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator, BayesianEstimator

# Hand-specified structure:
#   age, sex, exang -> trestbps;  age -> fbs;
#   trestbps, fbs   -> heartdisease;
#   heartdisease    -> restecg, thalach, chol.
# The edge ('sex', 'trestbps') used to be listed twice; the redundant copy
# has been dropped.
# NOTE(review): the duplicate may have been a typo for a different edge -
# confirm the intended structure.
model = BayesianModel([
    ('age', 'trestbps'), ('age', 'fbs'),
    ('sex', 'trestbps'),
    ('exang', 'trestbps'),
    ('trestbps', 'heartdisease'), ('fbs', 'heartdisease'),
    ('heartdisease', 'restecg'), ('heartdisease', 'thalach'), ('heartdisease', 'chol'),
])

# Learning CPDs using Maximum Likelihood Estimators
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# To inspect every learned CPD, uncomment:
# for cpd in model.get_cpds():
#     print("CPD of {variable}:".format(variable=cpd.variable))
#     print(cpd)

In [33]:
# Learned distribution of age; every distinct age value in the data is
# its own discrete state.
print(model.get_cpds('age'))

╒═════════╤════════════╕
│ age(28) │ 0.00383142 │
├─────────┼────────────┤
│ age(29) │ 0.00383142 │
├─────────┼────────────┤
│ age(30) │ 0.00383142 │
├─────────┼────────────┤
│ age(31) │ 0.00766284 │
├─────────┼────────────┤
│ age(32) │ 0.0153257  │
├─────────┼────────────┤
│ age(33) │ 0.00766284 │
├─────────┼────────────┤
│ age(34) │ 0.0153257  │
├─────────┼────────────┤
│ age(35) │ 0.0191571  │
├─────────┼────────────┤
│ age(36) │ 0.0191571  │
├─────────┼────────────┤
│ age(37) │ 0.0306513  │
├─────────┼────────────┤
│ age(38) │ 0.0191571  │
├─────────┼────────────┤
│ age(39) │ 0.0344828  │
├─────────┼────────────┤
│ age(40) │ 0.0191571  │
├─────────┼────────────┤
│ age(41) │ 0.0383142  │
├─────────┼────────────┤
│ age(42) │ 0.0268199  │
├─────────┼────────────┤
│ age(43) │ 0.0421456  │
├─────────┼────────────┤
│ age(44) │ 0.0268199  │
├─────────┼────────────┤
│ age(45) │ 0.0229885  │
├─────────┼────────────┤
│ age(46) │ 0.045977   │
├─────────┼────────────┤
│ age(47) │ 0.0344828  │


In [34]:
# Learned P(chol | heartdisease); every distinct cholesterol value in the
# data is its own discrete state.
print(model.get_cpds('chol'))

╒══════════════╤══════════════════════╤══════════════════════╕
│ heartdisease │ heartdisease(0)      │ heartdisease(1)      │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(100)    │ 0.006535947712418301 │ 0.006535947712418301 │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(117)    │ 0.006535947712418301 │ 0.006535947712418301 │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(129)    │ 0.006535947712418301 │ 0.006535947712418301 │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(132)    │ 0.006535947712418301 │ 0.006535947712418301 │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(147)    │ 0.006535947712418301 │ 0.006535947712418301 │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(156)    │ 0.006535947712418301 │ 0.006535947712418301 │
├──────────────┼──────────────────────┼──────────────────────┤
│ chol(160)    │ 0.006535947712418301 │ 0.0065359477124

In [35]:
# Learned distribution of sex.
print(model.get_cpds('sex'))

╒════════╤══════════╕
│ sex(0) │ 0.264368 │
├────────┼──────────┤
│ sex(1) │ 0.735632 │
╘════════╧══════════╛


In [36]:
# Enumerate the conditional independence statements implied by the
# heart-disease graph.
model.get_independencies()

(age _|_ sex, exang)
(age _|_ exang | sex)
(age _|_ sex, exang | fbs)
(age _|_ sex | exang)
(age _|_ restecg, chol, thalach | heartdisease)
(age _|_ exang | sex, fbs)
(age _|_ restecg, chol, thalach | sex, heartdisease)
(age _|_ chol, thalach | restecg, heartdisease)
(age _|_ sex | fbs, exang)
(age _|_ restecg, chol, heartdisease, thalach | fbs, trestbps)
(age _|_ restecg, chol, thalach | heartdisease, fbs)
(age _|_ restecg, chol, thalach | heartdisease, exang)
(age _|_ restecg, chol, thalach | heartdisease, trestbps)
(age _|_ restecg, chol | heartdisease, thalach)
(age _|_ restecg, thalach | heartdisease, chol)
(age _|_ chol, thalach | sex, heartdisease, restecg)
(age _|_ restecg, chol, heartdisease, thalach | sex, fbs, trestbps)
(age _|_ restecg, chol, thalach | sex, fbs, heartdisease)
(age _|_ restecg, chol, thalach | sex, heartdisease, exang)
(age _|_ restecg, chol, thalach | sex, heartdisease, trestbps)
(age _|_ restecg, chol | sex, heartdisease, thalach)
(age _|_ restecg, thalach

In [37]:
# Doing exact inference using Variable Elimination
from pgmpy.inference import VariableElimination
HeartDisease_infer = VariableElimination(model)

# Computing the distribution of heartdisease given evidence on age.
# NOTE(review): depending on the pgmpy version, evidence values are read
# either as state indices or as state names, so `28` may not mean
# "age == 28" - confirm against the installed version.
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
print(q['heartdisease'])

╒════════════════╤═════════════════════╕
│ heartdisease   │   phi(heartdisease) │
╞════════════════╪═════════════════════╡
│ heartdisease_0 │              0.5770 │
├────────────────┼─────────────────────┤
│ heartdisease_1 │              0.4230 │
╘════════════════╧═════════════════════╛


In [38]:
# Computing the distribution of heartdisease given evidence on chol.
# NOTE(review): depending on the pgmpy version, `100` is read either as a
# state index or as the state name (chol == 100) - confirm against the
# installed version.
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q['heartdisease'])

╒════════════════╤═════════════════════╕
│ heartdisease   │   phi(heartdisease) │
╞════════════════╪═════════════════════╡
│ heartdisease_0 │              0.5510 │
├────────────────┼─────────────────────┤
│ heartdisease_1 │              0.4490 │
╘════════════════╧═════════════════════╛
