In [1]:
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import BeliefPropagation

In [2]:
# Generate model object: an empty Bayesian network. Its nodes, edges and CPDs
# are added step by step in the cells that follow.
model = BayesianModel()

In [3]:
# Register the five random variables (nodes) of the network.
model.add_nodes_from([
    'H',  # History of smoking
    'B',  # Bronchitis
    'L',  # Lung cancer
    'F',  # Fatigue
    'C',  # Chest X-ray
])

In [4]:
# Wire up the directed edges; each tuple reads (parent, child).
model.add_edges_from([
    ('H', 'B'),  # history of smoking -> bronchitis
    ('H', 'L'),  # history of smoking -> lung cancer
    ('B', 'F'),  # bronchitis -> fatigue
    ('L', 'F'),  # lung cancer -> fatigue
    ('L', 'C'),  # lung cancer -> chest X-ray
])

In [5]:
# Create CPDs

# History of Smoking (H has no parents, so this CPD is the prior P(H))
# h0: There is a history of smoking, h1: There is no history of smoking
# +-----+-----+
# | H_0 | 0.2 |
# +-----+-----+
# | H_1 | 0.8 |
# +-----+-----+
# `values` holds one row per state of H and one column per evidence
# configuration. With no evidence there is a single column, so the table has
# shape (2, 1). A (1, 2) row such as [[0.2, 0.8]] only works on pgmpy releases
# that flatten the array; releases that validate the shape reject it.
cpd_h = TabularCPD(variable='H', variable_card=2, values=[[0.2], [0.8]])

# Bronchitis: P(B | H)
# b0: Bronchitis is present, b1: Bronchitis is absent
#
#           H_0     H_1
#   B_0    0.25    0.05
#   B_1    0.75    0.95
cpd_b = TabularCPD(
    variable='B',
    variable_card=2,
    evidence=['H'],
    evidence_card=[2],
    values=[
        [0.25, 0.05],  # B_0
        [0.75, 0.95],  # B_1
    ],
)

# Lung Cancer: P(L | H)
# l0: Lung cancer is present, l1: Lung cancer is absent
#
#           H_0      H_1
#   L_0    0.003    5e-05
#   L_1    0.997    0.99995
cpd_l = TabularCPD(
    variable='L',
    variable_card=2,
    evidence=['H'],
    evidence_card=[2],
    values=[
        [0.003, 0.00005],  # L_0
        [0.997, 0.99995],  # L_1
    ],
)

# Fatigue: P(F | B, L)
# f0: Fatigue is present, f1: Fatigue is absent
#
# Columns follow the evidence order ['B', 'L'], with B changing slowest:
#
#           B_0,L_0   B_0,L_1   B_1,L_0   B_1,L_1
#   F_0       0.75      0.1       0.5       0.05
#   F_1       0.25      0.9       0.5       0.95
cpd_f = TabularCPD(
    variable='F',
    variable_card=2,
    evidence=['B', 'L'],
    evidence_card=[2, 2],
    values=[
        [0.75, 0.1, 0.5, 0.05],  # F_0
        [0.25, 0.9, 0.5, 0.95],  # F_1
    ],
)

# Chest X-ray: P(C | L)
# c0: Chest X-ray is positive, c1: Chest X-ray is negative
#
#           L_0     L_1
#   C_0     0.6     0.02
#   C_1     0.4     0.98
cpd_c = TabularCPD(
    variable='C',
    variable_card=2,
    evidence=['L'],
    evidence_card=[2],
    values=[
        [0.6, 0.02],  # C_0
        [0.4, 0.98],  # C_1
    ],
)

In [6]:
# Attach the five CPDs to the network, one per node.
model.add_cpds(
    cpd_h,
    cpd_b,
    cpd_l,
    cpd_f,
    cpd_c,
)

# Sanity check: check_model inspects the network structure together with the
# CPDs and verifies that the CPDs are correctly defined and sum to 1.
model.check_model()

True

In [7]:
# Getting all the local independencies in the network, requested for each of
# the five nodes. The recorded output lists one assertion each for B, L, F
# and C, and none for H.
model.local_independencies(['H', 'B', 'L', 'F', 'C'])

(B _|_ L, C | H)
(L _|_ B | H)
(F _|_ H, C | L, B)
(C _|_ F, H, B | L)

In [8]:
# Inference
# Posterior of lung cancer given full evidence on the other four variables:
# P(L | B=0 bronchitis present, F=1 fatigue absent,
#        C=0 X-ray positive, H=0 history of smoking).
inference = BeliefPropagation(model)
query_result = inference.query(variables=['L'],
                               evidence={'B': 0, 'F': 1, 'C': 0, 'H': 0})

# Older pgmpy releases return a dict keyed by variable name, while newer ones
# return the resulting factor directly. Handle both so the cell does not
# depend on which return shape the installed version uses.
print(query_result['L'] if isinstance(query_result, dict) else query_result)

+-----+----------+
| L   |   phi(L) |
| L_0 |   0.0245 |
+-----+----------+
| L_1 |   0.9755 |
+-----+----------+


  phi.values = phi.values[slice_]
  phi1.values = phi1.values[slice_]
  phi1.values = phi1.values[slice_]


In [9]:
# Save model in file
# Serialises the network to 'cancer_example.bif'. The path is relative, so the
# file is created in the current working directory.
from pgmpy.readwrite import BIFWriter

writer = BIFWriter(model)
# Set the network name on the writer before writing; presumably this is the
# name recorded in the BIF file (confirm against the BIFWriter docs).
writer.network_name = 'cancer_example_from_neapolitan'
writer.write_bif('cancer_example.bif')

In [15]:
# Load model from file
# Reads back 'cancer_example.bif', the same relative path the save cell writes.
from pgmpy.readwrite import BIFReader

# Parse the BIF file and rebuild a model object from it
reader = BIFReader('cancer_example.bif')
loaded_model = reader.get_model()

# Check: print the loaded nodes and edges so they can be compared by eye with
# the ones added to `model` earlier (structure only; CPDs are not printed here)
print(loaded_model.nodes())
print(loaded_model.edges())


['B', 'C', 'F', 'H', 'L']
[('B', 'F'), ('H', 'B'), ('H', 'L'), ('L', 'C'), ('L', 'F')]
