In [1]:
!pip install pgmpy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [30]:
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
import sys

In [31]:
# Structure of the Bayesian network, given as directed (parent, child) edges.
# Node legend (taken from the CPD comments in this notebook):
#   S = smoking, A = alcohol abuse, F = solar flare, P = phone,
#   Y = yellow fingers, I = weakened immune system, R = radiation,
#   B = burn skin, C = cancer.
edges = [
    ('S', 'Y'),
    ('S', 'C'),
    ('A', 'C'),
    ('A', 'I'),
    ('I', 'C'),
    ('R', 'C'),
    ('F', 'R'),
    ('P', 'R'),
    ('R', 'B'),
]
model = BayesianNetwork(edges)

In [32]:
# Prior (unconditional) distributions for the four root nodes.
# Every variable is binary; the first row of `values` is the probability of
# state 0 and the second row the probability of state 1.

# P(S): smoking
cpd_S = TabularCPD(
    variable='S',
    variable_card=2,
    values=[[0.9],
            [0.1]],
)

# P(A): alcohol abuse
cpd_A = TabularCPD(
    variable='A',
    variable_card=2,
    values=[[0.9],
            [0.1]],
)

# P(F): solar flare
cpd_F = TabularCPD(
    variable='F',
    variable_card=2,
    values=[[0.999],
            [0.001]],
)

# P(P): phone
# NOTE(review): unlike the other priors, state 1 carries almost all of the
# mass here (0.999) -- confirm this matches the problem statement.
cpd_P = TabularCPD(
    variable='P',
    variable_card=2,
    values=[[0.001],
            [0.999]],
)

In [33]:
# Conditional distributions for the non-root nodes.
# Layout of `values`: one row per state of the child variable (state 0 first),
# one column per combination of evidence states. With several evidence
# variables, pgmpy orders the columns so that the LAST evidence variable
# varies fastest (see the TabularCPD documentation).

# P(Y | S): yellow fingers given smoking. Columns: S=0, S=1.
cpd_Y = TabularCPD(
    variable='Y', variable_card=2,
    values=[[0.96, 0.05],
            [0.04, 0.95]],
    evidence=['S'], evidence_card=[2],
)

# P(I | A): weakened immune system given alcohol abuse. Columns: A=0, A=1.
cpd_I = TabularCPD(
    variable='I', variable_card=2,
    values=[[0.95, 0.7],
            [0.05, 0.3]],
    evidence=['A'], evidence_card=[2],
)

# P(R | F, P): radiation given solar flare and phone.
# Columns: (F=0,P=0), (F=0,P=1), (F=1,P=0), (F=1,P=1).
cpd_R = TabularCPD(
    variable='R', variable_card=2,
    values=[[0.99, 0.95, 0.8, 0.7],
            [0.01, 0.05, 0.2, 0.3]],
    evidence=['F', 'P'], evidence_card=[2, 2],
)

# P(B | R): burn skin given radiation. Columns: R=0, R=1.
cpd_B = TabularCPD(
    variable='B', variable_card=2,
    values=[[0.98, 0.8],
            [0.02, 0.2]],
    evidence=['R'], evidence_card=[2],
)

# P(C | A, I, R, S): cancer given alcohol abuse, weakened immune system,
# radiation and smoking. The 16 columns are written four per line; within a
# line the columns are (R=0,S=0), (R=0,S=1), (R=1,S=0), (R=1,S=1).
cpd_C = TabularCPD(
    variable='C', variable_card=2,
    values=[
        # Row for C=0
        [0.9, 0.5, 0.8, 0.4,    # A=0, I=0
         0.7, 0.4, 0.6, 0.2,    # A=0, I=1
         0.8, 0.4, 0.7, 0.3,    # A=1, I=0
         0.6, 0.3, 0.5, 0.1],   # A=1, I=1
        # Row for C=1
        [0.1, 0.5, 0.2, 0.6,    # A=0, I=0
         0.3, 0.6, 0.4, 0.8,    # A=0, I=1
         0.2, 0.6, 0.3, 0.7,    # A=1, I=0
         0.4, 0.7, 0.5, 0.9],   # A=1, I=1
    ],
    evidence=['A', 'I', 'R', 'S'], evidence_card=[2, 2, 2, 2],
)

In [34]:
# Attach every CPD to the network, then run pgmpy's consistency check on the
# assembled model and show its result (True for this network).
model.add_cpds(
    cpd_S, cpd_F, cpd_P, cpd_A,
    cpd_B, cpd_I, cpd_R, cpd_C, cpd_Y,
)
model_is_valid = model.check_model()
print(model_is_valid)

True


### Inference

In [35]:
# ---------------------------- Inference ----------------------------

from pgmpy.inference import VariableElimination

# Inference engine based on variable elimination, bound to the network
# built above; all queries below go through this object.
infer = VariableElimination(model=model)

### b) What is the probability of radiation given that cancer is present ($C = 1$)? Show values for $R\in\{0, 1\}$.

In [36]:
# Posterior distribution of radiation given that cancer is present: P(R | C=1).
# With joint=False the result is indexed by variable name, hence the ['R']
# lookup to obtain the factor over R.
phi_query = infer.query(
    variables=['R'],
    evidence={'C': 1},
    joint=False,
)
factor = phi_query['R']
print('Probability of Radiation given Cancer', factor, sep='\n')

Probability of Radiation given Cancer
+------+----------+
| R    |   phi(R) |
| R(0) |   0.9214 |
+------+----------+
| R(1) |   0.0786 |
+------+----------+


### C) What is the probability of cancer given the patient has skin burn, yellow fingers and abuses alcohol? Show values for $C\in\{0, 1\}$.
$$\text{Solve }P(C=1\mid B=1, Y=1, A=1)$$

In [37]:
# Posterior distribution of cancer given skin burn, yellow fingers and
# alcohol abuse: P(C | B=1, Y=1, A=1).
# With joint=False the result is indexed by variable name, hence the ['C']
# lookup to obtain the factor over C.
phi_query = infer.query(
    variables=['C'],
    evidence={'B': 1, 'Y': 1, 'A': 1},
    joint=False,
)
factor = phi_query['C']
print('Probability of Cancer given skin burn, yellow fingers and alcohol abuse', factor, sep='\n')

Probability of Cancer given skin burn, yellow fingers and alcohol abuse
+------+----------+
| C    |   phi(C) |
| C(0) |   0.4296 |
+------+----------+
| C(1) |   0.5704 |
+------+----------+


### C) Are smoking and skin burn independent given that cancer is present? Justify your answer.

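No, they are dependent given $C$. The nodes $S$, $C$ and $R$ form a V-structure (collider):
$$S\rightarrow C\leftarrow R$$
Observing the common effect $C$ activates this path (the "explaining away" effect), so $S$ and $R$ become dependent given $C$. Since $R$ itself is not observed, this dependence propagates along the cascade
$$R \rightarrow B$$
Hence the path $S\rightarrow C\leftarrow R\rightarrow B$ is active given $C$, and $S$ and $B$ are dependent.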

### D) What is the probability of cancer if you never abused alcohol or used a cellphone?
$$\text{Solve }P(C=1\mid A=0, P=0)$$

In [38]:
# Posterior distribution of cancer given no alcohol abuse and no cellphone
# use: P(C | A=0, P=0).
# With joint=False the result is indexed by variable name, hence the ['C']
# lookup to obtain the factor over C.
phi_query = infer.query(
    variables=['C'],
    evidence={'A': 0, 'P': 0},
    joint=False,
)
factor = phi_query['C']
print('Probability of Cancer given no alcohol and no cellphone', factor, sep='\n')

Probability of Cancer given no alcohol and no cellphone
+------+----------+
| C    |   phi(C) |
| C(0) |   0.8495 |
+------+----------+
| C(1) |   0.1505 |
+------+----------+
