
Ref: https://healthyalgorithms.com/2011/11/23/causal-modeling-in-python-bayesian-networks-in-pymc/

# Install pgmpy

In [2]:
!pip install pgmpy

Collecting pgmpy
[?25l  Downloading https://files.pythonhosted.org/packages/68/6b/661a65aa7788f3aff7228ba81625c540917d656f41e3eb031c6d60b0a25d/pgmpy-0.1.10-py3-none-any.whl (339kB)
[K     |████████████████████████████████| 348kB 2.8MB/s 
Installing collected packages: pgmpy
Successfully installed pgmpy-0.1.10


#Import pgmpy

In [71]:
from pgmpy.factors.discrete  import TabularCPD
from pgmpy.models  import BayesianModel

# Check version
import pgmpy
print(pgmpy.__version__)

0.1.10


# Setting up your model

![alt text](https://imghostr.com/image.php?sig=31yBZ4681&h=d055d30eb2df0925ae5082caeb676f70&t=1583943208)

In [64]:
model = BayesianModel([('rain', 'sprinkler'),
                       ('rain', 'wetgrass'),
                       ('sprinkler', 'wetgrass')
                      ])

#######################################

# rain definition
rain_cpd = TabularCPD(
    variable = 'rain', variable_card = 2,
    values = [[0.2], [0.8]]
)

# sprinkler definition
sprinkler_cpd = TabularCPD(
    variable = 'sprinkler', variable_card = 2,
    values = [[0.4, 0.01], [0.6, 0.99]],
    evidence = ['rain'], evidence_card = [2])

# grass wet definition
wetgrass_cpd = TabularCPD(
    variable = 'wetgrass', variable_card = 2,
    values = [[0, 0.8, 0.9, 0.99], [1, 0.2, 0.1, 0.01]],
              evidence = ['rain', 'sprinkler'], evidence_card = [2, 2])


print(rain_cpd)
print(sprinkler_cpd)
print(wetgrass_cpd)

+---------+-----+
| rain(0) | 0.2 |
+---------+-----+
| rain(1) | 0.8 |
+---------+-----+
+--------------+---------+---------+
| rain         | rain(0) | rain(1) |
+--------------+---------+---------+
| sprinkler(0) | 0.4     | 0.01    |
+--------------+---------+---------+
| sprinkler(1) | 0.6     | 0.99    |
+--------------+---------+---------+
+-------------+--------------+--------------+--------------+--------------+
| rain        | rain(0)      | rain(0)      | rain(1)      | rain(1)      |
+-------------+--------------+--------------+--------------+--------------+
| sprinkler   | sprinkler(0) | sprinkler(1) | sprinkler(0) | sprinkler(1) |
+-------------+--------------+--------------+--------------+--------------+
| wetgrass(0) | 0.0          | 0.8          | 0.9          | 0.99         |
+-------------+--------------+--------------+--------------+--------------+
| wetgrass(1) | 1.0          | 0.2          | 0.1          | 0.01         |
+-------------+--------------+-------------

# Add relationships to your models

In [0]:
model.add_cpds(rain_cpd, sprinkler_cpd, wetgrass_cpd)

# Examine the structure of your graph

In [46]:
model.get_cpds()

[<TabularCPD representing P(rain:2) at 0x7f973ece3b00>,
 <TabularCPD representing P(sprinkler:2 | rain:2) at 0x7f973ece3b70>,
 <TabularCPD representing P(wetgrass:2 | rain:2, sprinkler:2) at 0x7f973ece3ba8>]

# Find all active trail nodes

In [47]:
model.active_trail_nodes('wetgrass')

{'wetgrass': {'rain', 'sprinkler', 'wetgrass'}}

# Making Inference

In [62]:
model.get_independencies()



In [0]:
from pgmpy.inference import VariableElimination

model_infer = VariableElimination(model)

# P(wetgrass)

In [50]:
prob_wetgrass = model_infer.query(variables = ['wetgrass'])
print(prob_wetgrass)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 862.76it/s]
Eliminating: rain: 100%|██████████| 2/2 [00:00<00:00, 367.13it/s]

+-------------+-----------------+
| wetgrass    |   phi(wetgrass) |
| wetgrass(0) |          0.8873 |
+-------------+-----------------+
| wetgrass(1) |          0.1127 |
+-------------+-----------------+





# P(rain, wetgrass)

In [51]:
prob_wetgrass_rain = model_infer.query(variables = ['rain', 'wetgrass'])
print(prob_wetgrass_rain)

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 341.75it/s]
Eliminating: sprinkler: 100%|██████████| 1/1 [00:00<00:00, 429.48it/s]

+---------+-------------+----------------------+
| rain    | wetgrass    |   phi(rain,wetgrass) |
| rain(0) | wetgrass(0) |               0.0960 |
+---------+-------------+----------------------+
| rain(0) | wetgrass(1) |               0.1040 |
+---------+-------------+----------------------+
| rain(1) | wetgrass(0) |               0.7913 |
+---------+-------------+----------------------+
| rain(1) | wetgrass(1) |               0.0087 |
+---------+-------------+----------------------+





# P(wetgrass | given rain)

In [65]:
prob_evidence = model_infer.query(variables = ['sprinkler'],
                                  evidence = {'rain': 1}) 
print(prob_evidence)

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 212.22it/s]
Eliminating: wetgrass: 100%|██████████| 1/1 [00:00<00:00, 270.95it/s]

+--------------+------------------+
| sprinkler    |   phi(sprinkler) |
| sprinkler(0) |           0.0100 |
+--------------+------------------+
| sprinkler(1) |           0.9900 |
+--------------+------------------+





# P(wetgrass | given no rain)

In [67]:
output = model_infer.query(variables = ['sprinkler'],
            evidence = {'rain':0})
print(output)

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 163.43it/s]
Eliminating: wetgrass: 100%|██████████| 1/1 [00:00<00:00, 447.39it/s]

+--------------+------------------+
| sprinkler    |   phi(sprinkler) |
| sprinkler(0) |           0.4000 |
+--------------+------------------+
| sprinkler(1) |           0.6000 |
+--------------+------------------+





# P(sprinkler)

In [68]:
x = model_infer.query(variables = ['sprinkler'])
print(x)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 210.19it/s]
Eliminating: rain: 100%|██████████| 2/2 [00:00<00:00, 295.84it/s]

+--------------+------------------+
| sprinkler    |   phi(sprinkler) |
| sprinkler(0) |           0.0880 |
+--------------+------------------+
| sprinkler(1) |           0.9120 |
+--------------+------------------+





P(wetgrass| rain = False, sprinkler = False)

In [42]:
prob_evidence = model_infer.query(variables = ['wetgrass'],
                                  evidence = {'rain': 0, 'sprinkler': 0}) 
print(prob_evidence)

Finding Elimination Order: : : 0it [00:00, ?it/s]
0it [00:00, ?it/s]

+-------------+-----------------+
| wetgrass    |   phi(wetgrass) |
| wetgrass(0) |          0.0000 |
+-------------+-----------------+
| wetgrass(1) |          1.0000 |
+-------------+-----------------+





# Cloudy Model

In [0]:
model = BayesianModel([('Cloudy', 'Sprinkler'),
       ('Cloudy', 'Rain'),
       ('Sprinkler', 'Wet_Grass'),
       ('Rain', 'Wet_Grass')])
       
# Cloudy
cpt_cloudy = TabularCPD(variable='Cloudy', variable_card=2, values=[[0.3], [0.7]])
print(cpt_cloudy)

# Sprinkler
cpt_sprinkler = TabularCPD(variable='Sprinkler', variable_card=2,
                           values=[[0.4, 0.9], [0.6, 0.1]],
                           evidence=['Cloudy'], evidence_card=[2])
print(cpt_cloudy)
# Rain
cpt_rain = TabularCPD(variable='Rain', variable_card=2,
                      values=[[0.8, 0.2], [0.2, 0.8]],
                      evidence=['Cloudy'], evidence_card=[2])
print(cpt_rain)

# Wet Grass
cpt_wet_grass = TabularCPD(variable='Wet_Grass', variable_card=2,
                           values=[[1, 0.1, 0.1, 0.01],
                                  [0, 0.9, 0.9, 0.99]],
                           evidence=['Sprinkler', 'Rain'],
                           evidence_card=[2, 2])
print(cpt_wet_grass)

In [0]:
from pgmpy.inference import VariableElimination

model_infer = VariableElimination(model)

# References

Book: Mastering Probabilistic Graphical Models Using Python, Ankur and Abinash, Packt publishing, 2015


1. https://healthyalgorithms.com/2011/11/23/causal-modeling-in-python-bayesian-networks-in-pymc/
2. https://stats.stackexchange.com/questions/4687/specifying-conditional-probabilities-in-hybrid-bayesian-networks
3. EPFL homework: https://www.epfl.ch/labs/lions/wp-content/uploads/2019/01/Homework_2.pdf
4. bnlearn package that was built on pgmpy: https://github.com/erdogant/bnlearn/
5. Kevin Murphy: Brief theoretical notes: https://www.cs.ubc.ca/~murphyk/Bayes/bnintro.html#repr
6. Application of Probabilistic Graphical Models
in Forecasting Crude Oil Price: UCL thesis- Makes use of pgmpy and HMM. Discretised time series data
7. https://discourse.pymc.io/t/intercausal-reasoning-in-bayesian-networks/2419
8. Thesis- Crude oil prices: https://arxiv.org/pdf/1804.10869.pdf

In [0]:
model = BayesianModel([('Cloudy', 'Sprinkler'),
       ('Cloudy', 'Rain'),
       ('Sprinkler', 'Wet_Grass'),
       ('Rain', 'Wet_Grass')])
       
# Cloudy
cpt_cloudy = TabularCPD(variable='Cloudy', variable_card=2, values=[[0.3], [0.7]])
print(cpt_cloudy)

# Sprinkler
cpt_sprinkler = TabularCPD(variable='Sprinkler', variable_card=2,
                           values=[[0.4, 0.9], [0.6, 0.1]],
                           evidence=['Cloudy'], evidence_card=[2])
print(cpt_cloudy)
# Rain
cpt_rain = TabularCPD(variable='Rain', variable_card=2,
                      values=[[0.8, 0.2], [0.2, 0.8]],
                      evidence=['Cloudy'], evidence_card=[2])
print(cpt_rain)

# Wet Grass
cpt_wet_grass = TabularCPD(variable='Wet_Grass', variable_card=2,
                           values=[[1, 0.1, 0.1, 0.01],
                                  [0, 0.9, 0.9, 0.99]],
                           evidence=['Sprinkler', 'Rain'],
                           evidence_card=[2, 2])
print(cpt_wet_grass)