Documentation : http://pgmpy.org/models.html

Video help is available here https://www.youtube.com/watch?v=gpKzZiUDjxM

In [None]:
!pip install pgmpy



In [None]:
from pgmpy.factors.discrete import TabularCPD
from pgmpy.models import BayesianModel

In [None]:
# Step 1: declare the network structure as directed (parent, child) edges.
wc_model = BayesianModel(
    [
        ("Genetics", "Performance"),     # Genetics influences Performance
        ("Practice", "Performance"),     # Practice influences Performance
        ("Performance", "Offer"),        # Performance influences Offer
    ]
)

In [None]:
# Step 2: define the conditional probability distributions (CPDs).

# Genetics has no parents, so its CPD is a single column: one row per state.
genetics_cpd = TabularCPD(
    variable="Genetics",
    variable_card=2,
    values=[[0.2], [0.8]],
)

In [None]:
# Practice is also a parentless node: a single column with one row per state.
practice_cpd = TabularCPD(
    variable="Practice",
    variable_card=2,
    values=[[0.7], [0.3]],
)

In [None]:
# P(Offer | Performance): one row per Offer state (2), one column per
# Performance state (3). Each column sums to 1.
offer_cpd = TabularCPD(
    variable="Offer",
    variable_card=2,
    values=[
        [0.95, 0.8, 0.5],
        [0.05, 0.2, 0.5],
    ],
    evidence=["Performance"],
    evidence_card=[3],
)

In [None]:
# P(Performance | Genetics, Practice): one row per Performance state (3), one
# column per combination of the evidence states (2 x 2 = 4).
# Column order, left to right: (Genetics=0, Practice=0), (Genetics=0, Practice=1),
# (Genetics=1, Practice=0), (Genetics=1, Practice=1). Each column sums to 1.
wc_trails_cpd = TabularCPD(
    variable="Performance",
    variable_card=3,
    values=[
        [0.5, 0.8, 0.8, 0.9],
        [0.3, 0.15, 0.1, 0.08],
        [0.2, 0.05, 0.1, 0.02],
    ],
    evidence=["Genetics", "Practice"],
    evidence_card=[2, 2],
)

In [None]:
# Attach the CPDs to the model.
wc_model.add_cpds(genetics_cpd, practice_cpd, offer_cpd, wc_trails_cpd)

# Validate right away: check_model() verifies that the probabilities in each
# CPD sum to 1 and that every CPD is consistent with the node's parents in the
# graph, raising an error otherwise. Failing here is easier to debug than a
# failure at query time.
assert wc_model.check_model(), "wc_model failed validation"

In [None]:
# List the CPDs currently attached to the model.
wc_model.get_cpds()

[<TabularCPD representing P(Genetics:2) at 0x7f7ca64a9d90>,
 <TabularCPD representing P(Practice:2) at 0x7f7ca64b75d0>,
 <TabularCPD representing P(Offer:2 | Performance:3) at 0x7f7ca64c31d0>,
 <TabularCPD representing P(Performance:3 | Genetics:2, Practice:2) at 0x7f7ca64c3d50>]

In [None]:
# Nodes connected to Genetics by an active trail when nothing is observed.
wc_model.active_trail_nodes(variables="Genetics")

{'Genetics': {'Genetics', 'Offer', 'Performance'}}

In [None]:
# Same question for Performance (no observations).
wc_model.active_trail_nodes(variables="Performance")

{'Performance': {'Genetics', 'Offer', 'Performance', 'Practice'}}

In [None]:
# Same question for Practice (no observations).
wc_model.active_trail_nodes(variables="Practice")

{'Practice': {'Offer', 'Performance', 'Practice'}}

In [None]:
# Local independencies of a single node: here, Genetics.
wc_model.local_independencies(variables="Genetics")

(Genetics ⟂ Practice)

Here we see that Genetics is independent of Practice only: Practice is its sole non-descendant, and Genetics has no parents to condition on.

In [None]:
# Local independencies of Performance.
wc_model.local_independencies(variables="Performance")



Here the result is empty: the only non-descendants of Performance are its own parents (Genetics and Practice), so there is no local independence statement for Performance.

In [None]:
# Get all independencies implied by the graph structure
wc_model.get_independencies()

(Genetics ⟂ Practice)
(Genetics ⟂ Offer | Performance)
(Genetics ⟂ Offer | Practice, Performance)
(Practice ⟂ Genetics)
(Practice ⟂ Offer | Performance)
(Practice ⟂ Offer | Genetics, Performance)
(Offer ⟂ Genetics, Practice | Performance)
(Offer ⟂ Practice | Genetics, Performance)
(Offer ⟂ Genetics | Practice, Performance)

In [None]:
# Making inferences
# We can get probability distributions that are not explicitly spelled out in our model

from pgmpy.inference import VariableElimination

In [None]:
# Inference engine over the model; used by every query below.
wc_infer = VariableElimination(model=wc_model)

In [None]:
# Marginal distribution of Offer, with no evidence supplied.
prob_offer = wc_infer.query(
    variables=["Offer"],
)
print(prob_offer)

Finding Elimination Order: : 100%|██████████| 3/3 [00:00<00:00, 674.69it/s]
Eliminating: Performance: 100%|██████████| 3/3 [00:00<00:00, 228.84it/s]

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.8898 |
+----------+--------------+
| Offer(1) |       0.1102 |
+----------+--------------+





In [None]:
# Querying several variables with no evidence returns their JOINT distribution,
# here P(Offer, Genetics): the four entries sum to 1. This is not a conditional
# distribution; conditioning on what we already know is done with the
# `evidence` argument of query().

prob_offer_and_genetics = wc_infer.query(variables=["Offer", "Genetics"])
print(prob_offer_and_genetics)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 642.71it/s]
Eliminating: Performance: 100%|██████████| 2/2 [00:00<00:00, 303.86it/s]

+----------+-------------+-----------------------+
| Offer    | Genetics    |   phi(Offer,Genetics) |
| Offer(0) | Genetics(0) |                0.1684 |
+----------+-------------+-----------------------+
| Offer(0) | Genetics(1) |                0.7214 |
+----------+-------------+-----------------------+
| Offer(1) | Genetics(0) |                0.0316 |
+----------+-------------+-----------------------+
| Offer(1) | Genetics(1) |                0.0786 |
+----------+-------------+-----------------------+





In [None]:
# P(Offer | Genetics = 1); this notebook treats Genetics state 1 as "bad genes".
prob_offer_bad_genes = wc_infer.query(
    variables=["Offer"],
    evidence={"Genetics": 1},
)
print(prob_offer_bad_genes)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 442.74it/s]
Eliminating: Performance: 100%|██████████| 2/2 [00:00<00:00, 205.29it/s]

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.9017 |
+----------+--------------+
| Offer(1) |       0.0983 |
+----------+--------------+





In [None]:
# P(Offer | Genetics = 0); this notebook treats Genetics state 0 as "good genes".
prob_offer_good_genes = wc_infer.query(
    variables=["Offer"],
    evidence={"Genetics": 0},
)
print(prob_offer_good_genes)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 489.79it/s]
Eliminating: Performance: 100%|██████████| 2/2 [00:00<00:00, 356.80it/s]

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.8420 |
+----------+--------------+
| Offer(1) |       0.1580 |
+----------+--------------+





In [None]:
# P(Offer | Genetics = 0, Practice = 1): good genes, but NO practice.
# Practice state 1 is the "no practice" state: in the Performance CPD it shifts
# probability toward Performance state 0 for both Genetics states, and the
# later `prob_good_genes_if_no_practice` query uses Practice = 1 the same way.
# The variable is named accordingly, which is why the chance of an offer here is
# lower than with good genes alone.
prob_offer_good_no_practice = wc_infer.query(
    variables=["Offer"],
    evidence={"Genetics": 0, "Practice": 1},
)
print(prob_offer_good_no_practice)

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 203.64it/s]
Eliminating: Performance: 100%|██████████| 1/1 [00:00<00:00, 187.02it/s]

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.9050 |
+----------+--------------+
| Offer(1) |       0.0950 |
+----------+--------------+





In [None]:
# You can also reason "upstream", from effects back to causes.
# For example, evidence about Performance at the WC Trials changes the
# probabilities of Genetics. The query variable is Genetics (not Offer), and
# the result is named accordingly.

prob_genes_if_amazing_wc_trials = wc_infer.query(
    variables=["Genetics"],
    evidence={"Performance": 2},
)
print(prob_genes_if_amazing_wc_trials)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 432.87it/s]
Eliminating: Practice: 100%|██████████| 2/2 [00:00<00:00, 398.07it/s]

+-------------+-----------------+
| Genetics    |   phi(Genetics) |
| Genetics(0) |          0.3377 |
+-------------+-----------------+
| Genetics(1) |          0.6623 |
+-------------+-----------------+





In [None]:
# Some variables only become informative about each other once a third
# variable is observed.
#
# On its own, Practice tells us nothing about Genetics: the result below is
# identical to the Genetics prior (0.2 / 0.8).

prob_good_genes_if_no_practice = wc_infer.query(
    variables=["Genetics"],
    evidence={"Practice": 1},
)
print(prob_good_genes_if_no_practice)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 506.65it/s]
Eliminating: Performance: 100%|██████████| 2/2 [00:00<00:00, 405.34it/s]

+-------------+-----------------+
| Genetics    |   phi(Genetics) |
| Genetics(0) |          0.2000 |
+-------------+-----------------+
| Genetics(1) |          0.8000 |
+-------------+-----------------+





In [None]:
# map_query returns the single most probable state of the requested variable(s)
# instead of a full distribution. Here: Genetics, with no evidence.

wc_infer.map_query(variables=["Genetics"])

Finding Elimination Order: : 100%|██████████| 3/3 [00:00<00:00, 1213.40it/s]
Eliminating: Performance: 100%|██████████| 3/3 [00:00<00:00, 333.63it/s]


{'Genetics': 1}

In [None]:
# Most probable state of Offer, with no evidence.
wc_infer.map_query(variables=["Offer"])

Finding Elimination Order: : 100%|██████████| 3/3 [00:00<00:00, 1288.84it/s]
Eliminating: Performance: 100%|██████████| 3/3 [00:00<00:00, 337.25it/s]


{'Offer': 0}

In [None]:
# Most probable state of Performance, with no evidence.
wc_infer.map_query(variables=["Performance"])

Finding Elimination Order: : 100%|██████████| 3/3 [00:00<00:00, 922.37it/s]
Eliminating: Practice: 100%|██████████| 3/3 [00:00<00:00, 335.47it/s]


{'Performance': 0}

------------------------------