https://www.youtube.com/watch?v=DEHqIxX1Kq4

In [0]:
from google.colab import drive
import os
# Mount Google Drive at an absolute path so the mount point does not depend on
# the current working directory. A later cell changes directory into
# /content/gdrive/My Drive/..., so a relative 'gdrive' would resolve somewhere
# else if this cell were re-run afterwards.
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at gdrive


In [0]:
import os
# Work from the project folder on the mounted Drive, then list what is in it.
os.chdir('/content/gdrive/My Drive/Colab_Notebooks/graphical-models/')
os.listdir('.')

['x1.png', 'x2.png']

# Install 

In [0]:
!pip install pgmpy
#!git clone https://github.com/pgmpy/pgmpy 
#!cd pgmpy
#!pip install -r requirements.txt
#!python setup.py install

Collecting pgmpy
[?25l  Downloading https://files.pythonhosted.org/packages/68/6b/661a65aa7788f3aff7228ba81625c540917d656f41e3eb031c6d60b0a25d/pgmpy-0.1.10-py3-none-any.whl (339kB)
[K     |████████████████████████████████| 348kB 2.8MB/s 
Installing collected packages: pgmpy
Successfully installed pgmpy-0.1.10


In [0]:
# https://github.com/AileenNielsen/PyGotham_2016_Probabilistic_Graphical_Models/blob/master/pgmpy.ipynb
from pgmpy.factors.discrete import TabularCPD
from pgmpy.models import BayesianModel

# Setting up your model

### First, set the structure

![alt text](https://imghostr.com/image.php?sig=NLWDa2c8Y&h=e715637236ebf90b02fb117052dae147&t=1583961429)

In [0]:
# Network structure as directed (parent, child) edges:
# Genetics and Practice both point into OlympicTrials, which points into Offer.
olympic_model = BayesianModel(
    [
        ('Genetics', 'OlympicTrials'),
        ('Practice', 'OlympicTrials'),
        ('OlympicTrials', 'Offer'),
    ]
)

### Then set up the relationships (the CPDs)

In [0]:
# Prior over Genetics (no parents): P(Genetics=0) = 0.2, P(Genetics=1) = 0.8.
# `values` has one row per state of the variable and one column per parent
# configuration, so a parent-less binary variable needs shape (2, 1). The
# previous (1, 2) layout is rejected by pgmpy releases that validate the shape.
genetics_cpd = TabularCPD(
    variable='Genetics',
    variable_card=2,
    values=[[.2], [.8]],
)

In [0]:
# Prior over Practice (no parents): P(Practice=0) = 0.7, P(Practice=1) = 0.3.
# `values` has one row per state of the variable and one column per parent
# configuration, so a parent-less binary variable needs shape (2, 1). The
# previous (1, 2) layout is rejected by pgmpy releases that validate the shape.
practice_cpd = TabularCPD(
    variable='Practice',
    variable_card=2,
    values=[[.7], [.3]],
)

In [0]:
# P(Offer | OlympicTrials): one row per Offer state (0, 1) and one column per
# OlympicTrials state (0, 1, 2). Every column sums to 1.
offer_cpd = TabularCPD(
    variable='Offer',
    variable_card=2,
    evidence=['OlympicTrials'],
    evidence_card=[3],
    values=[
        [.95, .8, .5],
        [.05, .2, .5],
    ],
)

In [0]:
# P(OlympicTrials | Genetics, Practice): one row per OlympicTrials state
# (0, 1, 2). Columns run over the parent states with the last evidence variable
# changing fastest: (G=0,P=0), (G=0,P=1), (G=1,P=0), (G=1,P=1).
# Every column sums to 1.
olympic_trials_cpd = TabularCPD(
    variable='OlympicTrials',
    variable_card=3,
    evidence=['Genetics', 'Practice'],
    evidence_card=[2, 2],
    values=[
        [.5, .8, .8, .9],
        [.3, .15, .1, .08],
        [.2, .05, .1, .02],
    ],
)

### Add the relationships to your models

In [0]:
# Register all four CPDs with the network.
olympic_model.add_cpds(
    genetics_cpd,
    practice_cpd,
    offer_cpd,
    olympic_trials_cpd,
)

### Examine the structure of your graph

In [0]:
# Show the CPDs currently attached to the model (one entry per CPD added above).
olympic_model.get_cpds()

[<TabularCPD representing P(Genetics:2) at 0x7fa1c96a3cc0>,
 <TabularCPD representing P(Practice:2) at 0x7fa1c96a3cf8>,
 <TabularCPD representing P(Offer:2 | OlympicTrials:3) at 0x7fa1c963c2e8>,
 <TabularCPD representing P(OlympicTrials:3 | Genetics:2, Practice:2) at 0x7fa1c963c470>]

### Find active trail nodes

In [0]:
# Nodes connected to Genetics by an active trail when nothing is observed.
olympic_model.active_trail_nodes('Genetics')

{'Genetics': {'Genetics', 'Offer', 'OlympicTrials'}}

In [0]:
# Nodes connected to OlympicTrials by an active trail when nothing is observed.
olympic_model.active_trail_nodes('OlympicTrials')

{'OlympicTrials': {'Genetics', 'Offer', 'OlympicTrials', 'Practice'}}

### Find local independencies

In [0]:
# Local independencies of Genetics implied by the graph structure.
olympic_model.local_independencies('Genetics')

(Genetics _|_ Practice)

In [0]:
# Local independencies of OlympicTrials; the recorded output for this node is empty.
olympic_model.local_independencies('OlympicTrials')



### Get all independencies

In [0]:
# List every (conditional) independence statement implied by the graph.
olympic_model.get_independencies()

(Genetics _|_ Practice)
(Genetics _|_ Offer | OlympicTrials)
(Genetics _|_ Offer | Practice, OlympicTrials)
(Practice _|_ Genetics)
(Practice _|_ Offer | OlympicTrials)
(Practice _|_ Offer | Genetics, OlympicTrials)
(Offer _|_ Practice, Genetics | OlympicTrials)
(Offer _|_ Genetics | Practice, OlympicTrials)
(Offer _|_ Practice | Genetics, OlympicTrials)

# Making inferences

### We can get probability distributions that are not explicitly spelled out in our graphs

In [0]:
from pgmpy.inference import VariableElimination

In [0]:
# Inference engine over the model; all queries below go through this object.
olympic_infer = VariableElimination(olympic_model)

# What is the probability that any given person gets an offer?

In [0]:
# Marginal distribution of Offer, with no evidence supplied.
prob_offer = olympic_infer.query(variables=['Offer'])
print(prob_offer)

Finding Elimination Order: : 100%|██████████| 3/3 [00:00<00:00, 483.68it/s]
Eliminating: OlympicTrials: 100%|██████████| 3/3 [00:00<00:00, 275.62it/s]

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.8898 |
+----------+--------------+
| Offer(1) |       0.1102 |
+----------+--------------+





# Combined probability of offer and genetics

In [0]:
# Joint distribution over Offer and Genetics (no evidence).
# Rebinds `prob_offer`, replacing the marginal computed in the previous query.
prob_offer = olympic_infer.query(variables=['Offer', 'Genetics'])
print(prob_offer)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 225.55it/s]
Eliminating: OlympicTrials: 100%|██████████| 2/2 [00:00<00:00, 284.29it/s]

+-------------+----------+-----------------------+
| Genetics    | Offer    |   phi(Genetics,Offer) |
| Genetics(0) | Offer(0) |                0.1684 |
+-------------+----------+-----------------------+
| Genetics(0) | Offer(1) |                0.0316 |
+-------------+----------+-----------------------+
| Genetics(1) | Offer(0) |                0.7214 |
+-------------+----------+-----------------------+
| Genetics(1) | Offer(1) |                0.0786 |
+-------------+----------+-----------------------+





# What is the joint probability of offer, genetics and practice?

In [0]:
# Joint distribution over Offer, Practice and Genetics (no evidence).
# Rebinds `prob_offer` once more.
prob_offer = olympic_infer.query(
    variables=['Offer', 'Practice', 'Genetics'],
)
print(prob_offer)

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 293.78it/s]
Eliminating: OlympicTrials: 100%|██████████| 1/1 [00:00<00:00, 252.81it/s]

+-------------+-------------+----------+--------------------------------+
| Genetics    | Practice    | Offer    |   phi(Genetics,Practice,Offer) |
| Genetics(0) | Practice(0) | Offer(0) |                         0.1141 |
+-------------+-------------+----------+--------------------------------+
| Genetics(0) | Practice(0) | Offer(1) |                         0.0259 |
+-------------+-------------+----------+--------------------------------+
| Genetics(0) | Practice(1) | Offer(0) |                         0.0543 |
+-------------+-------------+----------+--------------------------------+
| Genetics(0) | Practice(1) | Offer(1) |                         0.0057 |
+-------------+-------------+----------+--------------------------------+
| Genetics(1) | Practice(0) | Offer(0) |                         0.4984 |
+-------------+-------------+----------+--------------------------------+
| Genetics(1) | Practice(0) | Offer(1) |                         0.0616 |
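+-------------+-------------+----------+--------------------------------+
| Genetics(1) | Practice(1) | Offer(0) |                         0.2230 |
+-------------+-------------+----------+--------------------------------+
| Genetics(1) | Practice(1) | Offer(1) |                         0.0170 |
+-------------+-------------+----------+--------------------------------+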




# We can also get conditional probability distributions that take into account what we already know

In [0]:
# No evidence is passed here, so despite the variable name this is the joint
# distribution over Offer and OlympicTrials, not a conditional one.
prob_offer_good_genes = olympic_infer.query(
    variables=['Offer', 'OlympicTrials'],
)
print(prob_offer_good_genes)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 300.11it/s]
Eliminating: Genetics: 100%|██████████| 2/2 [00:00<00:00, 425.26it/s]

+------------------+----------+----------------------------+
| OlympicTrials    | Offer    |   phi(OlympicTrials,Offer) |
| OlympicTrials(0) | Offer(0) |                     0.7429 |
+------------------+----------+----------------------------+
| OlympicTrials(0) | Offer(1) |                     0.0391 |
+------------------+----------+----------------------------+
| OlympicTrials(1) | Offer(0) |                     0.1010 |
+------------------+----------+----------------------------+
| OlympicTrials(1) | Offer(1) |                     0.0252 |
+------------------+----------+----------------------------+
| OlympicTrials(2) | Offer(0) |                     0.0459 |
+------------------+----------+----------------------------+
| OlympicTrials(2) | Offer(1) |                     0.0459 |
+------------------+----------+----------------------------+





# What is the probability of getting an offer given good genes?

In [0]:
# Distribution of Offer conditioned on Genetics = 0.
# NOTE(review): the later `prob_offer_good_genes_did_practice` query also uses
# Genetics = 0, so the `bad_genes` in this variable name looks like a
# misnomer -- confirm which state encodes good genes.
prob_offer_bad_genes = olympic_infer.query(
    variables=['Offer'],
    evidence={'Genetics': 0},
)
print(prob_offer_bad_genes)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 589.96it/s]
Eliminating: OlympicTrials: 100%|██████████| 2/2 [00:00<00:00, 445.07it/s]

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.8420 |
+----------+--------------+
| Offer(1) |       0.1580 |
+----------+--------------+





# What is the probability of getting an offer given good genes and practice?

In [0]:
# Distribution of Offer conditioned on Genetics = 0 and Practice = 0.
prob_offer_good_genes_did_practice = olympic_infer.query(
    variables=['Offer'],
    evidence={'Genetics': 0, 'Practice': 0},
)
print(prob_offer_good_genes_did_practice)

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 341.92it/s]
Eliminating: OlympicTrials: 100%|██████████| 1/1 [00:00<00:00, 449.60it/s]

+----------+--------------+
| Offer    |   phi(Offer) |
| Offer(0) |       0.8150 |
+----------+--------------+
| Offer(1) |       0.1850 |
+----------+--------------+





### You can also go upstream logically. For example, evidence about performance at the Olympic Trials affects the probabilities of Genetics.

In [0]:
# Reason from effect back to cause: distribution of Genetics once
# OlympicTrials = 2 has been observed.
prob_good_genes_if_amazing_olympic_trials = olympic_infer.query(
    variables=['Genetics'],
    evidence={'OlympicTrials': 2},
)
print(prob_good_genes_if_amazing_olympic_trials)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 510.19it/s]
Eliminating: Offer: 100%|██████████| 2/2 [00:00<00:00, 414.19it/s]

+-------------+-----------------+
| Genetics    |   phi(Genetics) |
| Genetics(0) |          0.3377 |
+-------------+-----------------+
| Genetics(1) |          0.6623 |
+-------------+-----------------+





# What is the probability of good genes given OlympicTrials and Offer?

In [0]:
# Same query as the previous cell with Offer = 1 added to the evidence.
# The recorded result is unchanged, in line with the independence
# (Genetics _|_ Offer | OlympicTrials) listed earlier in the notebook.
# Rebinds the variable assigned by the previous query cell.
prob_good_genes_if_amazing_olympic_trials = olympic_infer.query(
    variables=['Genetics'],
    evidence={'OlympicTrials': 2, 'Offer': 1},
)
print(prob_good_genes_if_amazing_olympic_trials)

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 165.65it/s]
Eliminating: Practice: 100%|██████████| 1/1 [00:00<00:00, 170.57it/s]

+-------------+-----------------+
| Genetics    |   phi(Genetics) |
| Genetics(0) |          0.3377 |
+-------------+-----------------+
| Genetics(1) |          0.6623 |
+-------------+-----------------+





### Some variables are only informative about other variables given 'third' variables

In [0]:
# On its own, Practice carries no information about Genetics: conditioning on
# Practice = 1 leaves the Genetics distribution at its prior.
prob_good_genes_if_no_practice = olympic_infer.query(
    variables=['Genetics'],
    evidence={'Practice': 1},
)
print(prob_good_genes_if_no_practice)

Finding Elimination Order: : 100%|██████████| 2/2 [00:00<00:00, 921.72it/s]
Eliminating: OlympicTrials: 100%|██████████| 2/2 [00:00<00:00, 480.12it/s]

+-------------+-----------------+
| Genetics    |   phi(Genetics) |
| Genetics(0) |          0.2000 |
+-------------+-----------------+
| Genetics(1) |          0.8000 |
+-------------+-----------------+





In [0]:
# Once OlympicTrials performance is also known, Practice does become
# informative about Genetics: the two parents are coupled through their
# observed common child.
prob_good_genes_if_no_practice_good_olympic_trials = olympic_infer.query(
    variables=['Genetics'],
    evidence={'Practice': 1, 'OlympicTrials': 2},
)
print(prob_good_genes_if_no_practice_good_olympic_trials)

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 174.28it/s]
Eliminating: Offer: 100%|██████████| 1/1 [00:00<00:00, 281.97it/s]

+-------------+-----------------+
| Genetics    |   phi(Genetics) |
| Genetics(0) |          0.3846 |
+-------------+-----------------+
| Genetics(1) |          0.6154 |
+-------------+-----------------+





### We can find out the most probable state for a variable

In [0]:
# Most probable state of Genetics with no evidence.
olympic_infer.map_query(variables = ['Genetics'])

Finding Elimination Order: : 100%|██████████| 3/3 [00:00<00:00, 1341.32it/s]
Eliminating: OlympicTrials: 100%|██████████| 3/3 [00:00<00:00, 453.26it/s]


{'Genetics': 1}

In [0]:
# Most probable state of Offer with no evidence.
olympic_infer.map_query(variables = ['Offer'])

Finding Elimination Order: : 100%|██████████| 3/3 [00:00<00:00, 555.93it/s]
Eliminating: OlympicTrials: 100%|██████████| 3/3 [00:00<00:00, 277.74it/s]


{'Offer': 0}

In [0]:
# Most probable state of OlympicTrials with no evidence.
olympic_infer.map_query(variables = ['OlympicTrials'])

Finding Elimination Order: : 100%|██████████| 3/3 [00:00<00:00, 587.16it/s]
Eliminating: Genetics: 100%|██████████| 3/3 [00:00<00:00, 204.66it/s]


{'OlympicTrials': 0}

In [0]:
# Distribution of OlympicTrials given Genetics = 1 and Practice = 1; the
# recorded result reproduces the last column of the OlympicTrials CPD.
x = olympic_infer.query(
    variables=['OlympicTrials'],
    evidence={'Genetics': 1, 'Practice': 1},
)
print(x)

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 113.82it/s]
Eliminating: Offer: 100%|██████████| 1/1 [00:00<00:00, 285.54it/s]

+------------------+----------------------+
| OlympicTrials    |   phi(OlympicTrials) |
| OlympicTrials(0) |               0.9000 |
+------------------+----------------------+
| OlympicTrials(1) |               0.0800 |
+------------------+----------------------+
| OlympicTrials(2) |               0.0200 |
+------------------+----------------------+



