# Simpson's Paradox

## Installing Packages

In [None]:
pip install pandas



In [None]:
pip install pgmpy



## Importing packages

In [None]:
import pandas as pd
from pgmpy.models import BayesianModel as bm
from pgmpy.inference import VariableElimination


## Read data

In [None]:
# Load the click data, keeping only the X, Y and Z columns.
click_data = pd.read_csv("clickdata.csv", usecols=['X', 'Y', 'Z'])


In [None]:
# Display the loaded frame as the cell output to eyeball the X, Y and Z values.
click_data

Unnamed: 0,X,Y,Z
0,unengaged,no click,young
1,unengaged,no click,young
2,unengaged,no click,young
3,unengaged,no click,young
4,unengaged,no click,young
...,...,...,...
115,engaged,click,young
116,engaged,click,young
117,engaged,click,old
118,engaged,click,old


## Creating model

In [None]:
# Graph structure: Z points to both X and Y, and X points to Y.
model = bm()
model.add_edges_from([('Z', 'X'), ('X', 'Y'), ('Z', 'Y')])

# Fit the model's CPDs to the loaded click data.
model.fit(click_data)

In [None]:
# Look the CPD up by node name instead of by position in the CPD list, so the
# cell does not depend on the order in which the model stores its nodes.
print(model.get_cpds('Z'))

+----------+-----+
| Z(old)   | 0.5 |
+----------+-----+
| Z(young) | 0.5 |
+----------+-----+


In [None]:
# Look the CPD up by node name instead of by position in the CPD list, so the
# cell does not depend on the order in which the model stores its nodes.
print(model.get_cpds('X'))

+--------------+--------------------+--------------------+
| Z            | Z(old)             | Z(young)           |
+--------------+--------------------+--------------------+
| X(engaged)   | 0.6666666666666666 | 0.3333333333333333 |
+--------------+--------------------+--------------------+
| X(unengaged) | 0.3333333333333333 | 0.6666666666666666 |
+--------------+--------------------+--------------------+


In [None]:
# Look the CPD up by node name instead of by position in the CPD list, so the
# cell does not depend on the order in which the model stores its nodes.
print(model.get_cpds('Y'))

+-------------+------------+------------+--------------+--------------+
| X           | X(engaged) | X(engaged) | X(unengaged) | X(unengaged) |
+-------------+------------+------------+--------------+--------------+
| Z           | Z(old)     | Z(young)   | Z(old)       | Z(young)     |
+-------------+------------+------------+--------------+--------------+
| Y(click)    | 0.075      | 0.4        | 0.05         | 0.3          |
+-------------+------------+------------+--------------+--------------+
| Y(no click) | 0.925      | 0.6        | 0.95         | 0.7          |
+-------------+------------+------------+--------------+--------------+


## Inference

In [None]:
# Wrap the fitted model in a VariableElimination engine; the queries in the
# following cells are run against this object.
inference = VariableElimination(model)

In [None]:
# Distribution over Y given the evidence X = 'unengaged'.
theta_u = inference.query(variables=['Y'], evidence={'X': 'unengaged'})

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 273.89it/s]
Eliminating: Z: 100%|██████████| 1/1 [00:00<00:00, 162.60it/s]


In [None]:
# Print the whole distribution over Y, then show its first entry on its own;
# the printed table lists that first entry as the Y(click) probability.
print(theta_u)
theta_u.values[0]

+-------------+----------+
| Y           |   phi(Y) |
| Y(click)    |   0.2167 |
+-------------+----------+
| Y(no click) |   0.7833 |
+-------------+----------+


0.21666666666666667

In [None]:
# Distribution over Y given the evidence X = 'engaged'.
theta_e = inference.query(variables=['Y'], evidence={'X': 'engaged'})

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 241.38it/s]
Eliminating: Z: 100%|██████████| 1/1 [00:00<00:00, 236.77it/s]


In [None]:
# Print the whole distribution over Y, then show its first entry on its own;
# the printed table lists that first entry as the Y(click) probability.
print(theta_e)
theta_e.values[0]

+-------------+----------+
| Y           |   phi(Y) |
| Y(click)    |   0.1833 |
+-------------+----------+
| Y(no click) |   0.8167 |
+-------------+----------+


0.18333333333333332

In [None]:
# Difference in the Y(click) probability given observed X: engaged minus
# unengaged.
theta_e.values[0] - theta_u.values[0]

-0.033333333333333354

## Inference on the updated model (after the intervention do(X))

In [None]:
# Replace the fitted model with the one returned by do('X') and rebuild the
# inference engine on top of it.
# WARNING: both `model` and `inference` are rebound here, so the earlier
# theta_u / theta_e cells must not be re-run after this cell without first
# re-running the notebook from the "Creating model" section.
model = model.do('X')
inference = VariableElimination(model)

In [None]:
# Distribution over Y with X set to 'unengaged', queried from the do('X')
# model that `inference` now wraps.
gamma_u = inference.query(variables=['Y'], evidence={'X': 'unengaged'})

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 311.45it/s]
Eliminating: Z: 100%|██████████| 1/1 [00:00<00:00, 265.87it/s]


In [None]:
# Distribution over Y for X = 'unengaged' under the do('X') model.
print(gamma_u)

+-------------+----------+
| Y           |   phi(Y) |
| Y(click)    |   0.1750 |
+-------------+----------+
| Y(no click) |   0.8250 |
+-------------+----------+


In [None]:
# Distribution over Y with X set to 'engaged', queried from the do('X')
# model that `inference` now wraps.
gamma_e = inference.query(variables=['Y'], evidence={'X': 'engaged'})

Finding Elimination Order: : 100%|██████████| 1/1 [00:00<00:00, 120.37it/s]
Eliminating: Z: 100%|██████████| 1/1 [00:00<00:00, 176.91it/s]


In [None]:
# Distribution over Y for X = 'engaged' under the do('X') model.
print(gamma_e)

+-------------+----------+
| Y           |   phi(Y) |
| Y(click)    |   0.2375 |
+-------------+----------+
| Y(no click) |   0.7625 |
+-------------+----------+


In [None]:
# Difference in the Y(click) probability under the do('X') model: engaged
# minus unengaged.
gamma_e.values[0] - gamma_u.values[0]

0.06250000000000003