# Chapter 11 - Bayesian Causal Effect Graphical Inference

The notebook is a code companion to chapter 11 of the book [Causal AI](https://www.manning.com/books/causal-ai) by [Robert Osazuwa Ness](https://www.linkedin.com/in/osazuwa/). This code is aligned with the code in the text.

<a href="https://github.com/altdeep/causalML/blob/master/book/chapter%2011/Chapter_11_Bayesian_Causal_Graphical_Inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pyro-ppl==1.9
!pip install graphviz==0.20
!pip install pandas==1.5.3
!pip install torch==2.2.1+cu121

In [None]:
import pandas as pd
import torch

GAME_DATA_URL = "https://raw.githubusercontent.com/altdeep/causalML/master/datasets/online_game_ate.csv"
MODEL_COLUMNS = ["Guild Membership", "Side-quest Engagement", "Won Items", "In-game Purchases"]

df = pd.read_csv(GAME_DATA_URL)    #A
df = df[MODEL_COLUMNS]   #B

# Prefer a CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():    #C
    device = torch.device("cuda")    #C
else:    #C
    device = torch.device("cpu")    #C

# One float32 tensor per column, keyed by column name and stored on `device`.
data = {}    #C
for col in df.columns:    #C
    data[col] = torch.tensor(df[col].values, dtype=torch.float32).to(device)    #C


#A Load the data.
#B Drop everything but Guild Membership, Side-quest Engagement, Won Items, In-game Purchases.
#C Convert the data to tensors and dynamically set the device for performing tensor computations depending on the availability of a CUDA-enabled GPU.

In [None]:
import torch.nn as nn

class Confounders2Engagement(nn.Module):
    """One-hidden-layer network that outputs the Side-quest Engagement probability.

    The input holds one row per input variable (two by default) and one
    column per observation. The output is one probability in (0, 1) per
    observation.
    """

    def __init__(
        self,
        input_dim=1+1,    #A
        hidden_dim=5    #B
    ):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)   #C
        self.f_engagement_ρ = nn.Linear(hidden_dim, 1)    #D
        self.softplus = nn.Softplus()    #E
        self.sigmoid = nn.Sigmoid()    #F

    def forward(self, input):
        """Map an (input_dim, N) tensor to a length-N tensor of probabilities."""
        per_observation = input.t()    # one row per observation
        hidden = self.softplus(self.fc1(per_observation))    #G
        logit = self.f_engagement_ρ(hidden)
        ρ_engagement = self.sigmoid(logit)    #H
        # Transpose back and drop the singleton output dimension.
        return ρ_engagement.t().squeeze(0)

#A Input is confounder proxy Z concatenated with Guild Membership.
#B Choosing a hidden dimension of width 5.
#C Linear map from input to hidden dimension.
#D Linear map from hidden dimension to the Side-quest Engagement probability parameter.
#E Activation function for hidden layer.
#F Activation function for Side-quest engagement parameter.
#G From input to hidden layer
#H From hidden layer to ρ_engagement

In [None]:
class PurchasesNetwork(nn.Module):
    """One-hidden-layer network that outputs the Normal parameters of In-game Purchases.

    The input holds one row per input variable (three by default) and one
    column per observation. Two linear heads share the hidden layer: one
    yields the location, the other the (strictly positive) scale.
    """

    def __init__(
        self,
        input_dim=1+1+1,    #A
        hidden_dim=5    #B
    ):
        super().__init__()
        self.f_hidden = nn.Linear(input_dim, hidden_dim)   #C
        self.f_purchase_μ = nn.Linear(hidden_dim, 1)    #D
        self.f_purchase_σ = nn.Linear(hidden_dim, 1)    #E
        self.softplus = nn.Softplus()    #F

    def forward(self, input):
        """Map an (input_dim, N) tensor to a pair of length-N tensors (location, scale)."""
        per_observation = input.t()    # one row per observation
        hidden = self.softplus(self.f_hidden(per_observation))    #G
        μ_purchases = self.f_purchase_μ(hidden)   #H
        # Softplus keeps the scale positive; the 1e-6 offset keeps it away from zero.
        σ_purchases = 1e-6 + self.softplus(self.f_purchase_σ(hidden))    #I
        # Transpose back and drop the singleton output dimension of each head.
        return μ_purchases.t().squeeze(0), σ_purchases.t().squeeze(0)

#A Input is confounder proxy Z concatenated with Guild Membership and Won Items.
#B Choosing a hidden dimension of width 5.
#C Linear map from input to hidden dimension.
#D Linear map from hidden dimension to In-Game Purchases location parameter.
#E Linear map from hidden dimension to In-Game Purchases scale parameter.
#F Activation function for hidden layer.
#G From input to hidden layer.
#H Mapping from hidden layer to location parameter for purchases.
#I Mapping from hidden layer to scale parameter for purchases.

In [None]:
from pyro import sample
from pyro.distributions import Bernoulli, Normal
from torch import tensor, stack


def model(params, device=device):    #A
    """Causal model over Z, Guild Membership, Side-quest Engagement, Won Items and In-game Purchases.

    Args:
        params: mapping with the entries 'ρ_member', 'ρ_won_engaged' and
            'ρ_won_not_engaged', used as Bernoulli probabilities.
        device: device on which the prior for Z is created and to which the
            stacked network inputs are moved.

    The function registers one Pyro sample site per variable and returns nothing.
    """
    z_prior = Normal(tensor(0.0, device=device), tensor(1.0, device=device))
    z = sample("Z", z_prior)    #B
    is_guild_member = sample("Guild Membership", Bernoulli(params['ρ_member']))    #C
    ρ_engagement = confounders_2_engagement(    #D
        stack((is_guild_member, z)).to(device)    #D
    )    #D
    is_highly_engaged = sample("Side-quest Engagement", Bernoulli(ρ_engagement))    #E
    # Pick the win probability that matches the engagement value.
    is_not_engaged = 1 - is_highly_engaged    #F
    p_won = params['ρ_won_engaged'] * is_highly_engaged + params['ρ_won_not_engaged'] * is_not_engaged    #F
    won_items = sample("Won Items", Bernoulli(p_won))    #F
    μ_purchases, σ_purchases = purchases_network(    #G
        stack((won_items, is_guild_member, z)).to(device)    #G
    )    #G
    sample("In-game Purchases", Normal(μ_purchases, σ_purchases))    #H

#A The causal model.
#B A latent variable that acts as a proxy for other confounders.
#C Whether someone is in a guild.
#D Use confounders_2_engagement to map is_guild_member and z to the probability parameter for Side-quest Engagement.
#E Modeling Side-quest Engagement
#F Modeling amount of won items
#G Use purchases_network to map won_items, is_guild_member, and z to the location and scale parameters of in_game_purchases.
#H Model in_game_purchases

In [None]:
import pyro
from pyro import render_model, plate
from pyro.distributions import Beta
from pyro import render_model

confounders_2_engagement = Confounders2Engagement().to(device)    #A
purchases_network = PurchasesNetwork().to(device)    #A


def data_model(data, device=device):
    """Model of the whole dataset: global Beta priors plus one `model` draw per row.

    Args:
        data: mapping that contains an "In-game Purchases" entry; only its
            length is read here, to size the plate.
        device: device for the prior hyperparameters. It is forwarded to
            `model` so the priors and the per-row variables are created on
            the same device. The two networks stay on the module-level
            `device` they were moved to when this cell ran.
    """
    pyro.module("confounder_2_engagement", confounders_2_engagement)    #B
    pyro.module("confounder_2_purchases", purchases_network)    #B
    params = {
        'ρ_member': sample('ρ_member', Beta(tensor(5., device=device), tensor(5., device=device))),    #C
        'ρ_won_engaged': sample('ρ_won_engaged', Beta(tensor(5., device=device), tensor(2., device=device))),    #D
        'ρ_won_not_engaged': sample('ρ_won_not_engaged', Beta(tensor(2., device=device), tensor(5., device=device))),    #E
    }
    N = len(data["In-game Purchases"])
    with plate("N", N):    #F
        model(params, device=device)    #F

render_model(data_model, (data, ))

#A Initialize the neural networks
#B pyro.module lets Pyro know about all the parameters inside the networks
#C Sample from prior distribution for ρ_member
#D Sample from prior distribution for ρ_won_engaged
#E Sample from prior distribution for ρ_won_not_engaged
#F The plate context manager declares N independent samples (observations) from the causal variables.

In [None]:
class Encoder(nn.Module):
    """One-hidden-layer network that outputs the Normal parameters used to sample Z.

    The input holds one row per input variable (three by default) and one
    column per observation. Two linear heads share the hidden layer: one
    yields the location of Z, the other its (strictly positive) scale.
    """

    def __init__(self, input_dim=3, #A
                 z_dim=1,    #B
                 hidden_dim=5):    #C
        super().__init__()
        self.f_hidden = nn.Linear(input_dim, hidden_dim)
        self.f_loc = nn.Linear(hidden_dim, z_dim)
        self.f_scale = nn.Linear(hidden_dim, z_dim)
        self.softplus = nn.Softplus()

    def forward(self, input):
        """Map an (input_dim, N) tensor to the pair (z_loc, z_scale)."""
        per_observation = input.t()    # one row per observation
        hidden = self.softplus(self.f_hidden(per_observation))    #D
        z_loc = self.f_loc(hidden)   #E
        # Softplus keeps the scale positive; the 1e-6 offset keeps it away from zero.
        z_scale = 1e-6 + self.softplus(self.f_scale(hidden))    #F
        # Transpose back; squeeze(0) removes the leading dimension when it has size 1.
        z_loc = z_loc.t().squeeze(0)
        z_scale = z_scale.t().squeeze(0)
        return z_loc, z_scale

#A Input dimension is 3 because it will combine Side-quest Engagement, In-Game Purchases, and Guild Membership.
#B I use a simple univariate Z, but one could give it higher dimension with sufficient data.
#C The width of the hidden layer is 5.
#D Go from input to hidden layer.
#E Mapping from hidden layer to location parameter for Z.
#F Mapping from hidden layer to scale parameter for Z.

In [None]:
from pyro import param
from torch.distributions.constraints import positive

encoder = Encoder().to(device)

def guide(data, device=device):
    """Variational guide: trainable Beta posteriors for the ρ sites and an encoder-driven Normal for Z.

    Args:
        data: mapping with "Guild Membership", "Side-quest Engagement" and
            "In-game Purchases" tensors; these three are stacked and fed to
            the encoder.
        device: device for the initial parameter values and the encoder input.
    """
    pyro.module("encoder", encoder)

    def beta_site(site_name, α_name, β_name, α_init, β_init):
        """Sample `site_name` from a Beta with two positive, trainable shape parameters."""
        α = param(α_name, tensor(α_init, device=device), constraint=positive)
        β = param(β_name, tensor(β_init, device=device), constraint=positive)
        sample(site_name, Beta(α, β))

    beta_site('ρ_member', "α_member", "β_member", 1.0, 1.0)    #A
    beta_site('ρ_won_engaged', "α_won_engaged", "β_won_engaged", 5.0, 2.0)    #B
    beta_site('ρ_won_not_engaged', "α_won_not_engaged", "β_won_not_engaged", 2.0, 5.0)    #B

    N = len(data["In-game Purchases"])
    with pyro.plate("N", N):
        encoder_rows = [    #C
            data["Guild Membership"],    #C
            data["Side-quest Engagement"],    #C
            data["In-game Purchases"],    #C
        ]    #C
        z_input = torch.stack(encoder_rows).to(device)    #C
        z_loc, z_scale = encoder(z_input)    #C
        pyro.sample("Z", Normal(z_loc, z_scale))    #C


#A The guide samples ρ_member from a Beta distribution where the shape parameters are trainable.
#B ρ_won_engaged and ρ_won_not_engaged are also sampled from Beta distributions with trainable parameters.
#C Z is sampled from a Normal with parameters returned by the encoder.

In [None]:
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
from pyro import condition

NUM_STEPS = 500000    # number of SVI updates
LOG_EVERY = 500    # print the normalized loss once every this many steps

pyro.clear_param_store()    #A
adam_params = {"lr": 0.0001, "betas": (0.90, 0.999)}    #B
optimizer = Adam(adam_params)    #B
training_model = condition(data_model, data)    #C
svi = SVI(training_model, guide, optimizer, loss=Trace_ELBO())    #D
N = len(data['In-game Purchases'])    #E
elbo_values = []    #E
for step in range(NUM_STEPS):    #E
    # Divide by the number of observations so the value is a per-row loss.
    loss = svi.step(data) / N    #E
    elbo_values.append(loss)   #E
    if not step % LOG_EVERY:    #E
        print(loss)    #E
#A Erase parameter values in case you restart the training loop.
#B Set up the Adam optimizer. A learning rate ("lr") of 0.001 may work better if using CUDA.
#C Condition the data_model on the observed data.
#D Set up SVI.
#E Run the training loop.

In [None]:
import math

import matplotlib.pyplot as plt

# `math` has to be imported in this cell: no earlier cell imports it, so on a
# fresh kernel the math.log call below would otherwise raise NameError.
plt.plot([math.log(item) for item in elbo_values])    #A
plt.xlabel('Step')    #A
plt.ylabel('Log-Loss')    #A
plt.title('Training Loss')    #A
plt.show()    #A
#A Plot the log of training loss since loss is initially large.

In [None]:
 pyro.param("α_member"), pyro.param("β_member"), pyro.param("α_won_engaged"), pyro.param("β_won_engaged"), pyro.param("α_won_not_engaged"), pyro.param("β_won_not_engaged")

In [None]:
#!pip install seaborn
import matplotlib.pyplot as plt
import seaborn as sns
from pyro.infer import Predictive

predictive = Predictive(data_model, guide=guide, num_samples=1000)    #A
predictive_samples = predictive(data)    #A

# seaborn/NumPy cannot read CUDA tensors, so copy everything that gets plotted
# to host memory first. On a CPU-only run this is a cheap no-op conversion.
purchase_draws = predictive_samples["In-game Purchases"].detach().cpu().numpy()
observed_purchases = data['In-game Purchases'].detach().cpu().numpy()

for i, sample_data in enumerate(purchase_draws):    #B
    if i == 0:    #B
        # Only the first curve carries a label, so the legend gets a single entry.
        sns.kdeplot(sample_data, color="lightgrey", label="Predictive density")    #B
    else:    #B
        sns.kdeplot(sample_data, color="lightgrey", linewidth=0.2, alpha=0.5)    #B

sns.kdeplot(    #C
    observed_purchases,    #C
    color="black",    #C
    linewidth=1,    #C
    label="Empirical density"    #C
)    #C

plt.legend()
plt.title("Posterior Predictive Check of In-game Purchases")
plt.xlabel("Value")
plt.ylabel("Density")
plt.show()

#A Draw 1000 samples from data_model, using the trained guide for the sites it covers.
#B Plot one density curve per sampled In-game Purchases vector.
#C Overlay the density of the observed In-game Purchases.

In [None]:
from pyro import do

data_model_high_engagement = do(data_model, {"Side-quest Engagement": 1.})    #A
predictive_high_engagement = Predictive(data_model_high_engagement, guide=guide, num_samples=1000)    #B
predictive_high_engagement_samples = predictive_high_engagement(data)    #B

# seaborn/NumPy cannot read CUDA tensors, so plot a host-memory copy.
high_engagement_draws = predictive_high_engagement_samples["In-game Purchases"].detach().cpu().numpy()
for i, sample_data in enumerate(high_engagement_draws):    #C
    if i == 0:    #C
        # Label exactly one curve so plt.legend() below has an entry to show.
        sns.kdeplot(sample_data, color="lightgrey", linewidth=0.2, alpha=0.5, label="Predictive density")    #C
    else:    #C
        sns.kdeplot(sample_data, color="lightgrey", linewidth=0.2, alpha=0.5)    #C
plt.legend()    #C
plt.title("Posterior predictive samples of $P(I_{E=1})$'s density curves")    #C
plt.xlabel("Value")    #C
plt.ylabel("Density")    #C
plt.show()    #C

#A Use Pyro's "do" intervention operation to transform the model.
#B Forward generate samples from the transformed model conditional on the samples from the posterior. Simple forward generation is all we need in this case. A conditional query such as P(I_{E=1} | E=0) would require a more general inference algorithm.
#C Plot the results.

In [None]:
# Average over the observation axis: one expected purchase value per posterior sample.
expectation_high_engagement = predictive_high_engagement_samples['In-game Purchases'].mean(1)    #A
data_model_low_engagement = do(data_model, {"Side-quest Engagement": 0.})    #B
predictive_low_engagement = Predictive(data_model_low_engagement, guide=guide, num_samples=1000)    #B
predictive_low_engagement_samples = predictive_low_engagement(data)    #B
expectation_low_engagement = predictive_low_engagement_samples['In-game Purchases'].mean(1)    #B
ate_distribution = expectation_high_engagement - expectation_low_engagement    #C

# seaborn/NumPy cannot read CUDA tensors, so plot a host-memory copy.
sns.kdeplot(ate_distribution.detach().cpu().numpy())    #D
plt.title("Posterior distribution of the ATE")    #D
plt.xlabel("Value")    #D
plt.ylabel("Density")    #D
plt.show()    #D

#A Estimate the posterior predictive distribution of E(Y_{E=1}).
#B Estimate the posterior predictive distribution of E(Y_{E=0}).
#C Calculate the posterior predictive distribution of ATE = E(Y_{E=1}) - E(Y_{E=0}).
#D Plot the results.