<a href="https://colab.research.google.com/github/altdeep/causalML/blob/master/book/chapter%208/Chapter_8_Counterfactual_Examples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install pyro-ppl
!pip install dowhy


In [None]:
import torch
from torch import tensor
import torch.distributions.constraints as constraints

from dowhy import gcm

import pyro
from pyro import deterministic, param, sample
from pyro.contrib.autoname import scope
from pyro.distributions import Normal
from pyro.poutine import condition, do, reparam

from pyro.optim import Adam
from pyro.infer import SVI, Trace_ELBO


import networkx as nx
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt


DoWhy GCM full model ground truth

In [None]:
# Simulate the ground-truth linear SCM (Z -> X, Z -> Y, X -> Y); every
# variable gets its own independent standard-normal noise term.
n_samples = 10000
Z = np.random.normal(loc=0, scale=1, size=n_samples)
noise_x = np.random.normal(loc=0, scale=1, size=n_samples)
X = 2 * Z + noise_x
noise_y = np.random.normal(loc=0, scale=1, size=n_samples)
Y = 3 * X - 2 * Z + noise_y
training_data = pd.DataFrame({"X": X, "Y": Y, "Z": Z})

In [None]:
# Build the DoWhy GCM over the DAG Z -> X, X -> Y, Z -> Y.
causal_graph = nx.DiGraph([("Z", "X"), ("X", "Y"), ("Z", "Y")])
causal_model = gcm.InvertibleStructuralCausalModel(causal_graph)

# The root node Z uses the empirical distribution of the training data.
causal_model.set_causal_mechanism("Z", gcm.EmpiricalDistribution())

# Each non-root node gets its own linear additive-noise mechanism.
for child in ("X", "Y"):
    causal_model.set_causal_mechanism(
        child, gcm.AdditiveNoiseModel(gcm.ml.create_linear_regressor())
    )

gcm.fit(causal_model, training_data)

In [None]:
# Counterfactual query: for the unit observed at Z=1, X=-1, Y=1, what would
# the variables have been had X been set to 2?
observed_unit = pd.DataFrame({"Z": [1], "X": [-1], "Y": [1]})
set_x_to_2 = {'X': lambda x: 2}
gcm.counterfactual_samples(
    causal_model,
    set_x_to_2,
    observed_data=observed_unit,
)

Given our equations:

\begin{align}
Z &= N_z\\
X &= 2 * Z + N_x\\
Y &= 3 * X - 2 * Z + N_y
\end{align}

If X is -1.0, Y is 1.0, and Z is 1.0, then we have:

\begin{align}
1 &= N_z\\
-1 &= 2 * 1 + N_x\\
1 &= 3 * (-1) - 2 * 1 + N_y
\end{align}

That's three equations and three unknowns, so we can solve for $N_z$, $N_x$, and $N_y$ directly, and get $N_z = 1, N_x = -3, N_y = 6$

In the counterfactual world, we intervene and set X to 2.  The intervention operation changes the SCM to:

\begin{align}
Z &= N_z\\
X &= 2 \\
Y &= 3 * X - 2 * Z + N_y
\end{align}

Finally, we plug in the noise values we solved for ($N_z = 1$ and $N_y = 6$).

\begin{align}
Z &= 1\\
X &= 2 \\
Y &= 3 * X - 2 * Z + 6
\end{align}

So Y is 3 * 2 - 2 * 1 + 6 =  10

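Now try to reproduce this with inference in Pyro.  With exactly deterministic mechanisms the conditioning problem is degenerate, so we add a little observation noise to each variable to make it work.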

In [None]:
def f_Z(Nz):
    """Structural assignment for Z: Z equals its exogenous noise ``Nz``."""
    return Nz

def f_X(Z, Nx):
    """Structural assignment for X: ``X = 2 * Z + Nx``.

    The coefficient 2 on ``Z`` matches the simulated training data and the
    worked example in this notebook, where Z = 1 and X = -1 imply Nx = -3.

    Args:
        Z: Value of the parent variable Z.
        Nx: Exogenous noise term of X.

    Returns:
        The value of X.
    """
    return 2 * Z + Nx

def f_Y(X, Z, Ny):
    """Structural assignment for Y: ``Y = 3 * X - 2 * Z + Ny``."""
    from_x = 3 * X
    from_z = 2 * Z
    return from_x - from_z + Ny

def model():
    """Pyro model of the SCM with softened (slightly noisy) assignments.

    The noise terms Nz, Nx, Ny are drawn from a standard Normal prior. Z, X
    and Y are sample sites centred on their structural assignments with a
    scale of .001, rather than exact deterministic values.

    Returns:
        The tuple ``(Z, X, Y)``.
    """
    noise_prior = Normal(0.0, 1.0)
    obs_scale = .001

    Nz = sample("Nz", noise_prior)
    Nx = sample("Nx", noise_prior)
    Ny = sample("Ny", noise_prior)

    Z = sample("Z", Normal(f_Z(Nz), obs_scale))
    X = sample("X", Normal(f_X(Z, Nx), obs_scale))
    Y = sample("Y", Normal(f_Y(X, Z, Ny), obs_scale))
    return Z, X, Y

def guide():
    """Variational guide: an independent Normal over each noise site.

    Registers a location parameter (``μ_*``) and a positive-constrained
    scale parameter (``σ_*``) for each of Nz, Nx and Ny, all initialised to
    1.0, and samples the three noise sites from the resulting Normals.

    Returns:
        The sampled ``(Nz, Nx, Ny)``. The previous ``return Z, X, Y``
        referred to names never assigned in this function, so it returned
        the module-level data arrays instead of anything the guide sampled.
    """
    mu_z = param("μ_z", torch.tensor(1.0))
    scale_z = param("σ_z", torch.tensor(1.0),
                    constraint=constraints.positive)
    mu_x = param("μ_x", torch.tensor(1.0))
    scale_x = param("σ_x", torch.tensor(1.0),
                    constraint=constraints.positive)
    mu_y = param("μ_y", torch.tensor(1.0))
    scale_y = param("σ_y", torch.tensor(1.0),
                    constraint=constraints.positive)
    Nz = sample("Nz", Normal(mu_z, scale_z))
    Nx = sample("Nx", Normal(mu_x, scale_x))
    Ny = sample("Ny", Normal(mu_y, scale_y))

    return Nz, Nx, Ny

# NOTE: `soften` is constructed here but is not applied to the model below.
soften = reparam(config=pyro.infer.reparam.AutoReparam())

# Condition on a fully observed unit: X = -1, Y = 1, Z = 1.
evidence = {"X": tensor(-1.0), "Y": tensor(1.0), "Z": tensor(1.0)}
cond_model = condition(model, evidence)

# Start from an empty parameter store so earlier fits do not leak in.
pyro.clear_param_store()

# Optimizer and inference algorithm.
optimizer = Adam({"lr": 0.01})
svi = SVI(cond_model, guide, optimizer, loss=Trace_ELBO())

# Take M gradient steps, keeping every loss and printing each 1000th one.
losses = []
M = 20000
for step in range(M):
    loss = svi.step()
    losses.append(loss)
    if not step % 1000:
        print("loss: ", loss)

Now trying it when Z is unobserved.

In [None]:
# Loss curve of the SVI run.
steps = range(len(losses))
plt.plot(steps, losses)

# Read the learned variational parameters out of the param store as floats.
(
    μ_z_post, σ_z_post,
    μ_x_post, σ_x_post,
    μ_y_post, σ_y_post,
) = (
    pyro.param(site).item()
    for site in ("μ_z", "σ_z", "μ_x", "σ_x", "μ_y", "σ_y")
)

print(μ_z_post, σ_z_post, μ_x_post, σ_x_post, μ_y_post, σ_y_post)
print("Expect: N_z = 1, N_x = -3, N_y = 6")
#plt.hist([float(cf_model()[2]) for _ in range(1000)])

In [None]:


def model():
    """SCM whose noise terms are drawn from the fitted Normal posteriors.

    Nz, Nx and Ny are sampled from Normals parameterised by the module-level
    ``μ_*_post`` / ``σ_*_post`` values; Z, X and Y are then recorded as
    deterministic sites computed by the structural assignments.

    Returns:
        The tuple ``(Z, X, Y)``.
    """
    noise_posterior = {
        "Nz": Normal(μ_z_post, σ_z_post),
        "Nx": Normal(μ_x_post, σ_x_post),
        "Ny": Normal(μ_y_post, σ_y_post),
    }
    Nz, Nx, Ny = (
        sample(site, posterior) for site, posterior in noise_posterior.items()
    )

    Z = deterministic("Z", f_Z(Nz))
    X = deterministic("X", f_X(Z, Nx))
    Y = deterministic("Y", f_Y(X, Z, Ny))
    return Z, X, Y


Now we try with Z unobserved.  The values of $N_x$, $N_y$, and $N_z$ are no longer known with certainty, but we can model them probabilistically.

In [None]:
def guide():
    """Variational guide for the case where Z is latent.

    Registers a location parameter (``μ_*``) and a positive-constrained
    scale parameter (``σ_*``) for each of Nz, Nx and Ny, all initialised to
    1.0, samples the three noise sites, and additionally samples the site
    "Z" from a Normal centred on Nz with scale .001.

    Returns:
        The sampled ``(Nz, Nx, Ny)``. The previous ``return Z, X, Y``
        referred to ``X`` and ``Y``, which are never assigned in this
        function, so it returned the module-level data arrays.
    """
    mu_z = param("μ_z", torch.tensor(1.0))
    scale_z = param("σ_z", torch.tensor(1.0),
                    constraint=constraints.positive)
    mu_x = param("μ_x", torch.tensor(1.0))
    scale_x = param("σ_x", torch.tensor(1.0),
                    constraint=constraints.positive)
    mu_y = param("μ_y", torch.tensor(1.0))
    scale_y = param("σ_y", torch.tensor(1.0),
                    constraint=constraints.positive)
    Nz = sample("Nz", Normal(mu_z, scale_z))
    Nx = sample("Nx", Normal(mu_x, scale_x))
    Ny = sample("Ny", Normal(mu_y, scale_y))
    # Z is a sample site of the guide here; only the site matters, so the
    # value is not bound to a local name.
    sample("Z", Normal(Nz, .001))

    return Nz, Nx, Ny

# NOTE: `soften` is constructed here but is not applied to the model below.
soften = reparam(config=pyro.infer.reparam.AutoReparam())

# Condition on X = -1 and Y = 1 only; Z is left unobserved.
evidence = {"X": tensor(-1.0), "Y": tensor(1.0)}
cond_model = condition(model, evidence)

# Start from an empty parameter store so earlier fits do not leak in.
pyro.clear_param_store()

# Optimizer and inference algorithm.
optimizer = Adam({"lr": 0.005})
svi = SVI(cond_model, guide, optimizer, loss=Trace_ELBO())

# Take M gradient steps, keeping every loss and printing each 1000th one.
losses = []
M = 20000
for step in range(M):
    loss = svi.step()
    losses.append(loss)
    if not step % 1000:
        print("loss: ", loss)

Due to properties of Gaussian distributions, $P(N_x, N_y, N_z \mid X, Y)$ is a multivariate Gaussian (this can be confirmed in any standard statistics reference).

A more general confirmation is to match moments.

$$
\begin{align}
E(N_y) &= 3 - E(N_x)\\
var(N_y) &= var(N_x)\\
E(N_z) &= -(1 + E(N_x))/2\\
var(N_z) &= var(N_x)/4
\end{align}
$$

In [None]:
# Read the learned variational parameters out of the param store as floats.
μ_z_post = pyro.param("μ_z").item()
σ_z_post = pyro.param("σ_z").item()
μ_x_post = pyro.param("μ_x").item()
σ_x_post = pyro.param("σ_x").item()
μ_y_post = pyro.param("μ_y").item()
σ_y_post = pyro.param("σ_y").item()

# Compare the fitted moments against the moment-matching identities.
print("Expect u_y + u_x = 3.  Summing I get " + str(round(μ_y_post + μ_x_post, 2)))
# The check is on the *difference* of the scales, as the message states;
# the earlier version summed them.
print("Expect σ_x - σ_y = 0.  I get " + str(round(σ_x_post - σ_y_post, 4)))
print("E(N_z) + (1 + E(N_x))/2 = 0.  I get " + str(round(μ_z_post + .5 * (1 + μ_x_post),2)))
print("var(N_z) - var(N_x)/4 = 0.  I get " + str(round(σ_z_post**2 - σ_x_post**2/4, 4)))

# Raw fitted values, rounded for display.
print(
    round(μ_z_post, 2),
    round(σ_z_post, 4), 
    round(μ_x_post, 2),
    round(σ_x_post, 4),
    round(μ_y_post, 2),
    round(σ_y_post, 4),
)


In [None]:
from pyro.infer.discrete import infer_discrete
from pyro.distributions import Bernoulli
def model():
    """Binary model with Bernoulli noise terms Nx and Ny.

    Y's success probability is ``X*Ny + (1 - X)*(1 - Ny)``, which is 1 when
    X and Ny agree and 0 otherwise for 0/1 values.

    Returns:
        The tuple ``(Nx, Ny, X, Y)``.
    """
    Nx = sample("Nx", Bernoulli(.5))
    Ny = sample("Ny", Bernoulli(.5))
    # NOTE(review): Nx is sampled but never used; X is driven by Ny here.
    # Possibly Bernoulli(Nx) was intended -- confirm before changing.
    X = sample("X", Bernoulli(Ny))
    # The product needs an explicit `*`; `(1.-X)(1.-Ny)` attempted to call
    # the value of `1.-X` as a function.
    Y = sample("Y", Bernoulli(X * Ny + (1. - X) * (1. - Ny)))
    return Nx, Ny, X, Y

# Condition on X = 1 and Y = 1. The observed values are passed as tensors,
# as in the other conditioning calls in this file, because distribution
# log_prob validation rejects plain Python floats.
cond_model = pyro.condition(model, {"X": tensor(1.0), "Y": tensor(1.0)})
sampler = infer_discrete(cond_model, first_available_dim=-1)
sampler()


In [None]:
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO, TraceEnum_ELBO, config_enumerate, infer_discrete

@config_enumerate
def model():
    """Chain of three categorical variables sharing one 3x3 table ``p``.

    ``x`` is drawn from row 0 of ``p``, ``y`` from row ``x`` and ``z`` from
    row ``y``. The shape of each sampled value is printed.

    Returns:
        The tuple ``(x, y, z)``.
    """
    # Random positive initial values; the simplex constraint is applied by
    # the parameter itself.
    init_table = torch.randn(3, 3).exp()
    p = pyro.param("p", init_table, constraint=constraints.simplex)

    x = pyro.sample("x", dist.Categorical(p[0]))
    y = pyro.sample("y", dist.Categorical(p[x]))
    z = pyro.sample("z", dist.Categorical(p[y]))

    print(f"  model x.shape = {x.shape}")
    print(f"  model y.shape = {y.shape}")
    print(f"  model z.shape = {z.shape}")
    return x, y, z

def guide():
    """Empty guide: it defines no sample sites or parameters."""

# Wrap the enumerated model with infer_discrete and draw one (x, y, z) from it.
serving_model = infer_discrete(model, first_available_dim=-1)
# Calling the wrapped model also triggers the shape prints inside `model`.
x, y, z = serving_model()