In [1]:
import numpy as np
import pandas as pd

## Problem setup

In [2]:
rng = np.random.default_rng()

In [3]:
def latent():
    U1 = rng.binomial(1, p=0.5)  # Alice tired
    U2 = rng.binomial(1, p=0.5)  # Alice had busy day
    U3 = rng.binomial(1, p=0.5)  # Alice hungry
    return U1, U2, U3

In [4]:
def SCM(U1, U2, U3, X=None):
    X = X if X is not None else (U1 ^ U2)  # Alice takes walk (walk -> mobile, no walk -> desktop)
    D = X ^ U3  # Advertisement type Alice sees
    # probabilities P(Y=1 | U1, U2, X)
    pY = {
        (0, 0, 0): 0.7,
        (0, 1, 0): 0.8,
        (0, 0, 1): 0.8,
        (0, 1, 1): 0.7,
        (1, 0, 0): 0.7,
        (1, 0, 1): 0.6,
        (1, 1, 0): 0.6,
        (1, 1, 1): 0.7
    }
    Y = rng.binomial(1, pY[(U1, U2, X)])  # Alice stay within social media limit
    return X, D, Y

In [5]:
U1, U2, U3 = latent()
X, D, Y = SCM(U1, U2, U3)
print("U1 (tired):", U1)
print("U2 (busy day):", U2)
print("U3 (hungry):", U3)
print("X (takes walk):", X)
print("D (advertisement type):", D)
print("Y (stay within limit):", Y)

U1 (tired): 1
U2 (busy day): 1
U3 (hungry): 1
X (takes walk): 0
D (advertisement type): 1
Y (stay within limit): 1


## Experiments

In [6]:
T:int = 1000000  # Number of rounds

### L1-regime

The observational strategy simply is Alice following her instincts

In [7]:
counter = {0: 0, 1: 0}
estimated_reward = {0: 0.0, 1: 0.0}

for t in range(T):
    U1, U2, U3 = latent()
    X, D, Y = SCM(U1, U2, U3)
    estimated_reward[X] += Y
    counter[X] += 1

for action in estimated_reward:
    estimated_reward[action] /= counter[action]

In [8]:
print("E[Y=1 | X=0]:", estimated_reward[0])
print("E[Y=1 | X=1]:", estimated_reward[1])
print("E[Y]:", (estimated_reward[0] + estimated_reward[1]) / 2)

E[Y=1 | X=0]: 0.6494034105115767
E[Y=1 | X=1]: 0.6500644903264511
E[Y]: 0.6497339504190138


### L2-regime

The interventional strategy is Alice randomizing her actions

In [9]:
counter = {0: 0, 1: 0}
estimated_reward = {0: 0.0, 1: 0.0}

for t in range(T):
    intervention = np.random.choice([0, 1])
    U1, U2, U3 = latent()
    X, D, Y = SCM(U1, U2, U3, X=intervention)
    estimated_reward[X] += Y
    counter[X] += 1

for action in estimated_reward:
    estimated_reward[action] /= counter[action]

In [10]:
print("E[Y=1 | do(X=0)]:", estimated_reward[0])
print("E[Y=1 | do(X=1)]:", estimated_reward[1])
print("E[Y]:", (estimated_reward[0] + estimated_reward[1]) / 2)

E[Y=1 | do(X=0)]: 0.7003618050652709
E[Y=1 | do(X=1)]: 0.6990262136330091
E[Y]: 0.69969400934914


### L3-regime

##### ETT-based L3 strategy

The ETT-based approach means Alice decides what she wants to do according to her instincts, logs that, and then randomizes the actual decision of what to do

In [11]:
counter = {(0,0): 0, (0,1): 0, (1,0): 0, (1,1): 0}  # keys are (natural value, intervention)
estimated_reward = {(0,0): 0.0, (0,1): 0.0, (1,0): 0.0, (1,1): 0.0}

for t in range(T):
    U1, U2, U3 = latent()

    # get the natural value of X
    x_prime, _, _ = SCM(U1, U2, U3)

    # randomize intervention
    intervention = np.random.choice([0, 1])
    X, D, Y = SCM(U1, U2, U3, X=intervention)
    estimated_reward[(x_prime, intervention)] += Y
    counter[(x_prime, intervention)] += 1

for action in estimated_reward:
    estimated_reward[action] /= counter[action]

In [12]:
for (x_prime, x), value in estimated_reward.items():
    print(f"E[Y=1_{{X={x_prime}}} | X={x}] = {value:.3f}")

E[Y=1_{X=0} | X=0] = 0.649
E[Y=1_{X=0} | X=1] = 0.751
E[Y=1_{X=1} | X=0] = 0.750
E[Y=1_{X=1} | X=1] = 0.651


##### Counterfactual Realizability

The L3 distribution P(Y_{x}, X, D_{x´´}) is realizable, thus we can sample from her natural choice X=x`, then randomly log in to see what ads she gets (D_{x``}) and again randomize how she actually uses the platform that day to get Y_{x}

In [13]:
from itertools import product
keys = list(product([0, 1], repeat=4))  # (x_prime, x_d, x_y, D)
counter = {k: 0 for k in keys}
estimated_reward = {k: 0.0 for k in keys}

for t in range(T):
    U1, U2, U3 = latent()

    # get the natural value of X
    x_prime, _, _ = SCM(U1, U2, U3)

    # get two independent interventions
    x = np.random.choice([0, 1])
    x_prime_prime = np.random.choice([0, 1])

    _, D, _ = SCM(U1, U2, U3, X=x)
    _, _, Y = SCM(U1, U2, U3, X=x_prime_prime)

    estimated_reward[(x_prime, x, x_prime_prime, D)] += Y
    counter[(x_prime, x, x_prime_prime, D)] += 1

for action in estimated_reward:
    estimated_reward[action] /= counter[action]

In [None]:
for (x_prime, x, x_prime_prime, d), value in estimated_reward.items():
    print(f"E[Y=1_{{X'={x_prime} | X={x}, X''={x_prime_prime}, D={d}}}] = {value:.3f}")

E[Y=1_{X'=0 | X=0, X''=0, D=0}] = 0.650
E[Y=1_{X'=0 | X=0, X''=0, D=1}] = 0.649
E[Y=1_{X'=0 | X=0, X''=1, D=0}] = 0.752
E[Y=1_{X'=0 | X=0, X''=1, D=1}] = 0.749
E[Y=1_{X'=0 | X=1, X''=0, D=0}] = 0.651
E[Y=1_{X'=0 | X=1, X''=0, D=1}] = 0.650
E[Y=1_{X'=0 | X=1, X''=1, D=0}] = 0.753
E[Y=1_{X'=0 | X=1, X''=1, D=1}] = 0.751
E[Y=1_{X'=1 | X=0, X''=0, D=0}] = 0.748
E[Y=1_{X'=1 | X=0, X''=0, D=1}] = 0.750
E[Y=1_{X'=1 | X=0, X''=1, D=0}] = 0.650
E[Y=1_{X'=1 | X=0, X''=1, D=1}] = 0.650
E[Y=1_{X'=1 | X=1, X''=0, D=0}] = 0.750
E[Y=1_{X'=1 | X=1, X''=0, D=1}] = 0.751
E[Y=1_{X'=1 | X=1, X''=1, D=0}] = 0.654
E[Y=1_{X'=1 | X=1, X''=1, D=1}] = 0.648
