In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from causalml.dataset import synthetic_data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from causalml.inference.tree.causal.causalforest import CausalRandomForestRegressor

In [3]:
# Load synthetic dataset using updated API
y, X, w, tau, b, e = synthetic_data(mode=1, n=1000, p=5, sigma=1.0, adj=0.0)

# Split into train and test sets
X_train, X_test, y_train, y_test, w_train, w_test, tau_train, tau_test = train_test_split(
    X, y, w, tau, test_size=0.2, random_state=42
)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32)
y_train, y_test = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1), torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
w_train, w_test = torch.tensor(w_train, dtype=torch.float32).unsqueeze(1), torch.tensor(w_test, dtype=torch.float32).unsqueeze(1)
tau_train, tau_test = torch.tensor(tau_train, dtype=torch.float32), torch.tensor(tau_test, dtype=torch.float32)

# Print dataset shapes to verify
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}, w_train shape: {w_train.shape}")
print(f"tau_train shape: {tau_train.shape}")

X_train shape: torch.Size([800, 5]), y_train shape: torch.Size([800, 1]), w_train shape: torch.Size([800, 1])
tau_train shape: torch.Size([800])


In [4]:
class DragonNet(nn.Module):
    def __init__(self, input_dim):
        super(DragonNet, self).__init__()
        
        # Shared representation layers
        self.shared = nn.Sequential(
            nn.Linear(input_dim, 200),
            nn.ELU(),
            nn.Linear(200, 100),
            nn.ELU(),
            nn.Linear(100, 100),
            nn.ELU()
        )
        
        # Treatment prediction head (Propensity Score)
        self.propensity_head = nn.Sequential(
            nn.Linear(100, 1),
            nn.Sigmoid()
        )
        
        # Outcome prediction heads
        self.outcome_head_0 = nn.Sequential(nn.Linear(100, 1))  # Y(0)
        self.outcome_head_1 = nn.Sequential(nn.Linear(100, 1))  # Y(1)


        # Learnable epsilon (initialized small)
        self.epsilon = nn.Parameter(torch.tensor(1e-6))

    def forward(self, x):
        representation = self.shared(x)
        
        # Predict treatment probability
        e_x = self.propensity_head(representation)
        
        # Predict potential outcomes
        y0 = self.outcome_head_0(representation)
        y1 = self.outcome_head_1(representation)
        
        return e_x, y0, y1


In [5]:
def make_regression_loss(y_0_pred, y_1_pred, y_true, t_true):
    
    loss0 = (1 - t_true) * torch.square(y_0_pred - y_true)
    loss1 = t_true * torch.square(y_1_pred - y_true)
    loss = loss0 + loss1
    return torch.mean(loss)

def make_binary_classification_loss(t_pred, t_true):
    return nn.BCELoss()(t_pred, t_true)

def make_targeted_regularization_loss(e_x, y0_pred, y1_pred, Y, T, epsilon):
    """ Computes the doubly robust loss """
    
    # Compute predicted outcome based on treatment
    y_pred = T * y1_pred + (1 - T) * y0_pred

    
    # Compute inverse probability weights
    e_x = torch.clamp(e_x, 1e-6, 1 - 1e-6)  # Avoid division by zero
    weight = (T - e_x) / (e_x * (1 - e_x))


    # Compute y_pred_tilde (corrected y_pred with propsensity scores)
    y_pred_tilde = y_pred + epsilon * weight
    
    # Targeted regularization loss
    t_loss = torch.mean((Y-y_pred_tilde) ** 2)
    
    return t_loss


In [6]:
# Initialize model
input_dim = X_train.shape[1]
model = DragonNet(input_dim)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# hyper-praameter
alpha = 0.1
beta = 0.1

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    e_x, y_0_pred, y_1_pred = model(X_train)
    
    # Compute loss
    regression_loss = make_regression_loss(y_0_pred, y_1_pred, y_train, w_train)
    bce_loss = make_binary_classification_loss(e_x, w_train)
    vanila_loss = regression_loss + alpha * bce_loss
    
    t_loss = make_targeted_regularization_loss(e_x, y_0_pred, y_1_pred, y_train, w_train, model.epsilon)
    loss = vanila_loss + beta * t_loss
    
    
    # Backward pass
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch}: Loss = {loss.item():.4f},  \
        regression loss: {regression_loss.item():.4f}, \
        bce loss: {bce_loss.item():.4f}, \
        t_loss: {t_loss.item():.4f}, \
        epsilon: {model.epsilon.item():.4f}, \
        ")


Epoch 0: Loss = 3.8227,          regression loss: 3.4107,         bce loss: 0.7088,         t_loss: 3.4107,         epsilon: 0.0010,         
Epoch 10: Loss = 1.4440,          regression loss: 1.2529,         bce loss: 0.6559,         t_loss: 1.2550,         epsilon: 0.0098,         
Epoch 20: Loss = 1.3497,          regression loss: 1.1680,         bce loss: 0.6495,         t_loss: 1.1670,         epsilon: 0.0116,         
Epoch 30: Loss = 1.3362,          regression loss: 1.1557,         bce loss: 0.6499,         t_loss: 1.1553,         epsilon: 0.0126,         
Epoch 40: Loss = 1.3214,          regression loss: 1.1435,         bce loss: 0.6337,         t_loss: 1.1453,         epsilon: 0.0123,         
Epoch 50: Loss = 1.3084,          regression loss: 1.1341,         bce loss: 0.6099,         t_loss: 1.1332,         epsilon: 0.0111,         
Epoch 60: Loss = 1.2959,          regression loss: 1.1251,         bce loss: 0.5820,         t_loss: 1.1261,         epsilon: 0.0105,         


In [7]:
sum(p.numel() for p in model.parameters())

31704

In [67]:
model.eval()
with torch.no_grad():
    e_x_test, y0_pred_test, y1_pred_test = model(X_test)

    # Estimate Individual Treatment Effects (ITE)
    tau_hat = (y1_pred_test - y0_pred_test).squeeze().numpy()

    # Compute Mean Absolute Error
    mae = np.mean(np.abs(tau_hat - tau_test.numpy()))
    print(f"Mean Absolute Error in Treatment Effect Estimation: {mae:.4f}")


Mean Absolute Error in Treatment Effect Estimation: 0.2703


In [35]:
# Train Causal Forest using the same synthetic data
causal_forest = CausalRandomForestRegressor(n_estimators=100, min_samples_leaf=5)
causal_forest.fit(X=X_train.numpy(), treatment=w_train.numpy().squeeze(), y=y_train.numpy().squeeze())

CausalRandomForestRegressor(min_samples_leaf=5)

In [37]:
# Estimate treatment effects with Causal Forest
tau_hat_cf = causal_forest.predict(X_test.numpy())

# Compute Mean Absolute Error
mae = np.mean(np.abs(tau_hat_cf - tau_test.numpy()))
print(f"Mean Absolute Error in Treatment Effect Estimation: {mae:.4f}")

Mean Absolute Error in Treatment Effect Estimation: 0.5114


# Training dragon on larger size of dataset leverage torch.dataset

In [73]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from causalml.dataset import synthetic_data
from sklearn.metrics import mean_absolute_error

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load synthetic dataset with larger sample size
y, X, w, tau, b, e = synthetic_data(mode=1, n=50000, p=10, sigma=1.0, adj=0.0)  # Increased dataset size

# Split into train and test sets
X_train, X_test, y_train, y_test, w_train, w_test, tau_train, tau_test = train_test_split(
    X, y, w, tau, test_size=0.2, random_state=42
)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32)
y_train, y_test = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1), torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
w_train, w_test = torch.tensor(w_train, dtype=torch.float32).unsqueeze(1), torch.tensor(w_test, dtype=torch.float32).unsqueeze(1)
tau_train, tau_test = torch.tensor(tau_train, dtype=torch.float32), torch.tensor(tau_test, dtype=torch.float32)

# --- PyTorch Dataset & DataLoader ---
class CausalDataset(Dataset):
    def __init__(self, X, y, w):
        self.X = X
        self.y = y
        self.w = w

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx], self.w[idx]

# Create data loaders for mini-batch training
batch_size = 512
train_dataset = CausalDataset(X_train, y_train, w_train)
test_dataset = CausalDataset(X_test, y_test, w_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)



# Initialize model
input_dim = X_train.shape[1]
model = DragonNet(input_dim).to(device)

# Optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)

# --- Train DragonNet with Mini-batch Training ---
num_epochs = 10
alpha = 0.1
beta = 0.1
step = 0
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch_X, batch_y, batch_w in train_loader:
        batch_X, batch_y, batch_w = batch_X.to(device), batch_y.to(device), batch_w.to(device)

        optimizer.zero_grad()
        # Forward pass
        e_x, y_0_pred, y_1_pred = model(batch_X)
        
        # Compute loss
        regression_loss = make_regression_loss(y_0_pred, y_1_pred, batch_y, batch_w)
        bce_loss = make_binary_classification_loss(e_x, batch_w)
        vanila_loss = regression_loss + alpha * bce_loss
        
        t_loss = make_targeted_regularization_loss(e_x, y_0_pred, y_1_pred, batch_y, batch_w, model.epsilon)
        loss = vanila_loss + beta * t_loss
    
        loss.backward()
        optimizer.step()
        step += 1

        if step % 50 == 0:
            print(f"Step {step}: Loss = {loss.item():.4f}, regression loss: {regression_loss.item():.4f}, bce loss: {bce_loss.item():.4f}, t_loss: {t_loss.item():.4f}, epsilon: {model.epsilon.item():.4f}")

# --- Evaluate DragonNet ---
model.eval()
tau_hat_dragonnet = []

with torch.no_grad():
    for batch_X, _, _ in test_loader:
        batch_X = batch_X.to(device)
        e_x_test, y0_pred_test, y1_pred_test = model(batch_X)
        tau_hat_dragonnet.extend((y1_pred_test - y0_pred_test).cpu().numpy())

tau_hat_dragonnet = np.array(tau_hat_dragonnet).flatten()

# --- Compare with True Treatment Effects ---
mae_dragonnet = mean_absolute_error(tau_test, tau_hat_dragonnet)
print(f"\nMean Absolute Error for DragonNet: {mae_dragonnet:.4f}")


Using device: cpu
Step 50: Loss = 1.2153, regression loss: 1.0472, bce loss: 0.6365, t_loss: 1.0444, epsilon: 0.0079
Step 100: Loss = 1.1611, regression loss: 1.0060, bce loss: 0.5461, t_loss: 1.0049, epsilon: 0.0047
Step 150: Loss = 1.1649, regression loss: 1.0099, bce loss: 0.5392, t_loss: 1.0106, epsilon: 0.0050
Step 200: Loss = 1.2987, regression loss: 1.1284, bce loss: 0.5745, t_loss: 1.1287, epsilon: 0.0037
Step 250: Loss = 1.1792, regression loss: 1.0241, bce loss: 0.5287, t_loss: 1.0224, epsilon: 0.0092
Step 300: Loss = 1.1184, regression loss: 0.9688, bce loss: 0.5269, t_loss: 0.9687, epsilon: 0.0034
Step 350: Loss = 1.1700, regression loss: 1.0140, bce loss: 0.5455, t_loss: 1.0152, epsilon: 0.0049
Step 400: Loss = 1.1610, regression loss: 1.0075, bce loss: 0.5277, t_loss: 1.0071, epsilon: 0.0037
Step 450: Loss = 1.1284, regression loss: 0.9778, bce loss: 0.5307, t_loss: 0.9757, epsilon: 0.0058
Step 500: Loss = 1.2342, regression loss: 1.0790, bce loss: 0.4718, t_loss: 1.0804,