In [23]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from causalml.dataset import synthetic_data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from causalml.inference.tree.causal.causalforest import CausalRandomForestRegressor

In [12]:
# Load synthetic dataset using updated API
y, X, w, tau, b, e = synthetic_data(mode=1, n=1000, p=5, sigma=1.0, adj=0.0)

# Split into train and test sets
X_train, X_test, y_train, y_test, w_train, w_test, tau_train, tau_test = train_test_split(
    X, y, w, tau, test_size=0.2, random_state=42
)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32)
y_train, y_test = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1), torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
w_train, w_test = torch.tensor(w_train, dtype=torch.float32).unsqueeze(1), torch.tensor(w_test, dtype=torch.float32).unsqueeze(1)
tau_train, tau_test = torch.tensor(tau_train, dtype=torch.float32), torch.tensor(tau_test, dtype=torch.float32)

# Print dataset shapes to verify
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}, w_train shape: {w_train.shape}")
print(f"tau_train shape: {tau_train.shape}")

X_train shape: torch.Size([800, 5]), y_train shape: torch.Size([800, 1]), w_train shape: torch.Size([800, 1])
tau_train shape: torch.Size([800])


In [13]:
class DragonNet(nn.Module):
    def __init__(self, input_dim):
        super(DragonNet, self).__init__()
        
        # Shared representation layers
        self.shared = nn.Sequential(
            nn.Linear(input_dim, 200),
            nn.ReLU(),
            nn.Linear(200, 100),
            nn.ReLU(),
            nn.Linear(100, 100),
            nn.ReLU()
        )
        
        # Treatment prediction head (Propensity Score)
        self.propensity_head = nn.Sequential(
            nn.Linear(100, 1),
            nn.Sigmoid()
        )
        
        # Outcome prediction heads
        self.outcome_head_0 = nn.Sequential(nn.Linear(100, 1))  # Y(0)
        self.outcome_head_1 = nn.Sequential(nn.Linear(100, 1))  # Y(1)

    def forward(self, x):
        representation = self.shared(x)
        
        # Predict treatment probability
        e_x = self.propensity_head(representation)
        
        # Predict potential outcomes
        y0 = self.outcome_head_0(representation)
        y1 = self.outcome_head_1(representation)
        
        return e_x, y0, y1


In [14]:
def targeted_regularization_loss(e_x, y0_pred, y1_pred, Y, T):
    """ Computes the doubly robust loss """
    
    # Compute predicted outcome based on treatment
    y_pred = T * y1_pred + (1 - T) * y0_pred
    
    # Compute residuals
    residual = Y - y_pred
    
    # Compute inverse probability weights
    e_x = torch.clamp(e_x, 1e-6, 1 - 1e-6)  # Avoid division by zero
    weight = (T - e_x) / (e_x * (1 - e_x))
    
    # Targeted regularization loss
    t_loss = torch.mean((residual - weight * residual) ** 2)
    
    return t_loss


In [16]:
# Initialize model
input_dim = X_train.shape[1]
model = DragonNet(input_dim)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    
    # Forward pass
    e_x, y0_pred, y1_pred = model(X_train)
    
    # Compute loss
    loss = targeted_regularization_loss(e_x, y0_pred, y1_pred, y_train, w_train)
    
    # Backward pass
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch}: Loss = {loss.item():.4f}")


Epoch 0: Loss = 10.9957
Epoch 10: Loss = 6.2082
Epoch 20: Loss = 4.4125
Epoch 30: Loss = 3.9409
Epoch 40: Loss = 3.7250
Epoch 50: Loss = 3.5626
Epoch 60: Loss = 3.4240
Epoch 70: Loss = 3.2883
Epoch 80: Loss = 3.1457
Epoch 90: Loss = 2.9960


In [19]:
model.eval()
with torch.no_grad():
    e_x_test, y0_pred_test, y1_pred_test = model(X_test)

    # Estimate Individual Treatment Effects (ITE)
    tau_hat = (y1_pred_test - y0_pred_test).squeeze().numpy()

    # Compute Mean Absolute Error
    mae = np.mean(np.abs(tau_hat - tau_test.numpy()))
    print(f"Mean Absolute Error in Treatment Effect Estimation: {mae:.4f}")


Mean Absolute Error in Treatment Effect Estimation: 0.3087


In [35]:
# Train Causal Forest using the same synthetic data
causal_forest = CausalRandomForestRegressor(n_estimators=100, min_samples_leaf=5)
causal_forest.fit(X=X_train.numpy(), treatment=w_train.numpy().squeeze(), y=y_train.numpy().squeeze())

CausalRandomForestRegressor(min_samples_leaf=5)

In [37]:
# Estimate treatment effects with Causal Forest
tau_hat_cf = causal_forest.predict(X_test.numpy())

# Compute Mean Absolute Error
mae = np.mean(np.abs(tau_hat_cf - tau_test.numpy()))
print(f"Mean Absolute Error in Treatment Effect Estimation: {mae:.4f}")

Mean Absolute Error in Treatment Effect Estimation: 0.5114


# Training dragon on larger size of dataset leverage torch.dataset

In [38]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from causalml.dataset import synthetic_data
from sklearn.metrics import mean_absolute_error

# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load synthetic dataset with larger sample size
y, X, w, tau, b, e = synthetic_data(mode=1, n=50000, p=10, sigma=1.0, adj=0.0)  # Increased dataset size

# Split into train and test sets
X_train, X_test, y_train, y_test, w_train, w_test, tau_train, tau_test = train_test_split(
    X, y, w, tau, test_size=0.2, random_state=42
)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert to PyTorch tensors
X_train, X_test = torch.tensor(X_train, dtype=torch.float32), torch.tensor(X_test, dtype=torch.float32)
y_train, y_test = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1), torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)
w_train, w_test = torch.tensor(w_train, dtype=torch.float32).unsqueeze(1), torch.tensor(w_test, dtype=torch.float32).unsqueeze(1)
tau_train, tau_test = torch.tensor(tau_train, dtype=torch.float32), torch.tensor(tau_test, dtype=torch.float32)

# --- PyTorch Dataset & DataLoader ---
class CausalDataset(Dataset):
    def __init__(self, X, y, w):
        self.X = X
        self.y = y
        self.w = w

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx], self.w[idx]

# Create data loaders for mini-batch training
batch_size = 512
train_dataset = CausalDataset(X_train, y_train, w_train)
test_dataset = CausalDataset(X_test, y_test, w_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# --- DragonNet Model ---
class DragonNet(nn.Module):
    def __init__(self, input_dim):
        super(DragonNet, self).__init__()
        self.shared = nn.Sequential(
            nn.Linear(input_dim, 200),
            nn.ReLU(),
            nn.Linear(200, 100),
            nn.ReLU(),
            nn.Linear(100, 100),
            nn.ReLU()
        )
        self.propensity_head = nn.Sequential(nn.Linear(100, 1), nn.Sigmoid())
        self.outcome_head_0 = nn.Sequential(nn.Linear(100, 1))
        self.outcome_head_1 = nn.Sequential(nn.Linear(100, 1))

    def forward(self, x):
        representation = self.shared(x)
        e_x = self.propensity_head(representation)
        y0 = self.outcome_head_0(representation)
        y1 = self.outcome_head_1(representation)
        return e_x, y0, y1

def targeted_regularization_loss(e_x, y0_pred, y1_pred, Y, T):
    e_x = torch.clamp(e_x, 1e-6, 1 - 1e-6)  # Prevent division by zero
    weight = (T - e_x) / (e_x * (1 - e_x))
    residual = Y - (T * y1_pred + (1 - T) * y0_pred)
    t_loss = torch.mean((residual - weight * residual) ** 2)
    return t_loss

# Initialize model
input_dim = X_train.shape[1]
model = DragonNet(input_dim).to(device)

# Optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)

# --- Train DragonNet with Mini-batch Training ---
num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for batch_X, batch_y, batch_w in train_loader:
        batch_X, batch_y, batch_w = batch_X.to(device), batch_y.to(device), batch_w.to(device)

        optimizer.zero_grad()
        e_x, y0_pred, y1_pred = model(batch_X)
        loss = targeted_regularization_loss(e_x, y0_pred, y1_pred, batch_y, batch_w)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    if epoch % 5 == 0:
        print(f"Epoch {epoch}: Avg Loss = {total_loss / len(train_loader):.4f}")

# --- Evaluate DragonNet ---
model.eval()
tau_hat_dragonnet = []

with torch.no_grad():
    for batch_X, _, _ in test_loader:
        batch_X = batch_X.to(device)
        e_x_test, y0_pred_test, y1_pred_test = model(batch_X)
        tau_hat_dragonnet.extend((y1_pred_test - y0_pred_test).cpu().numpy())

tau_hat_dragonnet = np.array(tau_hat_dragonnet).flatten()

# --- Compare with True Treatment Effects ---
mae_dragonnet = mean_absolute_error(tau_test, tau_hat_dragonnet)
print(f"\nMean Absolute Error for DragonNet: {mae_dragonnet:.4f}")


Using device: cpu
Epoch 0: Avg Loss = 5.3966
Epoch 5: Avg Loss = 4.0397
Epoch 10: Avg Loss = 3.9471
Epoch 15: Avg Loss = 3.9092
Epoch 20: Avg Loss = 3.8412
Epoch 25: Avg Loss = 3.7804
Epoch 30: Avg Loss = 3.7204
Epoch 35: Avg Loss = 3.6686
Epoch 40: Avg Loss = 3.5618
Epoch 45: Avg Loss = 3.4963

Mean Absolute Error for DragonNet: 0.2820
