In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import sys
import os
# Importing
import gc, os, csv
from catenets.models.jax import TNet, SNet1,SNet2,DRNet
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error, roc_auc_score
import pandas as pd
import requests
import zipfile
import os
import sklift
from sklift.metrics import uplift_auc_score

sys.path.append(os.path.abspath('../../data/simulation'))
from utils import tr_te_split,gen_1d_data, backdoor_dgp, frontdoor_dgp, instrument_dgp, simulated_study_2

In [2]:
# Use one shared seed for PyTorch and for NumPy's global RNG so that
# repeated runs of the notebook draw the same random numbers.
seed = 123
torch.cuda.manual_seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)

In [3]:
# Expert network: a two-layer MLP
class Expert(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    Args:
        input_dim: size of the last dimension of the input.
        hidden_dim: width of the hidden layer.
        output_dim: size of the last dimension of the output.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return the un-activated output of the second linear layer."""
        hidden = torch.relu(self.fc1(x))
        return self.fc2(hidden)

# Gating network: a two-layer MLP ending in a softmax
class Gate(nn.Module):
    """Two-layer perceptron that outputs one mixing weight per expert.

    Args:
        input_dim: size of the last dimension of the input.
        hidden_dim: width of the hidden layer.
        num_experts: number of weights produced for each input row.
    """

    def __init__(self, input_dim, hidden_dim, num_experts):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_experts)

    def forward(self, x):
        """Return softmax weights over the last dimension (they sum to 1)."""
        hidden = torch.relu(self.fc1(x))
        logits = self.fc2(hidden)
        return torch.softmax(logits, dim=-1)

In [4]:
# Mixture of experts with one gate and one output head per (task, treatment)
class MixtureOfExperts(nn.Module):
    """Shared experts combined by per-(task, treatment) gates and heads.

    Every expert sees the same input. For each key
    ``"task_{task}_treatment_{treatment}"`` (treatment is 0 or 1) a dedicated
    gate produces softmax weights over the experts, the expert outputs are
    averaged with those weights, and a dedicated linear head maps the mixture
    to a single value.

    Args:
        input_dim: number of input features.
        hidden_dim: hidden width used by every expert and every gate.
        expert_output_dim: output width of each expert.
        num_experts: number of shared experts.
        num_tasks: number of tasks; two gates/heads are created per task.
    """

    def __init__(self, input_dim, hidden_dim, expert_output_dim, num_experts, num_tasks):
        super().__init__()
        self.experts = nn.ModuleList(
            [Expert(input_dim, hidden_dim, expert_output_dim) for _ in range(num_experts)]
        )

        # Same key set, in the same order, for both the gates and the heads.
        head_keys = [
            f"task_{task}_treatment_{treatment}"
            for task in range(num_tasks)
            for treatment in range(2)
        ]
        self.gates = nn.ModuleDict(
            {key: Gate(input_dim, hidden_dim, num_experts) for key in head_keys}
        )
        self.task_heads = nn.ModuleDict(
            {key: nn.Linear(expert_output_dim, 1) for key in head_keys}
        )

    def forward(self, x):
        """Return a dict mapping each key to a (batch_size, 1) tensor.

        ``x`` must be 2-D (batch_size, input_dim) because the mixing step
        uses ``torch.bmm``, which only accepts 3-D operands.
        """
        # (batch_size, expert_output_dim, num_experts)
        stacked = torch.stack([expert(x) for expert in self.experts], dim=-1)

        outputs = {}
        for key, gate in self.gates.items():
            # (batch_size, num_experts, 1): one column of weights per row
            weights = gate(x).unsqueeze(-1)
            # Weighted sum over experts -> (batch_size, expert_output_dim)
            mixed = torch.bmm(stacked, weights).squeeze(2)
            outputs[key] = self.task_heads[key](mixed)  # (batch_size, 1)
        return outputs

## Simple MTML

In [5]:
# Load dataset
# The CSV location can be overridden through the CRITEO_DATA_PATH environment
# variable so the notebook is not tied to one machine's directory layout; the
# previous hard-coded location remains the default.
criteo_data_path = os.environ.get(
    "CRITEO_DATA_PATH",
    "/data/home/yilingliu/MTML/experiments/CRITEO/criteo-uplift-v2.1.csv",
)
if not os.path.isfile(criteo_data_path):
    raise FileNotFoundError(
        f"Criteo uplift CSV not found at {criteo_data_path!r}; "
        "set the CRITEO_DATA_PATH environment variable to its location."
    )
data = pd.read_csv(criteo_data_path)

# Preprocess data
# Features are every column except the treatment flag, the two outcomes and
# the "exposure" column.
X = data.drop(columns=["treatment", "visit", "conversion", "exposure"]).values
A = data["treatment"].values   # treatment indicator
Y = data["visit"].values       # outcome for task 0
C = data["conversion"].values  # outcome for task 1


# Convert data to PyTorch tensors
# NOTE(review): these full-dataset tensors are not used by the cells visible
# in this notebook; they are kept in case later cells rely on them.
X_tensor = torch.tensor(X, dtype=torch.float32)
A_tensor = torch.tensor(A, dtype=torch.float32)
Y_tensor = torch.tensor(Y, dtype=torch.float32)
C_tensor = torch.tensor(C, dtype=torch.float32)

# Train-test split: each row goes to the training set independently with
# probability 0.8, so the split is approximately (not exactly) 80/20.
N = len(X)
split = np.random.choice(np.array([True, False]), N, replace=True, p=np.array([0.8, 0.2]))

X_train, X_test = X[split], X[~split]
A_train, A_test = A[split], A[~split]
Y_train, Y_test = Y[split], Y[~split]
C_train, C_test = C[split], C[~split]

# Every row must land in exactly one of the two partitions.
assert len(X_train) + len(X_test) == N

# Convert train data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
A_train_tensor = torch.tensor(A_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
C_train_tensor = torch.tensor(C_train, dtype=torch.float32)

In [6]:
# Mini-batch iterator over the training split; batches are reshuffled every
# time the loader is iterated.
batch_size = 512
dataset = TensorDataset(X_train_tensor, A_train_tensor, Y_train_tensor, C_train_tensor)
dataloader = DataLoader(dataset, shuffle=True, batch_size=batch_size)

# Model hyper-parameters
input_dim = X.shape[1]   # one input unit per feature column
hidden_dim = 16 #32
expert_output_dim = 16
num_experts = 10
num_tasks = 2            # task 0 is trained on visit, task 1 on conversion

# Model, optimizer and loss.
# BCEWithLogitsLoss applies the sigmoid internally, so the model heads are
# expected to return raw logits.
model = MixtureOfExperts(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    expert_output_dim=expert_output_dim,
    num_experts=num_experts,
    num_tasks=num_tasks,
)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.BCEWithLogitsLoss()

In [None]:
# Training loop with batches
# Each sample contributes only to the heads of the treatment arm it was
# actually observed under: the batch is split by treatment and, per arm, the
# visit (task 0) and conversion (task 1) losses are added to the batch loss.
num_epochs = 10

model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for X_batch, A_batch, Y_batch, C_batch in dataloader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = 0

        for treatment in (0, 1):
            treatment_mask = A_batch == treatment
            if not treatment_mask.any():
                # No sample of this arm in the batch: nothing to learn from.
                continue
            for task, targets in enumerate((Y_batch, C_batch)):
                logits = outputs[f"task_{task}_treatment_{treatment}"][treatment_mask]
                # squeeze(-1) drops only the trailing size-1 dimension, so an
                # arm with a single row keeps shape (1,) and still matches its
                # target. A bare squeeze() would collapse it to a 0-d tensor
                # and BCEWithLogitsLoss would reject the shape mismatch.
                loss += loss_fn(logits.squeeze(-1), targets[treatment_mask])

        # Backpropagation
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    print(f"Epoch {epoch}, Loss: {total_loss / len(dataloader)}")

In [None]:
# Forward pass with the Mixture of Experts model on test data
from sklearn.metrics import auc
from sklift.metrics import uplift_curve

model.eval()
with torch.no_grad():
    outputs = model(torch.tensor(X_test, dtype=torch.float32))

# The heads are plain linear layers trained with BCEWithLogitsLoss, so they
# return logits. Map them to probabilities first: uplift is a difference of
# outcome probabilities, and a difference of logits ranks samples differently.
visit_preds_0 = torch.sigmoid(outputs["task_0_treatment_0"]).detach().numpy().flatten()
visit_preds_1 = torch.sigmoid(outputs["task_0_treatment_1"]).detach().numpy().flatten()
conversion_preds_0 = torch.sigmoid(outputs["task_1_treatment_0"]).detach().numpy().flatten()
conversion_preds_1 = torch.sigmoid(outputs["task_1_treatment_1"]).detach().numpy().flatten()

# Predicted uplift = P(outcome | treated) - P(outcome | control)
uplift_visit = visit_preds_1 - visit_preds_0
uplift_conversion = conversion_preds_1 - conversion_preds_0

# Calculate AUUC for CRITEO-UPLIFT
# AUUC is the area under the uplift curve and needs the treatment assignment.
# ROC AUC of the outcome against the uplift score ignores treatment and is not
# an uplift metric, so it is not used here.
auuc_visit = auc(*uplift_curve(Y_test, uplift_visit, A_test))
auuc_conversion = auc(*uplift_curve(C_test, uplift_conversion, A_test))

print("AUUC for Visit Uplift:", auuc_visit)
print("AUUC for Conversion Uplift:", auuc_conversion)


In [None]:
# Calculate normalized AUUC
# sklift's uplift_auc_score takes the outcome, the predicted uplift and the
# treatment assignment of the test rows. (This computation used to appear in
# two identical consecutive cells; it is kept once.)
auuc_visit = uplift_auc_score(Y_test, uplift_visit, A_test)
auuc_conversion = uplift_auc_score(C_test, uplift_conversion, A_test)

print("Normalized AUUC for Visit Uplift:", auuc_visit)
print("Normalized AUUC for Conversion Uplift:", auuc_conversion)

In [26]:
import pandas as pd
from causalml.metrics import auuc_score
from sklift.metrics import uplift_auc_score

# Toy example: eight units with alternating treatment assignment, a binary
# outcome and a hand-written uplift score, used to try out causalml's AUUC.
# NOTE(review): this rebinds `data`, which earlier in the notebook holds the
# Criteo DataFrame.
data = {
    'treatment': [0, 1, 0, 1, 0, 1, 0, 1],
    'outcome': [0, 1, 0, 1, 0, 1, 0, 0],
    'uplift_score': [0.1, 0.9, 0.2, 0.8, 0.3, 0.7, 0.4, 0.5],
}
df = pd.DataFrame.from_dict(data)

# AUUC as computed by causalml, with normalization switched on
auuc = auuc_score(
    df,
    outcome_col='outcome',
    treatment_col='treatment',
    score_col='uplift_score',
    normalize=True,
)

print("AUUC from causalML\n",auuc)

AUUC from causalML
 uplift_score    0.444444
Random          0.483333
dtype: float64


In [27]:
# Same toy data scored with sklift's uplift_auc_score for comparison.
auuc_sklift = uplift_auc_score(
    y_true=data['outcome'],
    uplift=data['uplift_score'],
    treatment=data['treatment'],
)
print("AUUC from sklift\n",auuc_sklift)

AUUC from sklift
 0.21428571428571427
