In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
from causalml.metrics import auuc_score
import matplotlib.pyplot as plt
from typing import Dict, Tuple
import torch.nn as nn
import torch.nn.functional as F
import torchtnt.framework as tnt
from torch.utils.data import DataLoader, TensorDataset
from typing import Tuple, Dict
import torch
from torchtnt.framework import AutoUnit, State
from torch import Tensor

seed = 123
torch.manual_seed(seed)
np.random.seed(seed)

In [3]:
# Two-layer MLP - Experts
class Expert(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(Expert, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Two-layer MLP - Gates
class Gate(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_experts):
        super(Gate, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_experts)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.softmax(self.fc2(x), dim=-1)
        return x

In [4]:
# Mixture of Experts Model
class MixtureOfExperts(nn.Module):
    def __init__(self, input_dim, hidden_dim, expert_output_dim, num_experts, num_tasks):
        super(MixtureOfExperts, self).__init__()
        self.experts = nn.ModuleList([
            Expert(input_dim, hidden_dim, expert_output_dim) for _ in range(num_experts)
        ])
        self.gates = nn.ModuleDict({
            f"task_{task}_treatment_{treatment}": Gate(input_dim, hidden_dim, num_experts)
            for task in range(num_tasks) for treatment in range(2)
        })
        self.task_heads = nn.ModuleDict({
            f"task_{task}_treatment_{treatment}": nn.Linear(expert_output_dim, 1)
            for task in range(num_tasks) for treatment in range(2)
        })

    def forward(self, x):
        expert_outputs = torch.stack(
            [expert(x) for expert in self.experts], dim=-1
        )  # Shape: (batch_size, expert_output_dim, num_experts)
        outputs = {}

        for key, gate in self.gates.items():
            gate_weights = gate(x)  # Shape: (batch_size, num_experts)
            gate_weights = gate_weights.unsqueeze(1)  # Shape: (batch_size, 1, num_experts)
            mixture_output = torch.bmm(
                expert_outputs, gate_weights.transpose(1, 2)
            ).squeeze(2)  # Shape: (batch_size, expert_output_dim)
            task_output = self.task_heads[key](mixture_output)  # Shape: (batch_size, 1)
            outputs[key] = task_output

        return outputs

In [5]:
# Data loading and preprocessing
criteo_data_path = "criteo_uplift_v2.1.csv"
data = pd.read_csv(criteo_data_path)

X = data.drop(columns=["treatment", "visit", "conversion", "exposure"]).values
A = data["treatment"].values
Y = data["visit"].values
C = data["conversion"].values

# Train-test split
N = len(X)
indices = np.random.permutation(N)
split_idx = int(N * 0.8)
train_indices, test_indices = indices[:split_idx], indices[split_idx:]

X_train = X[train_indices]
X_test = X[test_indices]
A_train = A[train_indices]
A_test = A[test_indices]
Y_train = Y[train_indices]
Y_test = Y[test_indices]
C_train = C[train_indices]
C_test = C[test_indices]

# Normalize X to (0, 1) range using training data
scaler = MinMaxScaler()
X_train_normalized = scaler.fit_transform(X_train)  # Fit on training data and transform it
X_test_normalized = scaler.transform(X_test)        # Apply the same transformation to test data

# Convert to tensors and move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

X_train_tensor = torch.tensor(X_train_normalized, dtype=torch.float32, device=device)
X_test_tensor = torch.tensor(X_test_normalized, dtype=torch.float32, device=device)
A_train_tensor = torch.tensor(A_train, dtype=torch.float32, device=device)
A_test_tensor = torch.tensor(A_test, dtype=torch.float32, device=device)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32, device=device)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32, device=device)
C_train_tensor = torch.tensor(C_train, dtype=torch.float32, device=device)
C_test_tensor = torch.tensor(C_test, dtype=torch.float32, device=device)

In [6]:
class UpliftTrainingUnit(AutoUnit):
    def __init__(self, model, loss_fn):
        super().__init__(module=model)
        self.loss_fn = loss_fn
        
    def forward(self, data: Tuple[Tensor, Tensor, Tensor, Tensor]) -> Dict[str, Tensor]:
        X_batch, A_batch, Y_batch, C_batch = data
        outputs = self.module(X_batch)
        return outputs
    
    def compute_loss(self, state: State, data: Tuple[Tensor, Tensor, Tensor, Tensor]) -> Tensor:
        X_batch, A_batch, Y_batch, C_batch = data
        outputs = self.forward(data)
        batch_losses = []
        
        # Process each sample in the batch
        for i in range(len(X_batch)):
            sample_losses = []
            
            # Get individual sample
            treatment = A_batch[i].item()
            
            if treatment == 0:
                # Treatment 0 predictions
                y_pred = outputs["task_0_treatment_0"][i:i+1]
                y_true = Y_batch[i:i+1].unsqueeze(1)
                sample_losses.append(self.loss_fn(y_pred, y_true))
                
                c_pred = outputs["task_1_treatment_0"][i:i+1]
                c_true = C_batch[i:i+1].unsqueeze(1)
                sample_losses.append(self.loss_fn(c_pred, c_true))
                
            else:
                # Treatment 1 predictions
                y_pred = outputs["task_0_treatment_1"][i:i+1]
                y_true = Y_batch[i:i+1].unsqueeze(1)
                sample_losses.append(self.loss_fn(y_pred, y_true))
                
                c_pred = outputs["task_1_treatment_1"][i:i+1]
                c_true = C_batch[i:i+1].unsqueeze(1)
                sample_losses.append(self.loss_fn(c_pred, c_true))
            
            # Combine losses for this sample
            if sample_losses:
                sample_loss = torch.stack(sample_losses).mean()
                batch_losses.append(sample_loss)
        
        # Combine all batch losses
        if batch_losses:
            total_loss = torch.stack(batch_losses).mean()
            state.loss = total_loss  # Store loss in state
            return total_loss
        else:
            # Return zero loss if no valid samples
            return torch.tensor(0.0, device=X_batch.device, requires_grad=True)

    def train_step(self, state: State, data: Tuple[Tensor, Tensor, Tensor, Tensor]) -> State:
        """Training step for the model."""
        # Forward pass and compute loss
        loss = self.compute_loss(state, data)
        # Backward pass
        loss.backward()
        
        return state
        
    def eval_step(self, state: State, data: Tuple[Tensor, Tensor, Tensor, Tensor]) -> State:
        """Evaluation step for the model."""
        with torch.no_grad():
            loss = self.compute_loss(state, data)
            state.loss = loss
        return state

    def configure_optimizers_and_lr_scheduler(self, module):
        optimizer = torch.optim.Adam(module.parameters(), lr=0.001)
        return optimizer, None

# Create datasets and dataloaders
batch_size = 10000
train_dataset = TensorDataset(X_train_tensor, A_train_tensor, Y_train_tensor, C_train_tensor)
test_dataset = TensorDataset(X_test_tensor, A_test_tensor, Y_test_tensor, C_test_tensor)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

# Initialize model and training
input_dim = X.shape[1]
hidden_dim = 16
expert_output_dim = 16
num_experts = 10
num_tasks = 2

model = MixtureOfExperts(input_dim, hidden_dim, expert_output_dim, num_experts, num_tasks).to(device)
loss_fn = nn.BCEWithLogitsLoss()

# Create training unit and run training
unit = UpliftTrainingUnit(model=model, loss_fn=loss_fn)

# Training loop with TorchTNT
tnt.fit(
    unit=unit,
    train_dataloader=train_dataloader,
    eval_dataloader=test_dataloader,
    max_epochs=1
)


KeyboardInterrupt



In [7]:
def evaluate_uplift_model(
    model: torch.nn.Module,
    test_dataloader: torch.utils.data.DataLoader,
    device: torch.device
) -> Tuple[Dict[str, float], Dict[str, pd.DataFrame]]:
    """
    Evaluate uplift model and calculate AUUC scores for visit and conversion.
    
    Args:
        model: Trained uplift model
        test_dataloader: DataLoader containing test data
        device: Device to run the model on
        
    Returns:
        Tuple containing metrics dictionary and evaluation dataframes
    """
    model.eval()
    with torch.no_grad():
        uplift_scores_visit = []
        uplift_scores_conversion = []
        A_test_list = []
        Y_test_list = []
        C_test_list = []
        
        # Collect predictions
        for X_batch, A_batch, Y_batch, C_batch in test_dataloader:
            X_batch = X_batch.to(device)
            outputs = model(X_batch)
            
            # Get predictions for both treatments
            visit_preds_0 = outputs["task_0_treatment_0"].squeeze()
            visit_preds_1 = outputs["task_0_treatment_1"].squeeze()
            conversion_preds_0 = outputs["task_1_treatment_0"].squeeze()
            conversion_preds_1 = outputs["task_1_treatment_1"].squeeze()
            
            # Calculate uplift scores
            uplift_visit = (visit_preds_1 - visit_preds_0).cpu()
            uplift_conversion = (conversion_preds_1 - conversion_preds_0).cpu()
            
            # Collect results
            uplift_scores_visit.append(uplift_visit)
            uplift_scores_conversion.append(uplift_conversion)
            A_test_list.append(A_batch.cpu())
            Y_test_list.append(Y_batch.cpu())
            C_test_list.append(C_batch.cpu())
        
        # Concatenate all collected data
        uplift_scores_visit = torch.cat(uplift_scores_visit).numpy()
        uplift_scores_conversion = torch.cat(uplift_scores_conversion).numpy()
        A_test_all = torch.cat(A_test_list).numpy()
        Y_test_all = torch.cat(Y_test_list).numpy()
        C_test_all = torch.cat(C_test_list).numpy()
        
        # Create evaluation DataFrames
        df_visit = pd.DataFrame({
            'treatment': A_test_all,
            'outcome': Y_test_all,
            'uplift_score': uplift_scores_visit
        })
        
        df_conversion = pd.DataFrame({
            'treatment': A_test_all,
            'outcome': C_test_all,
            'uplift_score': uplift_scores_conversion
        })
        
        # Calculate AUUC scores
        auuc_visit = auuc_score(
            df_visit, 
            outcome_col='outcome',
            treatment_col='treatment',
            score_col='uplift_score',
            normalize=True
        )
        
        auuc_conversion = auuc_score(
            df_conversion,
            outcome_col='outcome',
            treatment_col='treatment',
            score_col='uplift_score',
            normalize=True
        )
        
        # Calculate additional metrics
        treatment_effect_visit = np.mean(Y_test_all[A_test_all == 1]) - np.mean(Y_test_all[A_test_all == 0])
        treatment_effect_conversion = np.mean(C_test_all[A_test_all == 1]) - np.mean(C_test_all[A_test_all == 0])
        
        # Compile metrics
        metrics = {
            'auuc_visit': auuc_visit,
            'auuc_conversion': auuc_conversion,
            'ate_visit': treatment_effect_visit,
            'ate_conversion': treatment_effect_conversion
        }
        
        # Store evaluation dataframes
        eval_dfs = {
            'visit': df_visit,
            'conversion': df_conversion
        }
        
        return metrics, eval_dfs

In [8]:
# Usage example:
metrics, eval_dfs = evaluate_uplift_model(model, test_dataloader, device)

# Print metrics
print("\nUplift Model Evaluation Results:")
print("-" * 30)
print(f"AUUC (Visit): {metrics['auuc_visit'].iloc[0]:.4f}")
print(f"AUUC (Conversion): {metrics['auuc_conversion'].iloc[0]:.4f}")
print(f"Average Treatment Effect (Visit): {metrics['ate_visit']:.4f}")
print(f"Average Treatment Effect (Conversion): {metrics['ate_conversion']:.4f}")



Uplift Model Evaluation Results:
------------------------------
AUUC (Visit): 0.7310
AUUC (Conversion): 0.8428
Average Treatment Effect (Visit): 0.0105
Average Treatment Effect (Conversion): 0.0012
