In [55]:
!pip install torchtext==0.15.2



In [56]:
!pip install datasets



In [57]:
import torch
import torchtext
print(f"PyTorch version: {torch.__version__}")
print(f"torchtext version: {torchtext.__version__}")

PyTorch version: 2.0.1+cu117
torchtext version: 0.15.2+cpu


In [58]:
from datasets import load_dataset
import torch
import torchtext
from torchtext.data import get_tokenizer
from torch.utils.data import DataLoader, random_split

tokenizer = get_tokenizer("basic_english")
vec = torchtext.vocab.GloVe(name='6B', dim=100)

ds = load_dataset("stanfordnlp/imdb")

def process_dataset(split):
    bows = []
    gt = []
    for example in split:
        tokens = tokenizer(example["text"])
        tokens = [token for token in tokens if token in vec.stoi]
        if tokens:
            embeddings = vec.get_vecs_by_tokens(tokens, lower_case_backup=True)
            bow = embeddings.mean(dim=0)
        else:
            bow = torch.zeros(vec.dim)
        bows.append(bow)
        gt.append(example["label"])
    data = torch.stack(bows)
    labels = torch.tensor(gt)
    return torch.utils.data.TensorDataset(data, labels)

train_dataset = process_dataset(ds["train"])
test_dataset = process_dataset(ds["test"])

validation_split = 0.1
train_size = int((1 - validation_split) * len(train_dataset))
val_size = len(train_dataset) - train_size
train_subset, val_subset = random_split(train_dataset, [train_size, val_size])
batch_size = 64
train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Training samples: {len(train_subset)}")
print(f"Validation samples: {len(val_subset)}")
print(f"Test samples: {len(test_dataset)}")


Training samples: 22500
Validation samples: 2500
Test samples: 25000


In [60]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class TextIBPModel(nn.Module):
    def __init__(self, embedding_dim=100, hidden_dim=100, num_classes=2):
        super(TextIBPModel, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.num_classes = num_classes
        self.linear = nn.Linear(embedding_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """
        x: Tensor of shape (batch_size, embedding_dim)
        """
        x = self.linear(x)
        x = self.relu(x)
        # x is now (batch_size, hidden_dim)
        x = self.relu(self.fc1(x))
        logits = self.fc2(x)
        return logits

In [61]:
class IntervalPropagation:
    def __init__(self, lower, upper):
        self.lower = lower
        self.upper = upper

    def __add__(self, other):
        return IntervalPropagation(self.lower + other, self.upper + other)

    def __mul__(self, other):
        if isinstance(other, IntervalPropagation):
            lower = torch.minimum(torch.minimum(self.lower * other.lower, self.lower * other.upper),
                                  torch.minimum(self.upper * other.lower, self.upper * other.upper))
            upper = torch.maximum(torch.maximum(self.lower * other.lower, self.lower * other.upper),
                                  torch.maximum(self.upper * other.lower, self.upper * other.upper))
            return IntervalPropagation(lower, upper)
        else:
            lower = torch.minimum(self.lower * other, self.upper * other)
            upper = torch.maximum(self.lower * other, self.upper * other)
            return IntervalPropagation(lower, upper)

In [62]:
def propagate_layer(layer, interval):
    """
    layer: nn.Linear layer
    interval: IntervalPropagation object
    """
    lower = F.linear(interval.lower, layer.weight, layer.bias)
    upper = F.linear(interval.upper, layer.weight, layer.bias)
    return IntervalPropagation(lower, upper)

def relu_bounds(lower, upper):
    """Propagate bounds through a ReLU layer."""
    return F.relu(lower), F.relu(upper)

def propagate_model(model, interval):
    """
    model: TextIBPModel
    interval: IntervalPropagation object
    Returns: (lower_logits, upper_logits)
    """
    # Layer 1: Linear -> ReLU
    interval = propagate_layer(model.linear, interval)
    lower, upper = relu_bounds(interval.lower, interval.upper)
    interval = IntervalPropagation(lower, upper)

    # Layer 2: Feedforward Layer 1 -> ReLU
    interval = propagate_layer(model.fc1, interval)
    lower, upper = relu_bounds(interval.lower, interval.upper)
    interval = IntervalPropagation(lower, upper)

    # Layer 3: Feedforward Layer 2 (Output Layer)
    interval = propagate_layer(model.fc2, interval)

    return interval.lower, interval.upper

In [63]:
def ibp_loss(logits, true_labels, logits_robust, kappa):
    nominal_loss = F.cross_entropy(logits, true_labels)
    robust_loss = F.cross_entropy(logits_robust, true_labels)
    return kappa * nominal_loss + (1 - kappa) * robust_loss


In [64]:
def schedule_kappa(step, total_steps):
    initial_kappa = 1.0
    final_kappa = 0.5
    return max(final_kappa, initial_kappa - (initial_kappa - final_kappa) * (step / total_steps))

def schedule_epsilon(step, total_steps, target_epsilon=0.1):
    return min(target_epsilon, target_epsilon * (step / total_steps))

In [65]:
def ibp_forward(model, x, eps):
    """
    model: TextIBPModel
    x: Input tensor of shape (batch_size, embedding_dim)
    eps: Perturbation size
    Returns: (lower_logits, upper_logits)
    """
    lower_bound = x - eps
    upper_bound = x + eps
    interval = IntervalPropagation(lower_bound, upper_bound)
    lower_logits, upper_logits = propagate_model(model, interval)
    return lower_logits, upper_logits

In [66]:
import time

def train_ibp_with_early_stopping(model, optimizer, train_loader, val_loader, total_steps, eps_target=0.1, device='cpu', patience=500):
    model.train()
    current_step = 0
    best_verified_acc = 0
    patience_counter = 0
    start_time = time.time()

    while current_step < total_steps:
        for batch_data, batch_labels in train_loader:
            if current_step >= total_steps:
                break
            batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)
            logits = model(batch_data)
            lower_logits, upper_logits = ibp_forward(model, batch_data, eps_target)
            logits_robust = (lower_logits + upper_logits) / 2
            kappa = schedule_kappa(current_step, total_steps)
            epsilon_train = schedule_epsilon(current_step, total_steps, eps_target)
            loss = ibp_loss(logits, batch_labels, logits_robust, kappa)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if current_step % 100 == 0:
                verified_acc = compute_verified_accuracy(model, val_loader, eps_target, device)
                standard_acc = compute_standard_accuracy(model, val_loader, device)
                print(f"Step [{current_step}/{total_steps}], Loss: {loss.item():.4f}, "
                      f"Kappa: {kappa:.2f}, Epsilon: {epsilon_train:.2f}, "
                      f"Standard Acc: {standard_acc:.2f}%, Verified Acc: {verified_acc:.2f}%")

                if verified_acc > best_verified_acc:
                    best_verified_acc = verified_acc
                    patience_counter = 0
                    torch.save(model.state_dict(), 'best_model.pth')
                else:
                    patience_counter += 1
                    if patience_counter >= patience:
                        print("Early stopping triggered.")
                        return
            current_step += 1

    print("IBP Training completed.")

In [67]:
def compute_verified_accuracy(model, data_loader, eps, device='cpu', margin=1e-6):
    model.eval()
    correct_verified = 0
    total = 0
    with torch.no_grad():
        for batch_data, batch_labels in data_loader:
            batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)
            lower_logits, upper_logits = ibp_forward(model, batch_data, eps)

            if model.fc2.out_features == 2:
                lower_true = torch.where(batch_labels == 1, lower_logits[:,1], lower_logits[:,0])
                upper_other = torch.where(batch_labels == 1, upper_logits[:,0], upper_logits[:,1])
                verified = (lower_true - upper_other) > margin
                correct_verified += verified.sum().item()
                total += batch_labels.size(0)
            else:
                raise NotImplementedError("Verified accuracy computation is only implemented for binary classification.")

    verified_accuracy = (correct_verified / total) * 100
    return verified_accuracy


In [68]:
def compute_standard_accuracy(model, data_loader, device='cpu'):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_data, batch_labels in data_loader:
            batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)
            logits = model(batch_data)
            predictions = torch.argmax(logits, dim=1)
            correct += (predictions == batch_labels).sum().item()
            total += batch_labels.size(0)
    accuracy = (correct / total) * 100
    return accuracy

def evaluate(model, train_loader, val_loader, test_loader, eps, device='cpu'):
    standard_train_acc = compute_standard_accuracy(model, train_loader, device)
    standard_val_acc = compute_standard_accuracy(model, val_loader, device)
    standard_test_acc = compute_standard_accuracy(model, test_loader, device)

    verified_train_acc = compute_verified_accuracy(model, train_loader, eps, device)
    verified_val_acc = compute_verified_accuracy(model, val_loader, eps, device)
    verified_test_acc = compute_verified_accuracy(model, test_loader, eps, device)

    print(f"Standard Training Accuracy: {standard_train_acc:.2f}%")
    print(f"Verified Training Accuracy: {verified_train_acc:.2f}%")
    print(f"Standard Validation Accuracy: {standard_val_acc:.2f}%")
    print(f"Verified Validation Accuracy: {verified_val_acc:.2f}%")
    print(f"Standard Test Accuracy: {standard_test_acc:.2f}%")
    print(f"Verified Test Accuracy: {verified_test_acc:.2f}%")


In [69]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")
model = TextIBPModel(embedding_dim=100, hidden_dim=100, num_classes=2).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
total_steps = 10000
eps_target = 0.1
train_ibp_with_early_stopping(model, optimizer, train_loader, val_loader, total_steps, eps_target, device)

print("\nEvaluation Results:")
evaluate(model, train_loader, val_loader, test_loader, eps_target, device)


Using device: cuda
Step [0/10000], Loss: 0.6940, Kappa: 1.00, Epsilon: 0.00, Standard Acc: 49.16%, Verified Acc: 48.60%
Step [100/10000], Loss: 0.6331, Kappa: 0.99, Epsilon: 0.00, Standard Acc: 66.52%, Verified Acc: 62.96%
Step [200/10000], Loss: 0.6590, Kappa: 0.99, Epsilon: 0.00, Standard Acc: 74.92%, Verified Acc: 72.84%
Step [300/10000], Loss: 0.4967, Kappa: 0.98, Epsilon: 0.00, Standard Acc: 75.20%, Verified Acc: 74.08%
Step [400/10000], Loss: 0.5082, Kappa: 0.98, Epsilon: 0.00, Standard Acc: 76.60%, Verified Acc: 75.40%
Step [500/10000], Loss: 0.4553, Kappa: 0.97, Epsilon: 0.01, Standard Acc: 77.60%, Verified Acc: 75.88%
Step [600/10000], Loss: 0.7679, Kappa: 0.97, Epsilon: 0.01, Standard Acc: 76.88%, Verified Acc: 74.64%
Step [700/10000], Loss: 0.4601, Kappa: 0.96, Epsilon: 0.01, Standard Acc: 77.24%, Verified Acc: 76.12%
Step [800/10000], Loss: 0.5525, Kappa: 0.96, Epsilon: 0.01, Standard Acc: 77.36%, Verified Acc: 76.28%
Step [900/10000], Loss: 0.4512, Kappa: 0.95, Epsilon: 0.

In [70]:
def train_standard(model, optimizer, train_loader, val_loader, total_steps, device='cpu'):
    model.train()
    current_step = 0
    start_time = time.time()

    while current_step < total_steps:
        for batch_data, batch_labels in train_loader:
            if current_step >= total_steps:
                break
            batch_data, batch_labels = batch_data.to(device), batch_labels.to(device)

            logits = model(batch_data)
            loss = F.cross_entropy(logits, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if current_step % 100 == 0:
                elapsed = time.time() - start_time
                print(f"Step [{current_step}/{total_steps}], Loss: {loss.item():.4f}, Time Elapsed: {elapsed:.2f}s")
                start_time = time.time()

            current_step += 1

    print("Standard training completed.")


In [71]:
import copy
# im copying the model so its fresh

standard_model = TextIBPModel(embedding_dim=100, hidden_dim=100, num_classes=2).to(device)
standard_optimizer = optim.Adam(standard_model.parameters(), lr=1e-3)

start_time = time.time()
train_standard(standard_model, standard_optimizer, train_loader, val_loader, total_steps, device)
standard_training_time = time.time() - start_time
print(f"Standard Training Time: {standard_training_time:.2f}s")

model_ibp = TextIBPModel(embedding_dim=100, hidden_dim=100, num_classes=2).to(device)
optimizer_ibp = optim.Adam(model_ibp.parameters(), lr=1e-3)

start_time = time.time()
train_ibp(model_ibp, optimizer_ibp, train_loader, val_loader, total_steps, eps_target, device)
ibp_training_time = time.time() - start_time
print(f"IBP Training Time: {ibp_training_time:.2f}s")

print(f"IBP Training is {ibp_training_time / standard_training_time:.2f} times the standard training time.")


Step [0/10000], Loss: 0.6888, Time Elapsed: 0.00s
Step [100/10000], Loss: 0.5636, Time Elapsed: 0.18s
Step [200/10000], Loss: 0.4850, Time Elapsed: 0.19s
Step [300/10000], Loss: 0.4658, Time Elapsed: 0.19s
Step [400/10000], Loss: 0.5137, Time Elapsed: 0.19s
Step [500/10000], Loss: 0.4374, Time Elapsed: 0.18s
Step [600/10000], Loss: 0.3360, Time Elapsed: 0.18s
Step [700/10000], Loss: 0.5091, Time Elapsed: 0.18s
Step [800/10000], Loss: 0.4713, Time Elapsed: 0.18s
Step [900/10000], Loss: 0.3690, Time Elapsed: 0.18s
Step [1000/10000], Loss: 0.4528, Time Elapsed: 0.19s
Step [1100/10000], Loss: 0.3663, Time Elapsed: 0.20s
Step [1200/10000], Loss: 0.5491, Time Elapsed: 0.19s
Step [1300/10000], Loss: 0.4720, Time Elapsed: 0.19s
Step [1400/10000], Loss: 0.5354, Time Elapsed: 0.19s
Step [1500/10000], Loss: 0.3700, Time Elapsed: 0.19s
Step [1600/10000], Loss: 0.5450, Time Elapsed: 0.19s
Step [1700/10000], Loss: 0.4592, Time Elapsed: 0.20s
Step [1800/10000], Loss: 0.3722, Time Elapsed: 0.19s
Step 