In [None]:
!pip install torchtext

In [3]:
# !pip install tensorboardX

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
# from tensorboardX import SummaryWriter

use_cuda = False
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = 64

np.random.seed(42)
torch.manual_seed(42)


## Dataloaders
train_dataset = datasets.MNIST('mnist_data/', train=True, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))
test_dataset = datasets.MNIST('mnist_data/', train=False, download=True, transform=transforms.Compose(
    [transforms.ToTensor()]
))

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:02<00:00, 4.80MB/s]


Extracting mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 61.4kB/s]


Extracting mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:06<00:00, 238kB/s]


Extracting mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1007)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 5.50MB/s]

Extracting mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw






In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [5]:
# Define the network
class SimpleFCNN(nn.Module):
    def __init__(self):
        super(SimpleFCNN, self).__init__()
        self.fc1 = nn.Linear(28*28, 50)
        self.fc2 = nn.Linear(50, 50)
        self.fc3 = nn.Linear(50, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [6]:
def propagate_bounds(x, weight, bias):
    lower, upper = x[0], x[1]
    lower = lower.view(lower.size(0), -1)
    upper = upper.view(upper.size(0), -1)
    center = (lower + upper) / 2
    deviation = (upper - lower) / 2
    center = torch.matmul(center, weight.T) + bias
    deviation = torch.matmul(deviation, torch.abs(weight.T))
    lower_bound = center - deviation
    upper_bound = center + deviation
    return lower_bound, upper_bound

def relu_bounds(lower, upper):
    """Propagate bounds through a ReLU layer."""
    return F.relu(lower), F.relu(upper)

In [7]:
def ibp_loss(logits, true_labels, logits_robust, kappa):
    nominal_loss = F.cross_entropy(logits, true_labels)
    robust_loss = F.cross_entropy(logits_robust, true_labels)
    return kappa * nominal_loss + (1 - kappa) * robust_loss

In [8]:
def ibp_forward(model, x, eps):
    lower_bound = x - eps
    upper_bound = x + eps
    l1, u1 = propagate_bounds((lower_bound, upper_bound), model.fc1.weight, model.fc1.bias)
    l1, u1 = relu_bounds(l1, u1)
    l2, u2 = propagate_bounds((l1, u1), model.fc2.weight, model.fc2.bias)
    l2, u2 = relu_bounds(l2, u2)
    l3, u3 = propagate_bounds((l2, u2), model.fc3.weight, model.fc3.bias)
    return l3, u3

In [9]:
def schedule_kappa(step, total_steps):
    initial_kappa = 1.0
    final_kappa = 0.5
    return max(final_kappa, initial_kappa - (initial_kappa - final_kappa) * (step / total_steps))

def schedule_epsilon(step, total_steps, target_epsilon=0.1):
    return min(target_epsilon, target_epsilon * (step / total_steps))


In [10]:
def train(model, optimizer, train_loader, total_steps, eps_target=0.1):
    model.train()
    current_step = 0

    while current_step < total_steps:
        for images, labels in train_loader:
            if current_step >= total_steps:
                break
            logits = model(images)
            lower_logits, upper_logits = ibp_forward(model, images, eps_target)
            logits_robust = (lower_logits + upper_logits) / 2  # Middle point approximation
            kappa = schedule_kappa(current_step, total_steps)
            epsilon_train = schedule_epsilon(current_step, total_steps, eps_target)
            loss = ibp_loss(logits, labels, logits_robust, kappa)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if current_step % 100 == 0:
                print(f"Step [{current_step}/{total_steps}], Loss: {loss.item()}, kappa: {kappa:.2f}, epsilon_train: {epsilon_train:.2f}")

            current_step += 1

In [11]:
def compute_worst_case_logits(lower_logits, upper_logits, true_labels):
    worst_case_logits = upper_logits.clone()

    for i in range(len(true_labels)):
        worst_case_logits[i, true_labels[i]] = lower_logits[i, true_labels[i]]

    return worst_case_logits

In [12]:
def test(model, test_loader, epsilon):
    model.eval()
    correct_nominal = 0
    correct_robust = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            logits = model(images)
            _, pred = logits.max(1)
            correct_nominal += (pred == labels).sum().item()
            lower_logits, upper_logits = ibp_forward(model, images, epsilon)
            logits_robust = compute_worst_case_logits(lower_logits, upper_logits, labels)
            _, pred_robust = logits_robust.max(1)
            correct_robust += (pred_robust == labels).sum().item()
            total += labels.size(0)
    nominal_accuracy = correct_nominal / total
    robust_accuracy = correct_robust / total
    print(f"Nominal Accuracy: {nominal_accuracy*100:.2f}%, Robust Accuracy: {robust_accuracy*100:.2f}%")

In [13]:
def pgd_attack(model, images, labels, epsilon, alpha=0.01, iters=40):
    adv_images = images.clone().detach().requires_grad_(True)
    for i in range(iters):
        outputs = model(adv_images)
        loss = F.cross_entropy(outputs, labels)
        model.zero_grad()
        loss.backward()
        adv_images = adv_images + alpha * adv_images.grad.sign()
        eta = torch.clamp(adv_images - images, min=-epsilon, max=epsilon)
        adv_images = torch.clamp(images + eta, min=0, max=1).detach_().requires_grad_(True)
    return adv_images

def test_with_pgd(model, test_loader, epsilon, alpha=0.01, iters=40):
    model.eval()
    correct_nominal = 0
    correct_robust = 0
    total = 0
    for images, labels in test_loader:
        with torch.no_grad():
            logits = model(images)
            _, pred = logits.max(1)
            correct_nominal += (pred == labels).sum().item()
        adv_images = pgd_attack(model, images, labels, epsilon, alpha, iters)
        with torch.no_grad():
            adv_logits = model(adv_images)
            _, pred_robust = adv_logits.max(1)
            correct_robust += (pred_robust == labels).sum().item()
        total += labels.size(0)
    nominal_accuracy = correct_nominal / total
    robust_accuracy = correct_robust / total
    print(f"Nominal Accuracy: {nominal_accuracy*100:.2f}%, Robust Accuracy: {robust_accuracy*100:.2f}%")

In [14]:
def hybrid_loss(logits, true_labels, logits_robust, logits_pgd, kappa, lambda_reg=0.5):
    nominal_loss = F.cross_entropy(logits, true_labels)
    robust_loss_ibp = F.cross_entropy(logits_robust, true_labels)
    robust_loss_pgd = F.cross_entropy(logits_pgd, true_labels)
    combined_loss = kappa * nominal_loss + (1 - kappa) * (lambda_reg * robust_loss_ibp + (1 - lambda_reg) * robust_loss_pgd)
    return combined_loss

def train_with_pgd_and_ibp(model, optimizer, train_loader, total_steps, eps_target=0.1, pgd_alpha=0.01, pgd_iters=10):
    model.train()
    current_step = 0

    while current_step < total_steps:
        for images, labels in train_loader:
            if current_step >= total_steps:
                break
            logits = model(images)
            lower_logits, upper_logits = ibp_forward(model, images, eps_target)
            logits_robust = (lower_logits + upper_logits) / 2
            adv_images = pgd_attack(model, images, labels, epsilon=eps_target, alpha=pgd_alpha, iters=pgd_iters)
            logits_pgd = model(adv_images)
            kappa = schedule_kappa(current_step, total_steps)
            epsilon_train = schedule_epsilon(current_step, total_steps, eps_target)
            loss = hybrid_loss(logits, labels, logits_robust, logits_pgd, kappa)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if current_step % 100 == 0:
                print(f"Step [{current_step}/{total_steps}], Loss: {loss.item():.4f}, kappa: {kappa:.2f}, epsilon_train: {epsilon_train:.2f}")

            current_step += 1

In [None]:
model = SimpleFCNN()
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_with_pgd_and_ibp(model, optimizer, train_loader, total_steps=10000)
test(model, test_loader, epsilon=0.01)
test_with_pgd(model, test_loader, epsilon=0.1, alpha=0.01, iters=40)

Step [0/10000], Loss: 2.3178, kappa: 1.00, epsilon_train: 0.00
Step [100/10000], Loss: 0.5459, kappa: 0.99, epsilon_train: 0.00
Step [200/10000], Loss: 0.4860, kappa: 0.99, epsilon_train: 0.00
Step [300/10000], Loss: 0.3766, kappa: 0.98, epsilon_train: 0.00
Step [400/10000], Loss: 0.2463, kappa: 0.98, epsilon_train: 0.00
Step [500/10000], Loss: 0.2111, kappa: 0.97, epsilon_train: 0.01
Step [600/10000], Loss: 0.3070, kappa: 0.97, epsilon_train: 0.01
Step [700/10000], Loss: 0.4215, kappa: 0.96, epsilon_train: 0.01
Step [800/10000], Loss: 0.2074, kappa: 0.96, epsilon_train: 0.01
Step [900/10000], Loss: 0.4599, kappa: 0.95, epsilon_train: 0.01
Step [1000/10000], Loss: 0.3905, kappa: 0.95, epsilon_train: 0.01
Step [1100/10000], Loss: 0.3331, kappa: 0.94, epsilon_train: 0.01
Step [1200/10000], Loss: 0.2277, kappa: 0.94, epsilon_train: 0.01
Step [1300/10000], Loss: 0.2856, kappa: 0.94, epsilon_train: 0.01
Step [1400/10000], Loss: 0.2572, kappa: 0.93, epsilon_train: 0.01
Step [1500/10000], Los

In [15]:
# Interval propagation for IBP
class IntervalPropagation:
    def __init__(self, lower, upper):
        self.lower = lower
        self.upper = upper

    def __add__(self, other):
        return IntervalPropagation(self.lower + other, self.upper + other)

    def __mul__(self, other):
        if isinstance(other, IntervalPropagation):
            lower = torch.minimum(torch.minimum(self.lower * other.lower, self.lower * other.upper),
                                  torch.minimum(self.upper * other.lower, self.upper * other.upper))
            upper = torch.maximum(torch.maximum(self.lower * other.lower, self.lower * other.upper),
                                  torch.maximum(self.upper * other.lower, self.upper * other.upper))
            return IntervalPropagation(lower, upper)
        else:
            lower = torch.minimum(self.lower * other, self.upper * other)
            upper = torch.maximum(self.lower * other, self.upper * other)
            return IntervalPropagation(lower, upper)

In [16]:
def propagate_layer(layer, interval):
    interval_lower = interval.lower.view(interval.lower.size(0), -1)
    interval_upper = interval.upper.view(interval.upper.size(0), -1)
    new_lower = F.linear(interval_lower, layer.weight, layer.bias)
    new_upper = F.linear(interval_upper, layer.weight, layer.bias)
    return IntervalPropagation(new_lower, new_upper)

def propagate_model(model, interval):
    m1 = propagate_layer(model.fc1, interval)
    m1_lower, m1_upper = relu_bounds(m1.lower, m1.upper)
    m2 = propagate_layer(model.fc2, IntervalPropagation(m1_lower, m1_upper))
    m2_lower, m2_upper = relu_bounds(m2.lower, m2.upper)
    m3 = propagate_layer(model.fc3, IntervalPropagation(m2_lower, m2_upper))
    return m3.lower, m3.upper

In [None]:
def schedule_lambda_reg(step, total_steps):
    initial_lambda = 0.5
    final_lambda = 0.8
    return min(final_lambda, initial_lambda + (final_lambda - initial_lambda) * (step / total_steps))

def hybrid_loss(logits, true_labels, logits_robust, logits_pgd, kappa, lambda_reg):
    nominal_loss = F.cross_entropy(logits, true_labels)
    robust_loss_ibp = F.cross_entropy(logits_robust, true_labels)
    robust_loss_pgd = F.cross_entropy(logits_pgd, true_labels)
    return kappa * nominal_loss + (1 - kappa) * (lambda_reg * robust_loss_ibp + (1 - lambda_reg) * robust_loss_pgd)

In [None]:

def pgd_attack(model, images, labels, epsilon, alpha=0.005, iters=50):
    adv_images = images.clone().detach().requires_grad_(True)
    for i in range(iters):
        outputs = model(adv_images)
        loss = F.cross_entropy(outputs, labels)
        model.zero_grad()
        loss.backward()
        adv_images = adv_images + alpha * adv_images.grad.sign()
        eta = torch.clamp(adv_images - images, min=-epsilon, max=epsilon)
        adv_images = torch.clamp(images + eta, min=0, max=1).detach_().requires_grad_(True)
    return adv_images

def train_with_pgd_and_ibp(model, optimizer, train_loader, total_steps, eps_target=0.1, pgd_alpha=0.005, pgd_iters=50):
    model.train()
    current_step = 0

    while current_step < total_steps:
        for images, labels in train_loader:
            if current_step >= total_steps:
                break
            logits = model(images)
            lower_logits, upper_logits = propagate_model(model, IntervalPropagation(images - eps_target, images + eps_target))
            logits_robust = (lower_logits + upper_logits) / 2  # Middle point approximation
            adv_images = pgd_attack(model, images, labels, epsilon=eps_target, alpha=pgd_alpha, iters=pgd_iters)
            logits_pgd = model(adv_images)

            kappa = max(0.5, 1 - current_step / total_steps)
            lambda_reg = schedule_lambda_reg(current_step, total_steps)

            loss = hybrid_loss(logits, labels, logits_robust, logits_pgd, kappa, lambda_reg)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if current_step % 100 == 0:
                print(f"Step [{current_step}/{total_steps}], Loss: {loss.item():.4f}, kappa: {kappa:.2f}, lambda_reg: {lambda_reg:.2f}")

            current_step += 1

def test_verified_accuracy(model, test_loader, eps_values):
    model.eval()
    for eps in eps_values:
        correct_verified = 0
        total = 0
        with torch.no_grad():
            for images, labels in test_loader:
                intervals = IntervalPropagation(images - eps, images + eps)

                lower_logits, upper_logits = propagate_model(model, intervals)

                logits_robust = lower_logits
                _, pred_robust = logits_robust.max(1)
                correct_verified += (pred_robust == labels).sum().item()
                total += labels.size(0)

        verified_accuracy = correct_verified / total * 100
        print(f"Epsilon: {eps:.2f}, Verified Accuracy: {verified_accuracy:.2f}%")


def test_with_pgd(model, test_loader, epsilon, alpha=0.005, iters=50):
    model.eval()
    correct_nominal = 0
    correct_robust = 0
    total = 0
    for images, labels in test_loader:
        with torch.no_grad():
            logits = model(images)
            _, pred = logits.max(1)
            correct_nominal += (pred == labels).sum().item()
        adv_images = pgd_attack(model, images, labels, epsilon, alpha, iters)
        with torch.no_grad():
            adv_logits = model(adv_images)
            _, pred_robust = adv_logits.max(1)
            correct_robust += (pred_robust == labels).sum().item()
        total += labels.size(0)
    nominal_accuracy = correct_nominal / total * 100
    robust_accuracy = correct_robust / total * 100
    print(f"Nominal Accuracy: {nominal_accuracy:.2f}%, Robust Accuracy: {robust_accuracy:.2f}%")


In [19]:
model = SimpleFCNN()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
total_steps = 10000
eps_target = 0.1
pgd_alpha = 0.005
pgd_iters = 50

# Training the model
train_with_pgd_and_ibp(model, optimizer, train_loader, total_steps=total_steps, eps_target=eps_target, pgd_alpha=pgd_alpha, pgd_iters=pgd_iters)

# Testing verified accuracy across different epsilon values
eps_values = np.linspace(0.01, 0.1, 10)
print("\nTest: Model verified accuracy across different epsilons")
test_verified_accuracy(model, test_loader, eps_values)

# Testing robust accuracy with PGD adversarial examples
print("\nTest: Model with PGD adversarial examples")
test_with_pgd(model, test_loader, epsilon=0.1, alpha=0.005, iters=50)

Step [0/10000], Loss: 2.3129, kappa: 1.00, lambda_reg: 0.50
Step [100/10000], Loss: 2.1594, kappa: 0.99, lambda_reg: 0.50
Step [200/10000], Loss: 1.8446, kappa: 0.98, lambda_reg: 0.51
Step [300/10000], Loss: 1.2791, kappa: 0.97, lambda_reg: 0.51
Step [400/10000], Loss: 1.0290, kappa: 0.96, lambda_reg: 0.51
Step [500/10000], Loss: 0.8263, kappa: 0.95, lambda_reg: 0.52
Step [600/10000], Loss: 0.5789, kappa: 0.94, lambda_reg: 0.52
Step [700/10000], Loss: 0.5429, kappa: 0.93, lambda_reg: 0.52
Step [800/10000], Loss: 0.5969, kappa: 0.92, lambda_reg: 0.52
Step [900/10000], Loss: 0.4983, kappa: 0.91, lambda_reg: 0.53
Step [1000/10000], Loss: 0.5427, kappa: 0.90, lambda_reg: 0.53
Step [1100/10000], Loss: 0.4120, kappa: 0.89, lambda_reg: 0.53
Step [1200/10000], Loss: 0.4598, kappa: 0.88, lambda_reg: 0.54
Step [1300/10000], Loss: 0.4892, kappa: 0.87, lambda_reg: 0.54
Step [1400/10000], Loss: 0.6152, kappa: 0.86, lambda_reg: 0.54
Step [1500/10000], Loss: 0.4374, kappa: 0.85, lambda_reg: 0.55
Step