In [1]:
!pip install opacus

Collecting opacus
  Downloading opacus-1.5.4-py3-none-any.whl.metadata (8.7 kB)
Downloading opacus-1.5.4-py3-none-any.whl (254 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m254.4/254.4 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: opacus
Successfully installed opacus-1.5.4


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch.nn.utils.prune as prune
import numpy as np
from opacus import PrivacyEngine
import time
import copy

## Model Hyperparameters

In [3]:
def select_dataset(name='MNIST'):
  if name == "MNIST":
    transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),])

    full_train = datasets.MNIST(
        root="./data",
        train=True,
        download=True,
        transform=transform
    )

    train_size = 50000
    val_size = len(full_train) - train_size
    train_dataset, val_dataset = random_split(full_train, [train_size, val_size])

    test_dataset = datasets.MNIST(
        root="./data",
        train=False,
        download=True,
        transform=transform
    )

  elif name == "CIFAR":
    transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),])


    full_train = datasets.CIFAR10(
        root="./data",
        train=True,
        download=True,
        transform=transform
    )

    train_size = 40000
    val_size = len(full_train) - train_size
    train_dataset, val_dataset = random_split(full_train, [train_size, val_size])


    test_dataset = datasets.CIFAR10(
        root="./data",
        train=False,
        download=True,
        transform=transform
    )

  train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
  val_loader   = DataLoader(val_dataset, batch_size=256, shuffle=False)
  test_loader  = DataLoader(test_dataset, batch_size=256, shuffle=False)
  return train_loader, val_loader, test_loader

In [4]:
BATCH_SIZE = 128
LR = 0.01
EPOCHS = 20
SEED = 42
MAX_GRAD_NORM = 1.0
DELTA = 1e-5
PATIENCE = 5

DATASET_NAME = "CIFAR"
train_loader, val_loader, test_loader = select_dataset(DATASET_NAME)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# For dynamically calculated epsilon
NOISE_MULTIPLIER = 1.0

# For a fixed privacy budget (eps, del)
TARGET_EPSILON = 8
TARGET_DELTA = 1e-5
NUM_EPOCHS = 20

PRUNE_EPOCHS = 2

torch.manual_seed(SEED)
np.random.seed(SEED)

100%|██████████| 170M/170M [00:47<00:00, 3.62MB/s]


In [5]:
# baseline model
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        #=======================================#
        #            MNIST Settings
        #=======================================#

        # self.conv1 = nn.Conv2d(1, 16, 3, 1)
        # self.conv2 = nn.Conv2d(16, 32, 3, 1)
        # self.fc1 = nn.Linear(32*12*12, 64)
        # self.fc2 = nn.Linear(64, 10)

        #=======================================#
        #            CIFAR-10 Settings
        #=======================================#

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(in_features=576, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        # x = F.relu(self.conv1(x))
        # x = F.relu(self.conv2(x))
        # x = F.max_pool2d(x, 2)
        # x = torch.flatten(x, 1)
        # x = F.relu(self.fc1(x))
        # return self.fc2(x)

        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = torch.flatten(x, 1)

        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [6]:
# MNIST Model
class CNN_MNIST(nn.Module):
    def __init__(self):
        super().__init__()

        #=======================================#
        #            MNIST Settings
        #=======================================#

        self.conv1 = nn.Conv2d(1, 16, 3, 1)
        self.conv2 = nn.Conv2d(16, 32, 3, 1)
        self.fc1 = nn.Linear(32*12*12, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):

        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


# CIFAR-10 Model

class CNN_CIFAR10(nn.Module):
    def __init__(self):
        super().__init__()

        #=======================================#
        #            CIFAR-10 Settings
        #=======================================#

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=8, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(in_features=576, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):

        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = torch.flatten(x, 1)

        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [7]:
def train_one_epoch(model, loader, optimizer):
    # train loop
    model.train()
    total_loss = 0
    for batch_idx, (data, target) in enumerate(loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.cross_entropy(out, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(loader)

In [8]:
@torch.no_grad()
def evaluate(model, loader):
    # evaluation
    model.eval()
    loss, correct = 0, 0
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        out = model(data)
        loss += F.cross_entropy(out, target, reduction="sum").item()
        pred = out.argmax(1)
        correct += pred.eq(target).sum().item()
    loss /= len(loader.dataset)
    acc = 100. * correct / len(loader.dataset)
    return loss, acc

In [9]:
def weight_pruning(model, amount, return_mask=False, remove=True):
    parameters_to_prune = [
        (m, "weight") for m in model.modules()
        if isinstance(m, (nn.Conv2d, nn.Linear))
    ]

    prune.global_unstructured(
        parameters_to_prune,
        pruning_method=prune.L1Unstructured,
        amount=amount,
    )

    mask_dict = None
    if return_mask:
        mask_dict = {
            f"{name}.weight": module.weight_mask.detach().clone()
            for name, module in model.named_modules()
            if hasattr(module, "weight_mask")
        }

    if remove:
      for module, _ in parameters_to_prune:
          prune.remove(module, "weight")

    total = sum(p.numel() for p in model.parameters())
    zeros = sum((p == 0).sum().item() for p in model.parameters())
    sparsity = 100 * zeros / total

    print(f"Pruned model sparsity = {sparsity:.2f}%")

    return model, mask_dict

In [10]:
def compute_imp_prune_amount(prune_amount, num_iterations):
    remaining = 1 - prune_amount
    p = 1 - remaining ** (1 / num_iterations)
    return p

In [11]:
def calculate_sparsity(model):
    total = zeros = 0
    for name, p in model.named_parameters():
        if "weight" in name:
            arr = p.detach().cpu().numpy()
            total += arr.size
            zeros += (arr == 0).sum()
    return 100 * zeros / total

In [12]:
def run_experiment(dataset='MNIST', use_dp=False, pruning_type=None, final_sparsity=None, fixed_privacy_budget=True, prune_epochs=None):

    # setup model and optimizer
    if dataset == 'MNIST':
      model = CNN_MNIST().to(device)
    elif dataset == "CIFAR":
      model = CNN_CIFAR10().to(device)

    # Defaults to base CNN class if no dataset is specified
    else:
      model = CNN().to(device)

    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

    privacy_engine = None
    if use_dp:

        if fixed_privacy_budget:

            privacy_engine = PrivacyEngine()

            # Calculates sigma based on target epsilon and delta
            model, optimizer, train_loader_dp = privacy_engine.make_private_with_epsilon(
              module=model,
              optimizer=optimizer,
              data_loader=train_loader,
              max_grad_norm=MAX_GRAD_NORM,
              target_delta=TARGET_DELTA,
              target_epsilon=TARGET_EPSILON,
              epochs=NUM_EPOCHS
        )
        else:

            privacy_engine = PrivacyEngine()

            # Otherwise takes sigma as a hyperparameter
            model, optimizer, train_loader_dp = privacy_engine.make_private(
              module=model,
              optimizer=optimizer,
              data_loader=train_loader,
              max_grad_norm=MAX_GRAD_NORM,
              noise_multiplier=NOISE_MULTIPLIER,
        )
    else:
        train_loader_dp = train_loader


    # train
    sparsity = 0
    prune_time = 0
    best_val_acc = 0
    epochs_no_improve = 0
    best_model_path = f"best_model{'_dp' if use_dp else ''}.pt"
    rewind_state = None
    global_mask = None
    if pruning_type == "LTH":
      prune_steps = max(1, EPOCHS - 2)
      train_epochs = EPOCHS
      lth_step = 0
    elif pruning_type == "POST":
      prune_amount = final_sparsity
      train_epochs = EPOCHS - PRUNE_EPOCHS
    else:
      prune_amount = final_sparsity
      train_epochs = EPOCHS

    if pruning_type == "PRE":
      prune_start_time = time.time()
      model, _ = weight_pruning(model, prune_amount)
      prune_end_time = time.time()
      prune_time = prune_end_time - prune_start_time
      final_sparsity = calculate_sparsity(model)

    start_time = time.time()

    for epoch in range(1, train_epochs + 1):
        train_loss = train_one_epoch(model, train_loader_dp, optimizer)
        val_loss, val_acc = evaluate(model, val_loader)

        eps = privacy_engine.get_epsilon(DELTA)  if use_dp else None

        print(f"[{'DP-SGD' if use_dp else 'Standard SGD'}] Epoch {epoch}: "
            + f"train_loss={train_loss:.4f}, "
            + f"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}%"
            + f", ε={eps:.4f}" if use_dp else "")

        if pruning_type == "LTH" and epoch == 1:
            rewind_state = copy.deepcopy(model.state_dict())
            print("Saved early-rewind weights for LTH")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            epochs_no_improve = 0
            torch.save(model.state_dict(), best_model_path)
        else:
            epochs_no_improve += 1

        if epochs_no_improve >= PATIENCE:
            print(f"\nEarly stopping at epoch {epoch} (no improvement for {PATIENCE} epochs).")
            break

        if pruning_type == "LTH" and epoch != 1 and epoch in prune_epochs:
          lth_step += 1
          remaining_k = (1 - final_sparsity) ** (lth_step / len(prune_epochs))
          target_sparsity = 1 - remaining_k
          print(f"Pruning {target_sparsity*100:.2f}% at epoch {epoch}")
          model, new_mask = weight_pruning(model, amount=target_sparsity, return_mask=True)
          global_mask = new_mask

          rewound = copy.deepcopy(rewind_state)
          for name, mask in global_mask.items():
              if name in rewound:
                  rewound[name] = rewound[name] * mask   # apply lottery ticket mask

          model.load_state_dict(rewound)
          epochs_no_improve = 0

    end_time = time.time()
    total_time = end_time - start_time

    if pruning_type == "POST":
      prune_start_time = time.time()
      model, _ = weight_pruning(model, amount=prune_amount)
      final_sparsity = calculate_sparsity(model)

      for ft_epoch in range(1, PRUNE_EPOCHS + 1):
            train_loss = train_one_epoch(model, train_loader_dp, optimizer)
            val_loss, val_acc = evaluate(model, val_loader)
            eps = privacy_engine.get_epsilon(DELTA) if use_dp else None

            print(f"[Fine Tuning Epoch {ft_epoch}] val_acc={val_acc:.4f}%"
                  + (f", ε={eps:.4f}" if use_dp else ""))
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                torch.save(model.state_dict(), best_model_path)

      prune_end_time = time.time()
      prune_time = prune_end_time - prune_start_time
      end_time = time.time()
      total_time = end_time - start_time

    # test on best model
    if pruning_type == "LTH":
      # reconstruct final winning ticket
      if global_mask is None:
          ticket_state = copy.deepcopy(rewind_state)
      else:
          ticket_state = copy.deepcopy(rewind_state)
          for name, mask in global_mask.items():
              if name in ticket_state:
                  ticket_state[name] = ticket_state[name] * mask
      model.load_state_dict(ticket_state)
    elif pruning_type == "POST":
      # keep already pruned & fine-tuned model
      pass
    else:
      model.load_state_dict(torch.load(best_model_path))
    test_loss, test_acc = evaluate(model, test_loader)
    final_eps = privacy_engine.get_epsilon(DELTA) if use_dp else None
    final_sparsity = calculate_sparsity(model)

    print(f"\n=== Results for {'DP-SGD' if use_dp else 'Standard-SGD'} with {pruning_type if pruning_type else 'No'} Pruning ===")
    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Best Val Acc: {best_val_acc:.2f}%")
    if use_dp:
        print(f"Final ε = {final_eps:.3f}")
    print(f"Train Time:     {total_time:.2f} sec")
    if pruning_type:
        print(f"Sparsity:       {final_sparsity:.2f}%")

    return {
        "val_acc": best_val_acc,
        "test_acc": test_acc,
        "epsilon": final_eps,
        "train_time": total_time,
        "sparsity": final_sparsity
    }

In [13]:
def magnitude_prune_mask(model, amount):
    all_weights = torch.cat([
        p.detach().abs().flatten()
        for n, p in model.named_parameters()
        if "weight" in n
    ])

    k = int((1 - amount) * all_weights.numel())
    threshold = all_weights.kthvalue(k).values.item()

    mask = {}
    for name, param in model.named_parameters():
        if "weight" in name:
            mask[name] = (param.detach().abs() > threshold).float()
    return mask

In [14]:
def weight_pruning(model, amount, return_mask=False, remove=True):
    parameters_to_prune = [
        (m, "weight") for m in model.modules()
        if isinstance(m, (nn.Conv2d, nn.Linear))
    ]

    prune.global_unstructured(
        parameters_to_prune,
        pruning_method=prune.L1Unstructured,
        amount=amount,
    )

    mask_dict = None
    if return_mask:
        mask_dict = {
            f"{name}.weight": module.weight_mask.detach().clone()
            for name, module in model.named_modules()
            if hasattr(module, "weight_mask")
        }

    if remove:
      for module, _ in parameters_to_prune:
          prune.remove(module, "weight")

    total = sum(p.numel() for p in model.parameters())
    zeros = sum((p == 0).sum().item() for p in model.parameters())
    sparsity = 100 * zeros / total

    print(f"Pruned model sparsity = {sparsity:.2f}%")

    return model, mask_dict

In [15]:
def magnitude_pruning(model, amount):
    all_weights = torch.cat([
        p.detach().abs().flatten()
        for n, p in model.named_parameters()
        if "weight" in n
    ])

    k = int(amount * all_weights.numel())
    if k == 0:
        return {}

    threshold = torch.topk(all_weights, k, largest=False).values.max()
    mask = {}
    for name, p in model.named_parameters():
        if "weight" in name:
            mask[name] = (p.data.abs() > threshold).float()
    return mask

In [16]:
def apply_mask(model, mask):
    with torch.no_grad():
        for name, param in model.named_parameters():
            if name in mask:
                param.mul_(mask[name])

In [17]:
def train_one_epoch(model, loader, optimizer, mask=None):
    model.train()
    total_loss = 0
    for batch_idx, (data, target) in enumerate(loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.cross_entropy(out, target)
        loss.backward()
        optimizer.step()
        if mask is not None:
            apply_mask(model, mask)
        total_loss += loss.item()
    return total_loss / len(loader)

In [18]:
def run_lth_dp(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True,
                          prune_rounds=2,
                          final_sparsity=0.3,
                          rewind_epochs=1):
  # setup model and optimizer
    if dataset == 'MNIST':
      model = CNN_MNIST().to(device)
    elif dataset == "CIFAR":
      model = CNN_CIFAR10().to(device)

    # Defaults to base CNN class if no dataset is specified
    else:
      model = CNN().to(device)

    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

    privacy_engine = None
    if use_dp:

        if fixed_privacy_budget:

            privacy_engine = PrivacyEngine()

            # Calculates sigma based on target epsilon and delta
            model, optimizer, train_loader_dp = privacy_engine.make_private_with_epsilon(
              module=model,
              optimizer=optimizer,
              data_loader=train_loader,
              max_grad_norm=MAX_GRAD_NORM,
              target_delta=TARGET_DELTA,
              target_epsilon=TARGET_EPSILON,
              epochs=NUM_EPOCHS*2
        )
        else:

            privacy_engine = PrivacyEngine()

            # Otherwise takes sigma as a hyperparameter
            model, optimizer, train_loader_dp = privacy_engine.make_private(
              module=model,
              optimizer=optimizer,
              data_loader=train_loader,
              max_grad_norm=MAX_GRAD_NORM,
              noise_multiplier=NOISE_MULTIPLIER,
        )
    else:
        train_loader_dp = train_loader

    start_time = time.time()

    # rewind state
    for e in range(1, rewind_epochs + 1):
        train_one_epoch(model, train_loader_dp, optimizer, mask=None)

    rewind_state = copy.deepcopy(model.state_dict())

    current_mask = None
    for r in range(1, prune_rounds+1):
        round_start_time = time.time()
        for epoch in range(rewind_epochs + 1, EPOCHS + 1):
            train_loss = train_one_epoch(model, train_loader, optimizer, mask=current_mask)
            val_loss, val_acc = evaluate(model, val_loader)

            eps = privacy_engine.get_epsilon(DELTA)  if use_dp else None

            print(f"[{'DP-SGD' if use_dp else 'Standard SGD'}] Epoch {epoch}, LTH Iteration {r}: "
            + f"train_loss={train_loss:.4f}, "
            + f"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}%"
            + f", ε={eps:.4f}" if use_dp else "")

        target_sparsity = 1 - (1 - final_sparsity)**(r/prune_rounds)
        print(f"Pruning {target_sparsity*100:.2f}% at epoch {epoch}")
        new_mask = magnitude_pruning(model, amount=target_sparsity)

        if current_mask is None:
            current_mask = new_mask
        else:
            for name in current_mask:
                current_mask[name] = current_mask[name] * new_mask[name]

        model.load_state_dict(rewind_state)
        apply_mask(model, current_mask)
        round_time = time.time() - round_start_time
        print(f"LTH Iteration {r} Time:     {round_time:.2f} sec")

        # lth_step += 1
        # remaining_k = (1 - prune_amount) ** (lth_step / len(prune_epochs))
        # target_sparsity = 1 - remaining_k
        # new_mask = magnitude_prune_mask(model, target_sparsity)

        # # combine with previous masks
        # if current_mask is None:
        #     current_mask = new_mask
        # else:
        #     for name in current_mask:
        #         current_mask[name] = current_mask[name] * new_mask[name]

        # # rewind
        # model.load_state_dict(rewind_state)

        # apply_mask(model, current_mask)

    end_time = time.time()
    total_time = end_time - start_time

    test_loss, test_acc = evaluate(model, test_loader)
    final_eps = privacy_engine.get_epsilon(DELTA) if use_dp else None
    final_sparsity = calculate_sparsity(model)

    print(f"\n=== Results for {'DP-SGD' if use_dp else 'Standard-SGD'} with LTH Pruning ===")
    print(f"Test Accuracy: {test_acc:.2f}%")
    print(f"Final ε = {final_eps:.3f}")
    print(f"Train Time:     {total_time:.2f} sec")
    print(f"Sparsity:       {final_sparsity:.2f}%")

    return {
        "test_acc": test_acc,
        "epsilon": final_eps,
        "train_time": total_time,
        "sparsity": final_sparsity
    }

In [19]:
run_lth_dp(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True,
                          prune_rounds=2,
                          final_sparsity=0.3,
                          rewind_epochs=1)

  loss.backward()


[DP-SGD] Epoch 2, LTH Iteration 1: train_loss=1.9922, val_loss=1.9628, val_acc=30.9300%, ε=3.0262
[DP-SGD] Epoch 3, LTH Iteration 1: train_loss=1.9436, val_loss=1.9402, val_acc=32.1500%, ε=3.3019
[DP-SGD] Epoch 4, LTH Iteration 1: train_loss=1.9224, val_loss=1.9196, val_acc=33.4400%, ε=3.5338
[DP-SGD] Epoch 5, LTH Iteration 1: train_loss=1.8961, val_loss=1.8944, val_acc=34.6400%, ε=3.7406
[DP-SGD] Epoch 6, LTH Iteration 1: train_loss=1.8696, val_loss=1.8680, val_acc=35.5400%, ε=3.9306
[DP-SGD] Epoch 7, LTH Iteration 1: train_loss=1.8442, val_loss=1.8436, val_acc=36.7200%, ε=4.1084
[DP-SGD] Epoch 8, LTH Iteration 1: train_loss=1.8152, val_loss=1.8113, val_acc=37.9900%, ε=4.2767
[DP-SGD] Epoch 9, LTH Iteration 1: train_loss=1.7946, val_loss=1.8062, val_acc=38.0900%, ε=4.4373
[DP-SGD] Epoch 10, LTH Iteration 1: train_loss=1.7876, val_loss=1.7927, val_acc=38.8900%, ε=4.5915
[DP-SGD] Epoch 11, LTH Iteration 1: train_loss=1.7759, val_loss=1.7951, val_acc=39.4100%, ε=4.7402
[DP-SGD] Epoch 12,

{'test_acc': 28.9,
 'epsilon': np.float64(7.9038610561231915),
 'train_time': 1270.6962699890137,
 'sparsity': np.float64(29.99845647252521)}

# Baseline: No Differential Privacy

In [None]:
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)


Running DP-SGD baseline for fixed (8, 1e-05)-DP


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.1917, val_loss=2.0178, val_acc=27.9000%, ε=3.5278
[DP-SGD] Epoch 2: train_loss=1.9918, val_loss=1.9499, val_acc=31.1700%, ε=4.0227
[DP-SGD] Epoch 3: train_loss=1.9518, val_loss=1.9226, val_acc=32.5700%, ε=4.3957
[DP-SGD] Epoch 4: train_loss=1.9134, val_loss=1.9086, val_acc=33.7700%, ε=4.7147
[DP-SGD] Epoch 5: train_loss=1.8940, val_loss=1.8763, val_acc=35.5500%, ε=5.0010
[DP-SGD] Epoch 6: train_loss=1.8705, val_loss=1.8478, val_acc=36.2500%, ε=5.2647
[DP-SGD] Epoch 7: train_loss=1.8364, val_loss=1.8299, val_acc=37.0400%, ε=5.5113
[DP-SGD] Epoch 8: train_loss=1.8077, val_loss=1.8021, val_acc=37.7500%, ε=5.7445
[DP-SGD] Epoch 9: train_loss=1.7870, val_loss=1.7713, val_acc=39.2000%, ε=5.9666
[DP-SGD] Epoch 10: train_loss=1.7724, val_loss=1.7782, val_acc=40.0200%, ε=6.1795
[DP-SGD] Epoch 11: train_loss=1.7721, val_loss=1.7699, val_acc=40.4600%, ε=6.3845
[DP-SGD] Epoch 12: train_loss=1.7580, val_loss=1.7739, val_acc=41.4000%, ε=6.5826
[DP-SGD] Epoch 13: train_

{'val_acc': 44.16,
 'test_acc': 44.51,
 'epsilon': np.float64(7.993863084259577),
 'train_time': 695.9134566783905,
 'sparsity': np.float64(0.0)}

In [None]:
print("\nRunning DP-SGD baseline with pre weight pruning")
run_experiment(dataset=DATASET_NAME, use_dp=True, pruning_type="PRE", final_sparsity=0.3, fixed_privacy_budget=True)


Running DP-SGD baseline with pre weight pruning




Pruned model sparsity = 29.93%


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.1795, val_loss=2.0286, val_acc=27.7600%, ε=3.5278
[DP-SGD] Epoch 2: train_loss=1.9854, val_loss=1.9519, val_acc=30.7200%, ε=4.0227
[DP-SGD] Epoch 3: train_loss=1.9393, val_loss=1.9201, val_acc=32.3500%, ε=4.3957
[DP-SGD] Epoch 4: train_loss=1.9164, val_loss=1.9014, val_acc=33.6200%, ε=4.7147
[DP-SGD] Epoch 5: train_loss=1.8889, val_loss=1.8650, val_acc=34.7300%, ε=5.0010
[DP-SGD] Epoch 6: train_loss=1.8601, val_loss=1.8599, val_acc=35.5500%, ε=5.2647
[DP-SGD] Epoch 7: train_loss=1.8517, val_loss=1.8193, val_acc=36.8200%, ε=5.5113
[DP-SGD] Epoch 8: train_loss=1.8174, val_loss=1.8005, val_acc=37.7900%, ε=5.7445
[DP-SGD] Epoch 9: train_loss=1.7880, val_loss=1.7819, val_acc=38.4100%, ε=5.9666
[DP-SGD] Epoch 10: train_loss=1.7869, val_loss=1.7846, val_acc=39.2800%, ε=6.1795
[DP-SGD] Epoch 11: train_loss=1.7732, val_loss=1.7805, val_acc=39.8200%, ε=6.3845
[DP-SGD] Epoch 12: train_loss=1.7628, val_loss=1.7696, val_acc=40.3800%, ε=6.5826
[DP-SGD] Epoch 13: train_

{'val_acc': 43.23,
 'test_acc': 43.67,
 'epsilon': np.float64(7.993863084259577),
 'train_time': 683.007700920105,
 'sparsity': np.float64(0.0)}

In [None]:
print("\nRunning DP-SGD baseline with post weight pruning")
run_experiment(dataset=DATASET_NAME, use_dp=True, pruning_type="POST", final_sparsity=0.3, fixed_privacy_budget=True)


Running DP-SGD baseline with post weight pruning


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.2346, val_loss=2.1269, val_acc=22.8900%, ε=3.5278
[DP-SGD] Epoch 2: train_loss=2.0741, val_loss=2.0417, val_acc=25.7500%, ε=4.0227
[DP-SGD] Epoch 3: train_loss=1.9967, val_loss=1.9780, val_acc=28.5200%, ε=4.3957
[DP-SGD] Epoch 4: train_loss=1.9407, val_loss=1.9127, val_acc=30.6300%, ε=4.7147
[DP-SGD] Epoch 5: train_loss=1.8807, val_loss=1.8505, val_acc=33.6300%, ε=5.0010
[DP-SGD] Epoch 6: train_loss=1.8369, val_loss=1.8148, val_acc=35.7500%, ε=5.2647
[DP-SGD] Epoch 7: train_loss=1.7946, val_loss=1.7972, val_acc=37.1700%, ε=5.5113
[DP-SGD] Epoch 8: train_loss=1.7850, val_loss=1.7710, val_acc=37.9400%, ε=5.7445
[DP-SGD] Epoch 9: train_loss=1.7740, val_loss=1.7882, val_acc=38.6500%, ε=5.9666
[DP-SGD] Epoch 10: train_loss=1.7792, val_loss=1.7800, val_acc=39.4200%, ε=6.1795
[DP-SGD] Epoch 11: train_loss=1.7768, val_loss=1.7796, val_acc=40.0300%, ε=6.3845
[DP-SGD] Epoch 12: train_loss=1.7847, val_loss=1.8050, val_acc=40.7100%, ε=6.5826
[DP-SGD] Epoch 13: train_

{'val_acc': 43.84,
 'test_acc': 44.67,
 'epsilon': np.float64(7.993863084259577),
 'train_time': 702.9420344829559,
 'sparsity': np.float64(0.0)}

In [None]:
print("\nRunning DP-SGD baseline with iterative pruning")
run_experiment(dataset=DATASET_NAME, use_dp=True, pruning_type="LTH", final_sparsity=0.3, fixed_privacy_budget=True, prune_epochs=[5, 10, 15])


Running DP-SGD baseline with iterative pruning


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.1879, val_loss=2.0261, val_acc=26.8400%, ε=3.5278
Saved early-rewind weights for LTH
[DP-SGD] Epoch 2: train_loss=1.9955, val_loss=1.9609, val_acc=31.3300%, ε=4.0227
[DP-SGD] Epoch 3: train_loss=1.9416, val_loss=1.9357, val_acc=32.5400%, ε=4.3957
[DP-SGD] Epoch 4: train_loss=1.9191, val_loss=1.9143, val_acc=34.3100%, ε=4.7147
[DP-SGD] Epoch 5: train_loss=1.9001, val_loss=1.8848, val_acc=35.6100%, ε=5.0010
Pruning 11.21% at epoch 5
Pruned model sparsity = 11.18%
[DP-SGD] Epoch 6: train_loss=1.9807, val_loss=1.9644, val_acc=30.9500%, ε=5.2647
[DP-SGD] Epoch 7: train_loss=1.9374, val_loss=1.9255, val_acc=32.5900%, ε=5.5113
[DP-SGD] Epoch 8: train_loss=1.9121, val_loss=1.9091, val_acc=34.3200%, ε=5.7445
[DP-SGD] Epoch 9: train_loss=1.9035, val_loss=1.8781, val_acc=35.5100%, ε=5.9666
[DP-SGD] Epoch 10: train_loss=1.8618, val_loss=1.8699, val_acc=36.4800%, ε=6.1795
Pruning 21.16% at epoch 10
Pruned model sparsity = 21.11%
[DP-SGD] Epoch 11: train_loss=1.9790, v

{'val_acc': 37.12,
 'test_acc': 28.2,
 'epsilon': np.float64(7.993863084259577),
 'train_time': 737.007874250412,
 'sparsity': np.float64(30.001029018316526)}

MNIST

In [None]:
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)


Running DP-SGD baseline for fixed (8, 1e-05)-DP


  loss.backward()


[DP-SGD] Epoch 1: train_loss=0.9209, val_loss=0.5501, val_acc=86.9200%, ε=3.6072
[DP-SGD] Epoch 2: train_loss=0.5346, val_loss=0.5889, val_acc=88.6300%, ε=4.1009
[DP-SGD] Epoch 3: train_loss=0.5745, val_loss=0.6062, val_acc=89.2900%, ε=4.4702
[DP-SGD] Epoch 4: train_loss=0.5750, val_loss=0.6006, val_acc=89.9100%, ε=4.7850
[DP-SGD] Epoch 5: train_loss=0.5598, val_loss=0.5834, val_acc=90.3900%, ε=5.0671
[DP-SGD] Epoch 6: train_loss=0.5558, val_loss=0.5712, val_acc=90.9200%, ε=5.3266
[DP-SGD] Epoch 7: train_loss=0.5358, val_loss=0.5597, val_acc=91.3200%, ε=5.5691
[DP-SGD] Epoch 8: train_loss=0.5151, val_loss=0.5546, val_acc=91.4100%, ε=5.7982
[DP-SGD] Epoch 9: train_loss=0.4976, val_loss=0.5380, val_acc=91.7400%, ε=6.0164
[DP-SGD] Epoch 10: train_loss=0.4853, val_loss=0.5150, val_acc=92.0300%, ε=6.2255
[DP-SGD] Epoch 11: train_loss=0.4758, val_loss=0.5104, val_acc=92.2300%, ε=6.4267
[DP-SGD] Epoch 12: train_loss=0.4767, val_loss=0.5070, val_acc=92.2700%, ε=6.6211
[DP-SGD] Epoch 13: train_

{'val_acc': 93.2,
 'test_acc': 93.93,
 'epsilon': np.float64(8.005044757651065),
 'train_time': 3316.64049744606,
 'sparsity': np.float64(0.0)}

In [None]:
print("\nRunning DP-SGD baseline with pre weight pruning")
run_experiment(dataset=DATASET_NAME, use_dp=True, pruning_type="PRE", final_sparsity=0.3, fixed_privacy_budget=True)


Running DP-SGD baseline with pre weight pruning




Pruned model sparsity = 29.99%


  loss.backward()


[DP-SGD] Epoch 1: train_loss=0.9401, val_loss=0.5567, val_acc=86.1700%, ε=3.6072
[DP-SGD] Epoch 2: train_loss=0.5412, val_loss=0.5808, val_acc=88.4700%, ε=4.1009
[DP-SGD] Epoch 3: train_loss=0.5531, val_loss=0.6084, val_acc=89.4200%, ε=4.4702
[DP-SGD] Epoch 4: train_loss=0.5635, val_loss=0.6008, val_acc=89.7800%, ε=4.7850
[DP-SGD] Epoch 5: train_loss=0.5529, val_loss=0.6010, val_acc=90.0500%, ε=5.0671
[DP-SGD] Epoch 6: train_loss=0.5376, val_loss=0.5709, val_acc=90.9400%, ε=5.3266
[DP-SGD] Epoch 7: train_loss=0.5266, val_loss=0.5726, val_acc=91.1400%, ε=5.5691
[DP-SGD] Epoch 8: train_loss=0.5048, val_loss=0.5478, val_acc=91.6200%, ε=5.7982
[DP-SGD] Epoch 9: train_loss=0.4881, val_loss=0.5390, val_acc=91.7300%, ε=6.0164
[DP-SGD] Epoch 10: train_loss=0.4815, val_loss=0.5053, val_acc=92.1100%, ε=6.2255
[DP-SGD] Epoch 11: train_loss=0.4630, val_loss=0.5147, val_acc=92.2500%, ε=6.4267
[DP-SGD] Epoch 12: train_loss=0.4597, val_loss=0.4935, val_acc=92.3900%, ε=6.6211
[DP-SGD] Epoch 13: train_

{'val_acc': 93.31,
 'test_acc': 94.3,
 'epsilon': np.float64(8.005044757651065),
 'train_time': 3576.1148726940155,
 'sparsity': np.float64(0.0)}

In [None]:
print("\nRunning DP-SGD baseline with post weight pruning")
run_experiment(dataset=DATASET_NAME, use_dp=True, pruning_type="POST", final_sparsity=0.3, fixed_privacy_budget=True)


Running DP-SGD baseline with post weight pruning


  loss.backward()


[DP-SGD] Epoch 1: train_loss=1.0443, val_loss=0.5630, val_acc=85.5300%, ε=3.6072
[DP-SGD] Epoch 2: train_loss=0.5523, val_loss=0.5927, val_acc=88.1800%, ε=4.1009
[DP-SGD] Epoch 3: train_loss=0.5609, val_loss=0.6007, val_acc=89.0500%, ε=4.4702
[DP-SGD] Epoch 4: train_loss=0.5457, val_loss=0.5906, val_acc=90.0500%, ε=4.7850
[DP-SGD] Epoch 5: train_loss=0.5422, val_loss=0.5860, val_acc=90.3200%, ε=5.0671
[DP-SGD] Epoch 6: train_loss=0.5290, val_loss=0.5756, val_acc=90.6900%, ε=5.3266
[DP-SGD] Epoch 7: train_loss=0.5200, val_loss=0.5783, val_acc=91.0400%, ε=5.5691
[DP-SGD] Epoch 8: train_loss=0.5384, val_loss=0.5642, val_acc=91.2900%, ε=5.7982
[DP-SGD] Epoch 9: train_loss=0.5031, val_loss=0.5587, val_acc=91.5300%, ε=6.0164
[DP-SGD] Epoch 10: train_loss=0.5077, val_loss=0.5549, val_acc=91.8100%, ε=6.2255
[DP-SGD] Epoch 11: train_loss=0.5029, val_loss=0.5376, val_acc=92.0700%, ε=6.4267
[DP-SGD] Epoch 12: train_loss=0.4966, val_loss=0.5402, val_acc=92.0600%, ε=6.6211
[DP-SGD] Epoch 13: train_

{'val_acc': 93.29,
 'test_acc': 93.95,
 'epsilon': np.float64(8.005044757651065),
 'train_time': 3511.04918217659,
 'sparsity': np.float64(0.0)}

In [None]:
print("\nRunning DP-SGD baseline with iterative pruning")
run_experiment(dataset=DATASET_NAME, use_dp=True, pruning_type="LTH", final_sparsity=0.3, fixed_privacy_budget=True, prune_epochs=[5, 10, 15])


Running DP-SGD baseline with iterative pruning


  loss.backward()


[DP-SGD] Epoch 1: train_loss=0.9387, val_loss=0.5509, val_acc=86.3600%, ε=3.6072
Saved early-rewind weights for LTH
[DP-SGD] Epoch 2: train_loss=0.5379, val_loss=0.5909, val_acc=88.4700%, ε=4.1009
[DP-SGD] Epoch 3: train_loss=0.5762, val_loss=0.6129, val_acc=89.1800%, ε=4.4702
[DP-SGD] Epoch 4: train_loss=0.5621, val_loss=0.6106, val_acc=89.7400%, ε=4.7850
[DP-SGD] Epoch 5: train_loss=0.5560, val_loss=0.5950, val_acc=90.2900%, ε=5.0671
Pruning 11.21% at epoch 5
Pruned model sparsity = 11.21%
[DP-SGD] Epoch 6: train_loss=0.5238, val_loss=0.5738, val_acc=88.6700%, ε=5.3266
[DP-SGD] Epoch 7: train_loss=0.5516, val_loss=0.5983, val_acc=89.4700%, ε=5.5691
[DP-SGD] Epoch 8: train_loss=0.5553, val_loss=0.5944, val_acc=89.7500%, ε=5.7982
[DP-SGD] Epoch 9: train_loss=0.5532, val_loss=0.5802, val_acc=90.4900%, ε=6.0164
[DP-SGD] Epoch 10: train_loss=0.5398, val_loss=0.5563, val_acc=90.7900%, ε=6.2255
Pruning 21.16% at epoch 10
Pruned model sparsity = 21.15%
[DP-SGD] Epoch 11: train_loss=0.5077, v

{'val_acc': 91.21,
 'test_acc': 86.81,
 'epsilon': np.float64(8.005044757651065),
 'train_time': 3603.4356832504272,
 'sparsity': np.float64(29.999933400820503)}