# Initial Set-up:
This section installs and imports any required libraries that will be used in the code below:

In [1]:
!pip install opacus
!pip install apricot-select numpy scikit-learn
!pip install kmedoids



In [2]:
# For the CNN + DP-SGD
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split, Subset
import numpy as np
from opacus import PrivacyEngine
import time
import matplotlib.pyplot as plt

#For Lazy Greedy
from sklearn.metrics import pairwise_distances
import heapq
from sklearn.metrics import pairwise_distances
from apricot import FacilityLocationSelection
import time

#For K-medoids
import kmedoids
from sklearn.metrics.pairwise import euclidean_distances

# Select Dataset
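The dataset is chosen by name (`"MNIST"` or `"CIFAR"`) and must be passed explicitly, both to the `select_dataset_*` functions below and to the `run_experiment` function later. It is important to run `select_dataset_greedy` or `select_dataset_kmedoids` at least once before calling `run_experiment`, as this loads the data and creates the `train_loader`, `val_loader` and `test_loader` globals that `run_experiment` reads. If you opt to run all cells, the first experiment cell loads MNIST.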

In [3]:
def select_dataset_greedy(name, n_samples):
  """Build data loaders whose training set is a facility-location coreset.

  A random pool of 10,000 images is drawn from the official training split.
  apricot's ``FacilityLocationSelection`` (lazy optimizer, Euclidean metric)
  then picks ``n_samples`` images from that pool, working on the dataset's
  stored ``.data`` array flattened to one row per image. The remainder of the
  training split becomes the validation set.

  Args:
    name: Dataset to load, either ``"MNIST"`` or ``"CIFAR"`` (CIFAR-10).
    n_samples: Number of training images to select from the pool.

  Returns:
    ``(train_loader, val_loader, test_loader)``. The training loader shuffles
    and uses the global ``BATCH_SIZE``; the validation and test loaders use
    batches of 256 without shuffling.

  Raises:
    ValueError: If ``name`` is not one of the supported dataset names.
  """
  # Resolve everything that differs between datasets up front so that an
  # unsupported name fails immediately with a clear message instead of an
  # UnboundLocalError further down.
  if name == "MNIST":
    dataset_cls = datasets.MNIST
    norm_mean, norm_std = (0.1307,), (0.3081,)
  elif name == "CIFAR":
    dataset_cls = datasets.CIFAR10
    norm_mean, norm_std = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)
  else:
    raise ValueError(f"Unknown dataset {name!r}; expected 'MNIST' or 'CIFAR'.")

  transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
  ])

  full_train = dataset_cls(
      root="./data",
      train=True,
      download=True,
      transform=transform
  )

  train_size = 10000
  rest_size = len(full_train) - train_size
  # The validation set currently takes everything outside the training pool.
  # The second split is kept so that val_size can be lowered on its own.
  val_size = rest_size
  train_dataset, subset_rest = random_split(full_train, [train_size, rest_size])
  val_dataset, _ = random_split(subset_rest, [val_size, len(subset_rest) - val_size])

  test_dataset = dataset_cls(
      root="./data",
      train=False,
      download=True,
      transform=transform
  )

  # Rows of the underlying storage that belong to the training pool, in the
  # pool's own order (row i corresponds to train_dataset[i]).
  train_data_array = train_dataset.dataset.data[train_dataset.indices]
  # MNIST stores a tensor; convert so the selector always receives numpy.
  if isinstance(train_data_array, torch.Tensor):
    train_data_array = train_data_array.cpu().numpy()

  # One flat feature vector per image.
  train_data_array = train_data_array.reshape(train_data_array.shape[0], -1)

  selector = FacilityLocationSelection(n_samples=n_samples, metric='euclidean', optimizer='lazy', verbose=True)
  selector.fit(train_data_array)

  # ranking holds positions within the pool, so index train_dataset, not full_train.
  selected_indices = selector.ranking
  selected_train_dataset = Subset(train_dataset, selected_indices)

  train_loader = DataLoader(selected_train_dataset, batch_size=BATCH_SIZE, shuffle=True)
  val_loader   = DataLoader(val_dataset, batch_size=256, shuffle=False)
  test_loader  = DataLoader(test_dataset, batch_size=256, shuffle=False)

  return train_loader, val_loader, test_loader

In [25]:
def select_dataset_kmedoids(name, n_clusters):
  """Build data loaders whose training set consists of k-medoids centres.

  A random pool of 50,000 images is drawn from the official training split and
  the remainder is used for validation. To bound the size of the distance
  matrix, at most 5,000 pool images are sampled (seeded with the global
  ``SEED``). Their pairwise Euclidean distances are clustered with FasterPAM,
  and the resulting medoids form the training set.

  Args:
    name: Dataset to load, either ``"MNIST"`` or ``"CIFAR"`` (CIFAR-10).
    n_clusters: Number of medoids, i.e. the size of the training set. Must be
      between 1 and the size of the clustered subset (at most 5,000).

  Returns:
    ``(train_loader, val_loader, test_loader)``. The training loader shuffles
    and uses the global ``BATCH_SIZE``; the validation and test loaders use
    batches of 256 without shuffling.

  Raises:
    ValueError: If ``name`` is not supported, or ``n_clusters`` is outside the
      valid range.
  """
  # Resolve everything that differs between datasets up front so that an
  # unsupported name fails immediately with a clear message instead of an
  # UnboundLocalError further down.
  if name == "MNIST":
    dataset_cls = datasets.MNIST
    norm_mean, norm_std = (0.1307,), (0.3081,)
  elif name == "CIFAR":
    dataset_cls = datasets.CIFAR10
    norm_mean, norm_std = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)
  else:
    raise ValueError(f"Unknown dataset {name!r}; expected 'MNIST' or 'CIFAR'.")

  transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
  ])

  full_train = dataset_cls(
      root="./data",
      train=True,
      download=True,
      transform=transform
  )

  train_size = 50000
  val_size = len(full_train) - train_size
  train_dataset, val_dataset = random_split(full_train, [train_size, val_size])

  test_dataset = dataset_cls(
      root="./data",
      train=False,
      download=True,
      transform=transform
  )

  # Rows of the underlying storage that belong to the training pool, in the
  # pool's own order (row i corresponds to train_dataset[i]).
  train_data_array = train_dataset.dataset.data[train_dataset.indices]
  # MNIST stores a tensor; convert so the rest of the pipeline is pure numpy.
  if isinstance(train_data_array, torch.Tensor):
    train_data_array = train_data_array.cpu().numpy()
  train_data_array = train_data_array.astype('float32')
  train_data_flat = train_data_array.reshape(train_data_array.shape[0], -1)

  # Cluster only a random subset: the full pairwise distance matrix of the
  # pool is too large to hold in memory.
  subset_size_for_kmedoids = min(5000, train_data_array.shape[0])
  if not 1 <= n_clusters <= subset_size_for_kmedoids:
    raise ValueError(
        f"n_clusters must be between 1 and {subset_size_for_kmedoids}, got {n_clusters}."
    )

  # NOTE: this reseeds NumPy's *global* generator on every call.
  np.random.seed(SEED)
  random_indices_kmedoids = np.random.choice(train_data_array.shape[0], subset_size_for_kmedoids, replace=False)
  subset_for_kmedoids = train_data_flat[random_indices_kmedoids]

  # Distances are computed for the subset only.
  diss = euclidean_distances(subset_for_kmedoids)

  kmeds = kmedoids.KMedoids(n_clusters=n_clusters, method='fasterpam', random_state=0, metric='precomputed')
  model = kmeds.fit(diss)

  # Map medoids back step by step:
  # subset position -> pool position -> index into full_train.
  selected_subset_indices = model.medoid_indices_
  selected_original_indices = random_indices_kmedoids[selected_subset_indices]
  select = [train_dataset.indices[i] for i in selected_original_indices]

  selected_train_dataset = Subset(full_train, select)

  train_loader = DataLoader(selected_train_dataset, batch_size=BATCH_SIZE, shuffle=True)
  val_loader   = DataLoader(val_dataset, batch_size=256, shuffle=False)
  test_loader  = DataLoader(test_dataset, batch_size=256, shuffle=False)

  return train_loader, val_loader, test_loader

# Baseline CNN Model

This model is used by `run_experiment` as a fallback when the dataset name is neither `"MNIST"` nor `"CIFAR"`. **TODO:** raise an exception (e.g. `ValueError`) for an invalid dataset name instead of silently falling back to this model.

In [26]:
# baseline model
class CNN(nn.Module):
    """Fallback network with the CIFAR-10 layout.

    Two unpadded 3x3 convolutions (3 -> 8 -> 16 channels), each followed by
    ReLU and 2x2 max-pooling, feed a 576 -> 64 -> 10 fully connected head.
    The 576 inputs of ``fc1`` equal 16 channels x 6 x 6, which is what
    3-channel 32x32 images produce. The MNIST layout lives in ``CNN_MNIST``.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor.
        self.conv1 = nn.Conv2d(3, 8, 3)
        self.conv2 = nn.Conv2d(8, 16, 3)
        self.pool = nn.MaxPool2d(2, 2)

        # Classifier head.
        self.fc1 = nn.Linear(576, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images."""
        features = x
        for conv in (self.conv1, self.conv2):
            features = self.pool(torch.relu(conv(features)))

        hidden = torch.relu(self.fc1(torch.flatten(features, 1)))
        return self.fc2(hidden)


## Model Architectures
MNIST and CIFAR-10 require different NN structures, so they have their own separate classes that get initialized in `run_experiment`.

In [27]:
# MNIST Model
class CNN_MNIST(nn.Module):
    """CNN for single-channel inputs (MNIST).

    Two unpadded 3x3 convolutions (1 -> 16 -> 32 channels) with ReLU, a single
    2x2 max-pool, then a (32*12*12) -> 64 -> 10 fully connected head.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        # 32 feature maps of 12x12 remain after the pooling step in forward().
        self.fc1 = nn.Linear(in_features=32 * 12 * 12, out_features=64)
        self.fc2 = nn.Linear(in_features=64, out_features=10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images."""
        first = F.relu(self.conv1(x))
        second = F.relu(self.conv2(first))
        pooled = F.max_pool2d(second, 2)
        flat = torch.flatten(pooled, 1)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return logits


# CIFAR-10 Model

class CNN_CIFAR10(nn.Module):
    """CNN for three-channel inputs (CIFAR-10).

    Two conv -> ReLU -> 2x2 max-pool stages (3 -> 8 -> 16 channels, unpadded
    3x3 kernels) followed by a 576 -> 64 -> 10 fully connected head.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stages share one pooling layer.
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3)
        self.pool = nn.MaxPool2d(2, stride=2)

        # Fully connected classifier.
        self.fc1 = nn.Linear(576, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Return the 10 class logits for a batch of images."""
        stage_one = self.pool(torch.relu(self.conv1(x)))
        stage_two = self.pool(torch.relu(self.conv2(stage_one)))
        flat = torch.flatten(stage_two, 1)
        return self.fc2(torch.relu(self.fc1(flat)))


# Model Training
Courtesy of Megha

In [28]:
# @title
def train_one_epoch(model, loader, optimizer):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Each batch is moved to the global ``device``, scored with cross-entropy and
    used for one optimizer step. The returned value is the sum of the per-batch
    losses divided by ``len(loader)``.
    """
    model.train()
    batch_losses = []
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = F.cross_entropy(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)

In [29]:
# @title
@torch.no_grad()
def evaluate(model, loader):
    # evaluation
    model.eval()
    loss, correct = 0, 0
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        out = model(data)
        loss += F.cross_entropy(out, target, reduction="sum").item()
        pred = out.argmax(1)
        correct += pred.eq(target).sum().item()
    loss /= len(loader.dataset)
    acc = 100. * correct / len(loader.dataset)
    return loss, acc

In [30]:
# @title
"""
dataset: either "MNIST" or "CIFAR"; it is a required argument of run_experiment
dataset is exposed as a parameter for convenience and easy swapping
There is no MNIST default: any other value falls back to the base CNN model
This makes the dataset selection a modular component, should we want to choose other datasets
"""

def run_experiment(dataset, use_dp=False, fixed_privacy_budget=False):
    """Train a CNN with early stopping and report validation/test accuracy.

    The function reads the module-level ``train_loader``, ``val_loader`` and
    ``test_loader`` (created by one of the ``select_dataset_*`` functions) as
    well as the hyperparameter globals (``LR``, ``EPOCHS``, ``PATIENCE``, ...).

    Args:
        dataset: ``"MNIST"`` selects ``CNN_MNIST`` and ``"CIFAR"`` selects
            ``CNN_CIFAR10``; any other value falls back to the baseline ``CNN``.
        use_dp: If True, wrap model, optimizer and loader with Opacus'
            ``PrivacyEngine`` and train with DP-SGD.
        fixed_privacy_budget: Only used when ``use_dp`` is True. If True, the
            noise level is derived from ``TARGET_EPSILON``/``TARGET_DELTA`` over
            ``NUM_EPOCHS`` epochs; otherwise ``NOISE_MULTIPLIER`` is used as is.

    Returns:
        ``(best_val_acc, test_acc, final_eps)``; ``final_eps`` is ``None`` when
        ``use_dp`` is False.
    """
    # setup model and optimizer
    if dataset == 'MNIST':
        model = CNN_MNIST().to(device)
    elif dataset == "CIFAR":
        model = CNN_CIFAR10().to(device)
    else:
        # Any other name falls back to the baseline CNN class.
        model = CNN().to(device)

    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

    privacy_engine = None
    report_delta = None
    train_loader_dp = train_loader
    if use_dp:
        privacy_engine = PrivacyEngine()

        if fixed_privacy_budget:
            # Report epsilon at the same delta the noise was calibrated for.
            report_delta = TARGET_DELTA

            # Calculates sigma based on target epsilon and delta.
            # NOTE(review): the budget is calibrated for NUM_EPOCHS, while the
            # loop below runs up to EPOCHS; keep the two values equal.
            model, optimizer, train_loader_dp = privacy_engine.make_private_with_epsilon(
                module=model,
                optimizer=optimizer,
                data_loader=train_loader,
                max_grad_norm=MAX_GRAD_NORM,
                target_delta=TARGET_DELTA,
                target_epsilon=TARGET_EPSILON,
                epochs=NUM_EPOCHS,
            )
        else:
            report_delta = DELTA

            # Otherwise takes sigma as a hyperparameter
            model, optimizer, train_loader_dp = privacy_engine.make_private(
                module=model,
                optimizer=optimizer,
                data_loader=train_loader,
                max_grad_norm=MAX_GRAD_NORM,
                noise_multiplier=NOISE_MULTIPLIER,
            )

    # train
    best_val_acc = 0
    epochs_no_improve = 0
    best_model_path = f"best_model{'_dp' if use_dp else ''}.pt"
    # Tracks whether *this* run wrote the checkpoint, so a stale file from an
    # earlier run is never loaded by mistake.
    checkpoint_saved = False

    start_time = time.time()

    for epoch in range(1, EPOCHS + 1):
        train_loss = train_one_epoch(model, train_loader_dp, optimizer)
        val_loss, val_acc = evaluate(model, val_loader)

        epoch_report = (f"[{'DP-SGD' if use_dp else 'Standard SGD'}] Epoch {epoch}: "
                        + f"train_loss={train_loss:.4f}, "
                        + f"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}%")
        if use_dp:
            # The accountant only exists for DP runs.
            eps = privacy_engine.get_epsilon(report_delta)
            epoch_report += f", ε={eps:.4f}"
        print(epoch_report)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            epochs_no_improve = 0
            torch.save(model.state_dict(), best_model_path)
            checkpoint_saved = True
        else:
            epochs_no_improve += 1

        if epochs_no_improve >= PATIENCE:
            print(f"\nEarly stopping at epoch {epoch} (no improvement for {PATIENCE} epochs).")
            break

    end_time = time.time()
    total_time = end_time - start_time

    # test on best model (or on the final weights if no epoch ever improved)
    if checkpoint_saved:
        model.load_state_dict(torch.load(best_model_path))
    test_loss, test_acc = evaluate(model, test_loader)
    final_eps = privacy_engine.get_epsilon(report_delta) if use_dp else None

    print(f"\n[{ 'DP-SGD' if use_dp else 'Standard-SGD'}]")
    print(f"Best val acc: {best_val_acc:.4f}%, Test acc: {test_acc:.4f}%\n" + (f" ε={final_eps:.4f}\n" if use_dp else "") + (f"Total runtime: {total_time:.4f} seconds"))
    return best_val_acc, test_acc, final_eps

# Initial Benchmarks
## Model Hyperparameters

In [31]:
# @title
# Run on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Optimisation settings shared by every experiment.
BATCH_SIZE = 128
LR = 0.01
EPOCHS = 20          # upper bound on training epochs (early stopping may end sooner)
SEED = 42
MAX_GRAD_NORM = 1.0  # per-sample gradient clipping norm passed to Opacus
DELTA = 1e-5         # delta used when epsilon is derived from NOISE_MULTIPLIER
PATIENCE = 5         # epochs without validation improvement before stopping

# For dynamically calculated epsilon
NOISE_MULTIPLIER = 1.0

# For a fixed privacy budget (eps, del)
TARGET_EPSILON = 8
TARGET_DELTA = 1e-5
# The privacy accountant is calibrated for NUM_EPOCHS while the training loop
# runs for EPOCHS, so derive one from the other to keep them from drifting.
NUM_EPOCHS = EPOCHS

torch.manual_seed(SEED)
np.random.seed(SEED)

# TESTS:

## MNIST

### Lazy Greedy

In [11]:
# MNIST, lazy-greedy facility-location selection of 100 training images.
# The loaders are bound to module-level names because run_experiment reads
# train_loader / val_loader / test_loader as globals.
DATASET_NAME = "MNIST"
N_SAMPLES = 100
train_loader, val_loader, test_loader = select_dataset_greedy(DATASET_NAME, N_SAMPLES)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

100%|██████████| 9.91M/9.91M [00:00<00:00, 37.5MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.04MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 9.38MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 9.57MB/s]
100%|██████████| 100/100 [00:00<00:00, 171it/s] 
  z = np.log(np.where(t > np.log(1 - q), (np.exp(t) + q - 1) / q, 1))
  t > np.log(1 - q),



Running DP-SGD baseline for fixed (8, 1e-05)-DP


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.3196, val_loss=2.3154, val_acc=7.3000%, ε=1.4451
[DP-SGD] Epoch 2: train_loss=2.3182, val_loss=2.3134, val_acc=8.1000%, ε=2.1200
[DP-SGD] Epoch 3: train_loss=2.3153, val_loss=2.3105, val_acc=9.2000%, ε=2.6597
[DP-SGD] Epoch 4: train_loss=2.3108, val_loss=2.3071, val_acc=10.3000%, ε=3.1286
[DP-SGD] Epoch 5: train_loss=2.3052, val_loss=2.3039, val_acc=11.1000%, ε=3.5518
[DP-SGD] Epoch 6: train_loss=2.2995, val_loss=2.3005, val_acc=12.0000%, ε=3.9423
[DP-SGD] Epoch 7: train_loss=2.2936, val_loss=2.2973, val_acc=14.1000%, ε=4.3079
[DP-SGD] Epoch 8: train_loss=2.2879, val_loss=2.2938, val_acc=15.6000%, ε=4.6538
[DP-SGD] Epoch 9: train_loss=2.2818, val_loss=2.2899, val_acc=16.5000%, ε=4.9834
[DP-SGD] Epoch 10: train_loss=2.2751, val_loss=2.2857, val_acc=17.6000%, ε=5.2994
[DP-SGD] Epoch 11: train_loss=2.2682, val_loss=2.2815, val_acc=18.1000%, ε=5.6037
[DP-SGD] Epoch 12: train_loss=2.2609, val_loss=2.2768, val_acc=18.4000%, ε=5.8979
[DP-SGD] Epoch 13: train_los

(22.1, 22.48, np.float64(7.997143272375973))

In [14]:
# MNIST, lazy-greedy facility-location selection of 200 training images.
# Rebinds the global loaders that run_experiment reads.
DATASET_NAME = "MNIST"
N_SAMPLES = 200
train_loader, val_loader, test_loader = select_dataset_greedy(DATASET_NAME, N_SAMPLES)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

100%|██████████| 200/200 [00:00<00:00, 265it/s]



Running DP-SGD baseline for fixed (8, 1e-05)-DP


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.3105, val_loss=2.2990, val_acc=10.2000%, ε=1.7625
[DP-SGD] Epoch 2: train_loss=2.3029, val_loss=2.2933, val_acc=11.7000%, ε=2.4240
[DP-SGD] Epoch 3: train_loss=2.2951, val_loss=2.2852, val_acc=15.1000%, ε=2.9456
[DP-SGD] Epoch 4: train_loss=2.2857, val_loss=2.2757, val_acc=18.2000%, ε=3.3954
[DP-SGD] Epoch 5: train_loss=2.2692, val_loss=2.2657, val_acc=21.4000%, ε=3.7996
[DP-SGD] Epoch 6: train_loss=2.2621, val_loss=2.2552, val_acc=24.1000%, ε=4.1714
[DP-SGD] Epoch 7: train_loss=2.2440, val_loss=2.2443, val_acc=26.4000%, ε=4.5186
[DP-SGD] Epoch 8: train_loss=2.2292, val_loss=2.2334, val_acc=30.1000%, ε=4.8463
[DP-SGD] Epoch 9: train_loss=2.1994, val_loss=2.2216, val_acc=31.1000%, ε=5.1581
[DP-SGD] Epoch 10: train_loss=2.1959, val_loss=2.2085, val_acc=33.0000%, ε=5.4565
[DP-SGD] Epoch 11: train_loss=2.1756, val_loss=2.1948, val_acc=35.4000%, ε=5.7435
[DP-SGD] Epoch 12: train_loss=2.1528, val_loss=2.1804, val_acc=36.2000%, ε=6.0207
[DP-SGD] Epoch 13: train_

(41.9, 40.95, np.float64(7.990183309027119))

In [11]:
# MNIST, lazy-greedy facility-location selection of 300 training images.
# Rebinds the global loaders that run_experiment reads.
DATASET_NAME = "MNIST"
N_SAMPLES = 300
train_loader, val_loader, test_loader = select_dataset_greedy(DATASET_NAME, N_SAMPLES)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

100%|██████████| 300/300 [00:32<00:00, 9.27it/s]



Running DP-SGD baseline for fixed (8, 1e-05)-DP




[DP-SGD] Epoch 1: train_loss=2.3054, val_loss=2.3046, val_acc=10.2100%, ε=1.9068
[DP-SGD] Epoch 2: train_loss=2.2925, val_loss=2.3029, val_acc=11.0200%, ε=2.5514
[DP-SGD] Epoch 3: train_loss=2.2932, val_loss=2.3007, val_acc=11.9300%, ε=3.0612
[DP-SGD] Epoch 4: train_loss=2.2984, val_loss=2.2980, val_acc=13.2400%, ε=3.5012
[DP-SGD] Epoch 5: train_loss=2.2956, val_loss=2.2951, val_acc=15.2100%, ε=3.8968
[DP-SGD] Epoch 6: train_loss=2.2873, val_loss=2.2916, val_acc=16.7700%, ε=4.2607
[DP-SGD] Epoch 7: train_loss=2.2800, val_loss=2.2875, val_acc=16.7800%, ε=4.6005
[DP-SGD] Epoch 8: train_loss=2.2735, val_loss=2.2831, val_acc=16.2800%, ε=4.9213
[DP-SGD] Epoch 9: train_loss=2.2623, val_loss=2.2785, val_acc=16.2900%, ε=5.2264
[DP-SGD] Epoch 10: train_loss=2.2602, val_loss=2.2737, val_acc=15.6400%, ε=5.5184
[DP-SGD] Epoch 11: train_loss=2.2545, val_loss=2.2693, val_acc=14.9200%, ε=5.7993
[DP-SGD] Epoch 12: train_loss=2.2438, val_loss=2.2646, val_acc=14.2600%, ε=6.0705

Early stopping at epoch 

(16.78, 22.58, np.float64(6.070487622310106))

In [16]:
# MNIST, lazy-greedy facility-location selection of 500 training images.
# Rebinds the global loaders that run_experiment reads.
DATASET_NAME = "MNIST"
N_SAMPLES = 500
train_loader, val_loader, test_loader = select_dataset_greedy(DATASET_NAME, N_SAMPLES)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

100%|██████████| 500/500 [00:02<00:00, 213it/s]



Running DP-SGD baseline for fixed (8, 1e-05)-DP




[DP-SGD] Epoch 1: train_loss=2.3066, val_loss=2.2973, val_acc=10.5820%, ε=1.9909
[DP-SGD] Epoch 2: train_loss=2.2871, val_loss=2.2859, val_acc=11.0320%, ε=2.6240
[DP-SGD] Epoch 3: train_loss=2.2733, val_loss=2.2734, val_acc=10.6120%, ε=3.1260
[DP-SGD] Epoch 4: train_loss=2.2504, val_loss=2.2607, val_acc=9.8840%, ε=3.5598
[DP-SGD] Epoch 5: train_loss=2.2303, val_loss=2.2467, val_acc=9.8160%, ε=3.9499
[DP-SGD] Epoch 6: train_loss=2.2086, val_loss=2.2286, val_acc=12.4840%, ε=4.3089
[DP-SGD] Epoch 7: train_loss=2.1845, val_loss=2.2082, val_acc=19.3300%, ε=4.6443
[DP-SGD] Epoch 8: train_loss=2.1536, val_loss=2.1845, val_acc=24.6360%, ε=4.9609
[DP-SGD] Epoch 9: train_loss=2.1149, val_loss=2.1568, val_acc=32.4360%, ε=5.2620
[DP-SGD] Epoch 10: train_loss=2.0778, val_loss=2.1238, val_acc=41.7440%, ε=5.5503
[DP-SGD] Epoch 11: train_loss=2.0503, val_loss=2.0877, val_acc=46.2000%, ε=5.8275
[DP-SGD] Epoch 12: train_loss=2.0034, val_loss=2.0454, val_acc=48.0040%, ε=6.0951
[DP-SGD] Epoch 13: train_lo

(59.34, 60.45, np.float64(7.995831917202194))

### K-Medoids

In [36]:
# MNIST, training set = 10 k-medoids centres.
# Rebinds the global loaders that run_experiment reads.
DATASET_NAME = "MNIST"
N_CLUSTERS = 10
train_loader, val_loader, test_loader = select_dataset_kmedoids(DATASET_NAME, N_CLUSTERS)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)


Running DP-SGD baseline for fixed (8, 1e-05)-DP


  z = np.log(np.where(t > np.log(1 - q), (np.exp(t) + q - 1) / q, 1))
  t > np.log(1 - q),


[DP-SGD] Epoch 1: train_loss=2.3326, val_loss=2.3110, val_acc=8.6900%, ε=1.4451
[DP-SGD] Epoch 2: train_loss=2.3368, val_loss=2.3144, val_acc=8.8500%, ε=2.1200
[DP-SGD] Epoch 3: train_loss=2.3389, val_loss=2.3139, val_acc=9.3500%, ε=2.6597
[DP-SGD] Epoch 4: train_loss=2.3312, val_loss=2.3119, val_acc=10.2300%, ε=3.1286
[DP-SGD] Epoch 5: train_loss=2.3224, val_loss=2.3061, val_acc=10.7600%, ε=3.5518
[DP-SGD] Epoch 6: train_loss=2.3058, val_loss=2.3012, val_acc=10.1600%, ε=3.9423
[DP-SGD] Epoch 7: train_loss=2.2826, val_loss=2.2924, val_acc=10.2000%, ε=4.3079
[DP-SGD] Epoch 8: train_loss=2.2437, val_loss=2.2897, val_acc=9.8600%, ε=4.6538
[DP-SGD] Epoch 9: train_loss=2.2205, val_loss=2.2892, val_acc=10.2700%, ε=4.9834
[DP-SGD] Epoch 10: train_loss=2.1927, val_loss=2.2954, val_acc=10.2800%, ε=5.2994

Early stopping at epoch 10 (no improvement for 5 epochs).

[DP-SGD]
Best val acc: 10.7600%, Test acc: 10.6000%
 ε=5.2994
Total runtime: 21.8153 seconds


(10.76, 10.6, np.float64(5.2993806631722595))

In [33]:
# MNIST, training set = 200 k-medoids centres.
# Rebinds the global loaders that run_experiment reads.
DATASET_NAME = "MNIST"
N_CLUSTERS = 200
train_loader, val_loader, test_loader = select_dataset_kmedoids(DATASET_NAME, N_CLUSTERS)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)


Running DP-SGD baseline for fixed (8, 1e-05)-DP


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.3025, val_loss=2.3084, val_acc=9.9327%, ε=1.7625
[DP-SGD] Epoch 2: train_loss=2.3110, val_loss=2.3025, val_acc=9.9727%, ε=2.4240
[DP-SGD] Epoch 3: train_loss=2.3012, val_loss=2.2932, val_acc=10.1636%, ε=2.9456
[DP-SGD] Epoch 4: train_loss=2.2943, val_loss=2.2820, val_acc=10.7218%, ε=3.3954
[DP-SGD] Epoch 5: train_loss=2.2677, val_loss=2.2700, val_acc=12.8927%, ε=3.7996
[DP-SGD] Epoch 6: train_loss=2.2499, val_loss=2.2570, val_acc=17.2055%, ε=4.1714
[DP-SGD] Epoch 7: train_loss=2.2398, val_loss=2.2437, val_acc=20.5055%, ε=4.5186
[DP-SGD] Epoch 8: train_loss=2.2237, val_loss=2.2307, val_acc=24.4618%, ε=4.8463
[DP-SGD] Epoch 9: train_loss=2.2035, val_loss=2.2178, val_acc=28.1927%, ε=5.1581
[DP-SGD] Epoch 10: train_loss=2.1782, val_loss=2.2053, val_acc=30.8382%, ε=5.4565
[DP-SGD] Epoch 11: train_loss=2.1719, val_loss=2.1911, val_acc=32.8782%, ε=5.7435
[DP-SGD] Epoch 12: train_loss=2.1585, val_loss=2.1762, val_acc=34.9818%, ε=6.0207
[DP-SGD] Epoch 13: train_lo

(46.89636363636364, 47.11, np.float64(7.990183309027119))

In [34]:
# MNIST, training set = 300 k-medoids centres.
# Rebinds the global loaders that run_experiment reads.
DATASET_NAME = "MNIST"
N_CLUSTERS = 300
train_loader, val_loader, test_loader = select_dataset_kmedoids(DATASET_NAME, N_CLUSTERS)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)


Running DP-SGD baseline for fixed (8, 1e-05)-DP


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.3021, val_loss=2.3055, val_acc=8.5036%, ε=1.9068
[DP-SGD] Epoch 2: train_loss=2.2906, val_loss=2.2966, val_acc=9.8618%, ε=2.5514
[DP-SGD] Epoch 3: train_loss=2.2763, val_loss=2.2831, val_acc=10.9582%, ε=3.0612
[DP-SGD] Epoch 4: train_loss=2.2631, val_loss=2.2673, val_acc=11.9800%, ε=3.5012
[DP-SGD] Epoch 5: train_loss=2.2399, val_loss=2.2497, val_acc=12.7873%, ε=3.8968
[DP-SGD] Epoch 6: train_loss=2.2179, val_loss=2.2305, val_acc=15.9782%, ε=4.2607
[DP-SGD] Epoch 7: train_loss=2.1792, val_loss=2.2092, val_acc=21.4400%, ε=4.6005
[DP-SGD] Epoch 8: train_loss=2.1620, val_loss=2.1855, val_acc=26.6418%, ε=4.9213
[DP-SGD] Epoch 9: train_loss=2.1347, val_loss=2.1602, val_acc=31.0836%, ε=5.2264
[DP-SGD] Epoch 10: train_loss=2.1184, val_loss=2.1311, val_acc=37.4236%, ε=5.5184
[DP-SGD] Epoch 11: train_loss=2.0647, val_loss=2.0985, val_acc=43.4109%, ε=5.7993
[DP-SGD] Epoch 12: train_loss=2.0232, val_loss=2.0597, val_acc=47.2400%, ε=6.0705
[DP-SGD] Epoch 13: train_lo

(56.71272727272727, 57.56, np.float64(7.996436921674794))

In [24]:
# MNIST, training set = 500 k-medoids centres.
# Rebinds the global loaders that run_experiment reads.
DATASET_NAME = "MNIST"
N_CLUSTERS = 500
train_loader, val_loader, test_loader = select_dataset_kmedoids(DATASET_NAME, N_CLUSTERS)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)


Running DP-SGD baseline for fixed (8, 1e-05)-DP




[DP-SGD] Epoch 1: train_loss=2.3015, val_loss=2.2967, val_acc=10.0350%, ε=1.9909
[DP-SGD] Epoch 2: train_loss=2.2865, val_loss=2.2819, val_acc=13.3100%, ε=2.6240
[DP-SGD] Epoch 3: train_loss=2.2684, val_loss=2.2620, val_acc=17.7550%, ε=3.1260
[DP-SGD] Epoch 4: train_loss=2.2438, val_loss=2.2406, val_acc=22.5000%, ε=3.5598
[DP-SGD] Epoch 5: train_loss=2.2173, val_loss=2.2160, val_acc=26.9000%, ε=3.9499
[DP-SGD] Epoch 6: train_loss=2.1798, val_loss=2.1836, val_acc=30.9850%, ε=4.3089
[DP-SGD] Epoch 7: train_loss=2.1496, val_loss=2.1445, val_acc=37.2500%, ε=4.6443
[DP-SGD] Epoch 8: train_loss=2.1125, val_loss=2.0995, val_acc=44.7100%, ε=4.9609
[DP-SGD] Epoch 9: train_loss=2.0532, val_loss=2.0493, val_acc=52.3000%, ε=5.2620
[DP-SGD] Epoch 10: train_loss=1.9883, val_loss=1.9941, val_acc=55.8500%, ε=5.5503
[DP-SGD] Epoch 11: train_loss=1.9130, val_loss=1.9323, val_acc=58.1650%, ε=5.8275
[DP-SGD] Epoch 12: train_loss=1.8628, val_loss=1.8622, val_acc=59.9850%, ε=6.0951
[DP-SGD] Epoch 13: train_

(71.24, 71.28, np.float64(7.995831917202194))

In [32]:
# MNIST, training set = 500 k-medoids centres (same configuration as the
# previous cell, run again). Rebinds the global loaders that run_experiment reads.
DATASET_NAME = "MNIST"
N_CLUSTERS = 500
train_loader, val_loader, test_loader = select_dataset_kmedoids(DATASET_NAME, N_CLUSTERS)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)


Running DP-SGD baseline for fixed (8, 1e-05)-DP




[DP-SGD] Epoch 1: train_loss=2.2978, val_loss=2.2974, val_acc=10.1400%, ε=1.9909
[DP-SGD] Epoch 2: train_loss=2.2834, val_loss=2.2831, val_acc=13.0300%, ε=2.6240
[DP-SGD] Epoch 3: train_loss=2.2663, val_loss=2.2643, val_acc=14.0400%, ε=3.1260
[DP-SGD] Epoch 4: train_loss=2.2412, val_loss=2.2441, val_acc=13.8700%, ε=3.5598
[DP-SGD] Epoch 5: train_loss=2.2171, val_loss=2.2217, val_acc=14.3700%, ε=3.9499
[DP-SGD] Epoch 6: train_loss=2.1891, val_loss=2.1929, val_acc=17.3400%, ε=4.3089
[DP-SGD] Epoch 7: train_loss=2.1482, val_loss=2.1585, val_acc=22.8700%, ε=4.6443
[DP-SGD] Epoch 8: train_loss=2.1071, val_loss=2.1167, val_acc=31.9200%, ε=4.9609
[DP-SGD] Epoch 9: train_loss=2.0592, val_loss=2.0683, val_acc=40.1700%, ε=5.2620
[DP-SGD] Epoch 10: train_loss=1.9990, val_loss=2.0120, val_acc=46.8100%, ε=5.5503
[DP-SGD] Epoch 11: train_loss=1.9475, val_loss=1.9491, val_acc=52.5400%, ε=5.8275
[DP-SGD] Epoch 12: train_loss=1.8538, val_loss=1.8781, val_acc=57.2400%, ε=6.0951
[DP-SGD] Epoch 13: train_

(72.9, 73.49, np.float64(7.995831917202194))

In [35]:
# MNIST, training set = 5000 k-medoids centres. select_dataset_kmedoids clusters
# a subset of at most 5000 images, so this requests as many medoids as the
# subset can hold. Rebinds the global loaders that run_experiment reads.
DATASET_NAME = "MNIST"
N_CLUSTERS = 5000
train_loader, val_loader, test_loader = select_dataset_kmedoids(DATASET_NAME, N_CLUSTERS)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)


Running DP-SGD baseline for fixed (8, 1e-05)-DP




[DP-SGD] Epoch 1: train_loss=2.2242, val_loss=2.0802, val_acc=50.6100%, ε=2.7177
[DP-SGD] Epoch 2: train_loss=1.7895, val_loss=1.4029, val_acc=65.2500%, ε=3.2566
[DP-SGD] Epoch 3: train_loss=1.0957, val_loss=0.8283, val_acc=73.7300%, ε=3.6873
[DP-SGD] Epoch 4: train_loss=0.7106, val_loss=0.6205, val_acc=78.9500%, ε=4.0630
[DP-SGD] Epoch 5: train_loss=0.5977, val_loss=0.5512, val_acc=82.0800%, ε=4.4034
[DP-SGD] Epoch 6: train_loss=0.5416, val_loss=0.5399, val_acc=83.4600%, ε=4.7182
[DP-SGD] Epoch 7: train_loss=0.5147, val_loss=0.5207, val_acc=85.1500%, ε=5.0135
[DP-SGD] Epoch 8: train_loss=0.5508, val_loss=0.5131, val_acc=86.1800%, ε=5.2931
[DP-SGD] Epoch 9: train_loss=0.5531, val_loss=0.5057, val_acc=86.7000%, ε=5.5597
[DP-SGD] Epoch 10: train_loss=0.4933, val_loss=0.5174, val_acc=86.7900%, ε=5.8155
[DP-SGD] Epoch 11: train_loss=0.5131, val_loss=0.5285, val_acc=87.3900%, ε=6.0619
[DP-SGD] Epoch 12: train_loss=0.5380, val_loss=0.5270, val_acc=87.6800%, ε=6.3000
[DP-SGD] Epoch 13: train_

(89.15, 89.68, np.float64(7.997052150973982))

## CIFAR-10

### Lazy Greedy

In [14]:
# CIFAR-10, lazy-greedy facility-location selection of 50 training images.
# Rebinds the global loaders that run_experiment reads.
DATASET_NAME = "CIFAR"

N_SAMPLES = 50
train_loader, val_loader, test_loader = select_dataset_greedy(DATASET_NAME, N_SAMPLES)

# DP-SGD with noise calibrated to the fixed (TARGET_EPSILON, TARGET_DELTA) budget.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

100%|██████████| 50.0/50.0 [00:00<00:00, 172it/s] 
  z = np.log(np.where(t > np.log(1 - q), (np.exp(t) + q - 1) / q, 1))
  t > np.log(1 - q),



Running DP-SGD baseline for fixed (8, 1e-05)-DP




[DP-SGD] Epoch 1: train_loss=2.3183, val_loss=2.3029, val_acc=9.9822%, ε=1.4451
[DP-SGD] Epoch 2: train_loss=2.3173, val_loss=2.3029, val_acc=10.0489%, ε=2.1200
[DP-SGD] Epoch 3: train_loss=2.3154, val_loss=2.3030, val_acc=10.1889%, ε=2.6597
[DP-SGD] Epoch 4: train_loss=2.3128, val_loss=2.3030, val_acc=10.4733%, ε=3.1286
[DP-SGD] Epoch 5: train_loss=2.3098, val_loss=2.3030, val_acc=10.9022%, ε=3.5518
[DP-SGD] Epoch 6: train_loss=2.3064, val_loss=2.3030, val_acc=11.4244%, ε=3.9423
[DP-SGD] Epoch 7: train_loss=2.3021, val_loss=2.3030, val_acc=11.2400%, ε=4.3079
[DP-SGD] Epoch 8: train_loss=2.2972, val_loss=2.3029, val_acc=10.5356%, ε=4.6538
[DP-SGD] Epoch 9: train_loss=2.2920, val_loss=2.3029, val_acc=10.1444%, ε=4.9834
[DP-SGD] Epoch 10: train_loss=2.2865, val_loss=2.3030, val_acc=10.0289%, ε=5.2994
[DP-SGD] Epoch 11: train_loss=2.2808, val_loss=2.3032, val_acc=9.9756%, ε=5.6037

Early stopping at epoch 11 (no improvement for 5 epochs).

[DP-SGD]
Best val acc: 11.4244%, Test acc: 11.090

(11.424444444444445, 11.09, np.float64(5.603719670759895))

In [12]:
# Lazy Greedy section: CIFAR run with N_SAMPLES = 100.
DATASET_NAME = "CIFAR"
# Forwarded to select_dataset_greedy as its n_samples argument.
N_SAMPLES = 100
# NOTE(review): the three loaders are rebound at notebook scope and are NOT
# passed to run_experiment below; presumably it reads them as globals, so this
# line must run before the experiment -- confirm against run_experiment.
train_loader, val_loader, test_loader = select_dataset_greedy(DATASET_NAME, N_SAMPLES)

# TARGET_EPSILON and TARGET_DELTA are defined outside this cell.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
# Kept as the bare last expression so the notebook displays the return value;
# the recorded output suggests a (best val acc, test acc, epsilon) tuple.
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

100%|██████████| 100/100 [00:00<00:00, 144it/s] 
  z = np.log(np.where(t > np.log(1 - q), (np.exp(t) + q - 1) / q, 1))
  t > np.log(1 - q),



Running DP-SGD baseline for fixed (8, 1e-05)-DP


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.3166, val_loss=2.3018, val_acc=11.0000%, ε=1.4451
[DP-SGD] Epoch 2: train_loss=2.3159, val_loss=2.3017, val_acc=10.7000%, ε=2.1200
[DP-SGD] Epoch 3: train_loss=2.3147, val_loss=2.3016, val_acc=11.0000%, ε=2.6597
[DP-SGD] Epoch 4: train_loss=2.3130, val_loss=2.3014, val_acc=11.5000%, ε=3.1286
[DP-SGD] Epoch 5: train_loss=2.3108, val_loss=2.3013, val_acc=11.0000%, ε=3.5518
[DP-SGD] Epoch 6: train_loss=2.3082, val_loss=2.3012, val_acc=11.5000%, ε=3.9423
[DP-SGD] Epoch 7: train_loss=2.3053, val_loss=2.3011, val_acc=10.7000%, ε=4.3079
[DP-SGD] Epoch 8: train_loss=2.3022, val_loss=2.3010, val_acc=9.5000%, ε=4.6538
[DP-SGD] Epoch 9: train_loss=2.2989, val_loss=2.3009, val_acc=9.8000%, ε=4.9834

Early stopping at epoch 9 (no improvement for 5 epochs).

[DP-SGD]
Best val acc: 11.5000%, Test acc: 10.5500%
 ε=4.9834
Total runtime: 4.4745 seconds


(11.5, 10.55, np.float64(4.9833854040797485))

In [13]:
# Lazy Greedy section: CIFAR run with N_SAMPLES = 200.
DATASET_NAME = "CIFAR"
# Forwarded to select_dataset_greedy as its n_samples argument.
N_SAMPLES = 200
# NOTE(review): the three loaders are rebound at notebook scope and are NOT
# passed to run_experiment below; presumably it reads them as globals, so this
# line must run before the experiment -- confirm against run_experiment.
train_loader, val_loader, test_loader = select_dataset_greedy(DATASET_NAME, N_SAMPLES)

# TARGET_EPSILON and TARGET_DELTA are defined outside this cell.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
# Kept as the bare last expression so the notebook displays the return value;
# the recorded output suggests a (best val acc, test acc, epsilon) tuple.
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

100%|██████████| 200/200 [00:00<00:00, 361it/s]



Running DP-SGD baseline for fixed (8, 1e-05)-DP


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.3009, val_loss=2.3056, val_acc=9.6000%, ε=1.7625
[DP-SGD] Epoch 2: train_loss=2.3006, val_loss=2.3055, val_acc=9.6000%, ε=2.4240
[DP-SGD] Epoch 3: train_loss=2.3015, val_loss=2.3055, val_acc=9.3000%, ε=2.9456
[DP-SGD] Epoch 4: train_loss=2.3032, val_loss=2.3053, val_acc=10.1000%, ε=3.3954
[DP-SGD] Epoch 5: train_loss=2.3002, val_loss=2.3053, val_acc=10.0000%, ε=3.7996
[DP-SGD] Epoch 6: train_loss=2.2977, val_loss=2.3053, val_acc=9.8000%, ε=4.1714
[DP-SGD] Epoch 7: train_loss=2.3008, val_loss=2.3053, val_acc=9.9000%, ε=4.5186
[DP-SGD] Epoch 8: train_loss=2.2968, val_loss=2.3052, val_acc=10.2000%, ε=4.8463
[DP-SGD] Epoch 9: train_loss=2.2968, val_loss=2.3051, val_acc=11.1000%, ε=5.1581
[DP-SGD] Epoch 10: train_loss=2.2958, val_loss=2.3051, val_acc=11.3000%, ε=5.4565
[DP-SGD] Epoch 11: train_loss=2.2925, val_loss=2.3049, val_acc=11.8000%, ε=5.7435
[DP-SGD] Epoch 12: train_loss=2.2977, val_loss=2.3049, val_acc=11.5000%, ε=6.0207
[DP-SGD] Epoch 13: train_loss=

(11.8, 10.9, np.float64(7.051483052991745))

In [14]:
# Lazy Greedy section: CIFAR run with N_SAMPLES = 300.
DATASET_NAME = "CIFAR"
# Forwarded to select_dataset_greedy as its n_samples argument.
N_SAMPLES = 300
# NOTE(review): the three loaders are rebound at notebook scope and are NOT
# passed to run_experiment below; presumably it reads them as globals, so this
# line must run before the experiment -- confirm against run_experiment.
train_loader, val_loader, test_loader = select_dataset_greedy(DATASET_NAME, N_SAMPLES)

# TARGET_EPSILON and TARGET_DELTA are defined outside this cell.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
# Kept as the bare last expression so the notebook displays the return value;
# the recorded output suggests a (best val acc, test acc, epsilon) tuple.
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

100%|██████████| 300/300 [00:00<00:00, 455it/s]



Running DP-SGD baseline for fixed (8, 1e-05)-DP


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.3108, val_loss=2.3066, val_acc=8.7000%, ε=1.9068
[DP-SGD] Epoch 2: train_loss=2.3033, val_loss=2.3056, val_acc=9.8000%, ε=2.5514
[DP-SGD] Epoch 3: train_loss=2.3014, val_loss=2.3046, val_acc=10.3000%, ε=3.0612
[DP-SGD] Epoch 4: train_loss=2.2969, val_loss=2.3035, val_acc=10.6000%, ε=3.5012
[DP-SGD] Epoch 5: train_loss=2.3008, val_loss=2.3023, val_acc=11.0000%, ε=3.8968
[DP-SGD] Epoch 6: train_loss=2.2962, val_loss=2.3010, val_acc=11.5000%, ε=4.2607
[DP-SGD] Epoch 7: train_loss=2.2896, val_loss=2.2994, val_acc=12.4000%, ε=4.6005
[DP-SGD] Epoch 8: train_loss=2.2963, val_loss=2.2978, val_acc=13.4000%, ε=4.9213
[DP-SGD] Epoch 9: train_loss=2.2830, val_loss=2.2959, val_acc=13.6000%, ε=5.2264
[DP-SGD] Epoch 10: train_loss=2.2813, val_loss=2.2941, val_acc=14.8000%, ε=5.5184
[DP-SGD] Epoch 11: train_loss=2.2839, val_loss=2.2925, val_acc=14.1000%, ε=5.7993
[DP-SGD] Epoch 12: train_loss=2.2791, val_loss=2.2912, val_acc=15.3000%, ε=6.0705
[DP-SGD] Epoch 13: train_lo

(16.8, 15.43, np.float64(7.996436921674794))

In [23]:
# Lazy Greedy section: CIFAR run with N_SAMPLES = 500.
DATASET_NAME = "CIFAR"
# Forwarded to select_dataset_greedy as its n_samples argument.
N_SAMPLES = 500
# NOTE(review): the three loaders are rebound at notebook scope and are NOT
# passed to run_experiment below; presumably it reads them as globals, so this
# line must run before the experiment -- confirm against run_experiment.
train_loader, val_loader, test_loader = select_dataset_greedy(DATASET_NAME, N_SAMPLES)

# TARGET_EPSILON and TARGET_DELTA are defined outside this cell.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
# Kept as the bare last expression so the notebook displays the return value;
# the recorded output suggests a (best val acc, test acc, epsilon) tuple.
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

100%|██████████| 500/500 [00:02<00:00, 246it/s]



Running DP-SGD baseline for fixed (8, 1e-05)-DP


  loss.backward()


[DP-SGD] Epoch 1: train_loss=2.2994, val_loss=2.3072, val_acc=12.6000%, ε=1.9909
[DP-SGD] Epoch 2: train_loss=2.2991, val_loss=2.3067, val_acc=12.5000%, ε=2.6240
[DP-SGD] Epoch 3: train_loss=2.3008, val_loss=2.3061, val_acc=11.9500%, ε=3.1260
[DP-SGD] Epoch 4: train_loss=2.2996, val_loss=2.3053, val_acc=11.4500%, ε=3.5598
[DP-SGD] Epoch 5: train_loss=2.2908, val_loss=2.3045, val_acc=11.8000%, ε=3.9499
[DP-SGD] Epoch 6: train_loss=2.2914, val_loss=2.3039, val_acc=12.0000%, ε=4.3089

Early stopping at epoch 6 (no improvement for 5 epochs).

[DP-SGD]
Best val acc: 12.6000%, Test acc: 12.2500%
 ε=4.3089
Total runtime: 10.1027 seconds


(12.6, 12.25, np.float64(4.308944961647152))

### K-Medoids

In [None]:
# K-Medoids section: CIFAR run with N_CLUSTERS = 100.
DATASET_NAME = "CIFAR"
# Second positional argument to select_dataset_kmedoids (defined outside this
# view); presumably the number of medoids to select -- TODO confirm.
N_CLUSTERS = 100
# NOTE(review): the three loaders are rebound at notebook scope and are NOT
# passed to run_experiment below; presumably it reads them as globals, so this
# line must run before the experiment -- confirm against run_experiment.
train_loader, val_loader, test_loader = select_dataset_kmedoids(DATASET_NAME, N_CLUSTERS)

# TARGET_EPSILON and TARGET_DELTA are defined outside this cell.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
# Kept as the bare last expression so the notebook displays whatever
# run_experiment returns as the cell output.
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

In [None]:
# K-Medoids section: CIFAR run with N_CLUSTERS = 200.
DATASET_NAME = "CIFAR"
# Second positional argument to select_dataset_kmedoids (defined outside this
# view); presumably the number of medoids to select -- TODO confirm.
N_CLUSTERS = 200
# NOTE(review): the three loaders are rebound at notebook scope and are NOT
# passed to run_experiment below; presumably it reads them as globals, so this
# line must run before the experiment -- confirm against run_experiment.
train_loader, val_loader, test_loader = select_dataset_kmedoids(DATASET_NAME, N_CLUSTERS)

# TARGET_EPSILON and TARGET_DELTA are defined outside this cell.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
# Kept as the bare last expression so the notebook displays whatever
# run_experiment returns as the cell output.
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)

In [None]:
# K-Medoids section: CIFAR run with N_CLUSTERS = 300.
DATASET_NAME = "CIFAR"
# Second positional argument to select_dataset_kmedoids (defined outside this
# view); presumably the number of medoids to select -- TODO confirm.
N_CLUSTERS = 300
# NOTE(review): the three loaders are rebound at notebook scope and are NOT
# passed to run_experiment below; presumably it reads them as globals, so this
# line must run before the experiment -- confirm against run_experiment.
train_loader, val_loader, test_loader = select_dataset_kmedoids(DATASET_NAME, N_CLUSTERS)

# TARGET_EPSILON and TARGET_DELTA are defined outside this cell.
print(f"\nRunning DP-SGD baseline for fixed ({TARGET_EPSILON}, {TARGET_DELTA})-DP")
# Kept as the bare last expression so the notebook displays whatever
# run_experiment returns as the cell output.
run_experiment(dataset=DATASET_NAME, use_dp=True, fixed_privacy_budget=True)