### Imports

In [None]:
import torch
from torchvision import datasets, transforms
import numpy as np
import pandas as pd
from opacus import PrivacyEngine
import seaborn as sns
import time
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# To run in Google Colab
import os
from google.colab import drive

### Data

In [None]:
# Preprocessing shared by both splits: convert each image to a tensor, then
# normalise it with mean 0.1307 and standard deviation 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split: downloaded into ../mnist when missing, served in shuffled
# batches of 64.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../mnist', train=True, download=True, transform=mnist_transform),
    batch_size=64,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)

# Evaluation split: read from the same ../mnist directory (no download flag),
# served in shuffled batches of 256.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../mnist', train=False, transform=mnist_transform),
    batch_size=256,
    shuffle=True,
    num_workers=1,
    pin_memory=True,
)

100%|██████████| 9.91M/9.91M [00:00<00:00, 18.7MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 602kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 5.57MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 4.76MB/s]


### PyTorch model and optimizer

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Small two-stage CNN producing 10 logits. For a 1x28x28 input the feature
# map entering Flatten is 32x4x4, which is what the first Linear layer expects.
base_model = torch.nn.Sequential(
    torch.nn.Conv2d(1, 16, 8, 2, padding=3),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2, 1),
    torch.nn.Conv2d(16, 32, 4, 2),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2, 1),
    torch.nn.Flatten(),
    torch.nn.Linear(32 * 4 * 4, 32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 10),
).to(device)

# Plain SGD over every parameter of the model above.
optimizer = torch.optim.SGD(base_model.parameters(), lr=0.05)

### Privacy engine (Opacus)

In [None]:
# Settings handed to Opacus when wrapping the training objects.
dp_settings = dict(
    noise_multiplier=1.1,  # scale of the noise added to the gradients
    max_grad_norm=1.0,     # clipping threshold applied to gradient norms
)

# make_private returns wrapped versions of the model, optimizer and loader.
# Note that `optimizer` is rebound to the wrapped optimizer here.
privacy_engine = PrivacyEngine()
model, optimizer, data_loader = privacy_engine.make_private(
    module=base_model,
    optimizer=optimizer,
    data_loader=train_loader,
    **dp_settings,
)



### Train loop

In [None]:
def train_opacus(model, pe, train_loader, optimizer, epoch, device, delta):
    """Train ``model`` for one epoch and report the privacy budget spent so far.

    Args:
        model: Module to train; it is switched to training mode.
        pe: Object exposing ``get_epsilon(delta)`` (here the Opacus privacy
            engine that wrapped the model/optimizer).
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the printed summary.
        device: Device each batch is moved to before the forward pass.
        delta: Delta passed to ``pe.get_epsilon`` and echoed in the summary.

    Returns:
        ``(epsilon, losses)`` where ``epsilon`` is the value returned by
        ``pe.get_epsilon(delta)`` after the epoch and ``losses`` is the list
        of per-batch loss values (Python floats) in iteration order.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    losses = []
    for data, target in tqdm(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    # Query the accountant once per epoch, after all steps have been taken.
    epsilon = pe.get_epsilon(delta)
    # The separator after the epoch number is a tab; the original string had a
    # literal "t" there, which glued "t" onto "Loss" in the log line.
    print(
        f"Train Epoch: {epoch} \t"
        f"Loss: {np.mean(losses):.6f} "
        f"(ε = {epsilon:.2f}, δ = {delta})")
    return epsilon, losses

def train(model, train_loader, optimizer, epoch, device):
    """Run one ordinary (non-private) training epoch.

    Every ``(data, target)`` batch from ``train_loader`` is moved to
    ``device``, pushed through ``model``, scored with cross-entropy and used
    for a single optimizer step. ``epoch`` is accepted but not used.

    Returns:
        The per-batch loss values as Python floats, in iteration order.
    """
    model.train()
    loss_fn = torch.nn.CrossEntropyLoss()
    batch_losses = []
    for inputs, labels in tqdm(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        batch_loss = loss_fn(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return batch_losses

### Main

In [None]:
def measure_model(model, pe, train_loader, optimizer, device, epochs=10, delta=1e-5):
    """Train with ``train_opacus`` for several epochs and time each one.

    Args:
        model, pe, train_loader, optimizer, device: Forwarded unchanged to
            ``train_opacus`` on every epoch.
        epochs: Number of epochs to run (default 10, the value that used to
            be hard-coded).
        delta: Delta forwarded to ``train_opacus`` (default 1e-5, the value
            that used to be hard-coded).

    Returns:
        ``(epsilon, loss, times)``: three lists with one entry per epoch,
        holding the epsilon returned by ``train_opacus``, the list of
        per-batch losses it returned, and the wall-clock duration of the
        epoch in seconds as measured by ``time.perf_counter``.
    """
    epsilon = []
    loss = []
    times = []
    for epoch in range(1, epochs + 1):
        t0 = time.perf_counter()
        eps, losses = train_opacus(
            model, pe, train_loader, optimizer, epoch, device=device, delta=delta
        )
        times.append(time.perf_counter() - t0)
        epsilon.append(eps)
        loss.append(losses)
    return epsilon, loss, times

### Measurement

In [None]:
def validate(model, data_loader, device=torch.device('cpu'), target=None):
    """
    Evaluate `model` on every batch of `data_loader` and print/return metrics.

    Predictions are the argmax over dimension 1 of the model output.
    With `target=None` precision, recall and F1 are macro-averaged over the
    classes; with a `target` value, labels and predictions are first reduced
    to "is `target`" / "is not `target`" and scored as a binary problem.

    Returns `(precision, recall, accuracy, f1, measures)`, where `measures`
    is the fixed list of metric names.
    """
    model.eval()
    labels, guesses = [], []

    # Inference only: no gradients are needed while collecting predictions.
    with torch.no_grad():
        for inputs, truth in data_loader:
            inputs = inputs.to(device)
            truth = truth.to(device)
            predicted = model(inputs).argmax(dim=1)
            labels.extend(truth.cpu().numpy())
            guesses.extend(predicted.cpu().numpy())

    if target is None:
        scoring = dict(average='macro', pos_label=None, zero_division=0)
    else:
        # One-vs-rest view of the problem: 1 marks the `target` class.
        labels = [1 if value == target else 0 for value in labels]
        guesses = [1 if value == target else 0 for value in guesses]
        scoring = dict(average='binary', pos_label=1, zero_division=0)

    measures = ["accuracy", "precision", "recall", "f1-score"]
    acc = accuracy_score(labels, guesses)
    prec = precision_score(labels, guesses, **scoring)
    rec = recall_score(labels, guesses, **scoring)
    f1 = f1_score(labels, guesses, **scoring)

    for caption, value in (
        ("Accuracy  |", acc),
        ("Precision |", prec),
        ("Recall    |", rec),
        ("F1 Score  |", f1),
    ):
        print(f"{caption} {value:.3f}")

    return prec, rec, acc, f1, measures

### Comparing different values for noise and clipping

In [None]:
def run_experiments(noise_list, clip_list, epochs=5, delta=1e-5, device="cpu"):
    """
    Train one differentially private model per (noise, clip) combination.

    For every pair drawn from `noise_list` x `clip_list` a fresh CNN and SGD
    optimizer are created, wrapped with a new `PrivacyEngine`, trained for
    `epochs` epochs with `train_opacus`, and evaluated with `validate` after
    each epoch. Reads the module-level `train_loader` and `test_loader`.

    Args:
        noise_list: Values used as `noise_multiplier`.
        clip_list: Values used as `max_grad_norm`.
        epochs: Training epochs per combination.
        delta: Delta forwarded to `train_opacus`.
        device: Device the model is created on and evaluated on.

    Returns:
        Two DataFrames with one row per (noise, clip, epoch):
        - training records with columns [noise, clip, epoch, loss, epsilon],
          where `loss` holds the list of per-batch losses of that epoch;
        - test records with columns
          [noise, clip, precision, recall, accuracy, f1, epoch].
    """
    records = []
    test_records = []
    for noise in noise_list:
        for clip in clip_list:
            # Start every configuration from freshly initialised weights.
            model = torch.nn.Sequential(
                torch.nn.Conv2d(1, 16, 8, 2, padding=3),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(2, 1),
                torch.nn.Conv2d(16, 32, 4, 2),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(2, 1),
                torch.nn.Flatten(),
                torch.nn.Linear(32 * 4 * 4, 32),
                torch.nn.ReLU(),
                torch.nn.Linear(32, 10),
            ).to(device)

            opt = torch.optim.SGD(model.parameters(), lr=0.05)
            pe = PrivacyEngine()

            batch_size = train_loader.batch_size
            sample_rate = batch_size / len(train_loader.dataset)

            # NOTE(review): `sample_rate` is kept exactly as in the original
            # call; confirm the installed Opacus version accepts this extra
            # keyword in make_private.
            model_priv, optimizer_priv, loader = pe.make_private(
                sample_rate=sample_rate,
                module=model,
                optimizer=opt,
                data_loader=train_loader,
                noise_multiplier=noise,
                max_grad_norm=clip,
            )
            for ep in range(1, epochs + 1):
                eps, loss = train_opacus(model_priv, pe, loader, optimizer_priv, ep, device, delta)
                records.append({
                    "noise": noise,
                    "clip": clip,
                    "epoch": ep,
                    "loss": loss,
                    "epsilon": eps,
                })
                # Evaluate on the device the model lives on; validate() would
                # otherwise keep the inputs on its CPU default and fail for a
                # model that was moved to the GPU.
                prec, rec, acc, f1, _ = validate(model_priv, test_loader, device=device)
                test_records.append({
                    "noise": noise,
                    "clip": clip,
                    "precision": prec,
                    "recall": rec,
                    "accuracy": acc,
                    "f1": f1,
                    "epoch": ep,
                })
    return pd.DataFrame(records), pd.DataFrame(test_records)

def run_base(epochs=5, device="cpu"):
    """
    Train the non-private baseline CNN and evaluate it after every epoch.

    A fresh model and SGD optimizer are created, trained with `train` on the
    module-level `train_loader`, and scored with `validate` on the
    module-level `test_loader` once per epoch.

    Args:
        epochs: Number of training epochs.
        device: Device the model is created on and evaluated on.

    Returns:
        Two DataFrames with one row per epoch:
        - training records with columns [epoch, loss], where `loss` holds
          the list of per-batch losses of that epoch;
        - test records with columns [precision, recall, accuracy, f1, epoch].
    """
    records = []
    test_records = []
    model = torch.nn.Sequential(
        torch.nn.Conv2d(1, 16, 8, 2, padding=3),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(2, 1),
        torch.nn.Conv2d(16, 32, 4, 2),
        torch.nn.ReLU(),
        torch.nn.MaxPool2d(2, 1),
        torch.nn.Flatten(),
        torch.nn.Linear(32 * 4 * 4, 32),
        torch.nn.ReLU(),
        torch.nn.Linear(32, 10),
    ).to(device)

    opt = torch.optim.SGD(model.parameters(), lr=0.05)

    for ep in range(1, epochs + 1):
        loss = train(model, train_loader, opt, ep, device)
        records.append({
            "epoch": ep,
            "loss": loss,
        })
        # Evaluate on the device the model lives on; validate() would
        # otherwise keep the inputs on its CPU default and fail for a model
        # that was moved to the GPU.
        prec, rec, acc, f1, _ = validate(model, test_loader, device=device)
        test_records.append({
            "precision": prec,
            "recall": rec,
            "accuracy": acc,
            "f1": f1,
            "epoch": ep,
        })
    return pd.DataFrame(records), pd.DataFrame(test_records)

# Grid searched by the private runs: every noise multiplier is paired with
# every clipping norm.
noises = [0.5, 1.0, 1.5, 2.0]
clips = [0.5, 1.0, 2.0]

# Private runs over the whole grid, five epochs per combination.
df_train, df_test = run_experiments(
    noise_list=noises,
    clip_list=clips,
    epochs=5,
    device=device,
)

# Non-private reference run with the function's default number of epochs.
df_train_base, df_test_base = run_base(device=device)

100%|██████████| 938/938 [00:33<00:00, 27.92it/s]


Accuracy  | 0.950
Precision | 0.952
Recall    | 0.950
F1 Score  | 0.947


100%|██████████| 938/938 [00:34<00:00, 27.44it/s]


Accuracy  | 0.985
Precision | 0.985
Recall    | 0.985
F1 Score  | 0.985


100%|██████████| 938/938 [00:31<00:00, 29.60it/s]


Accuracy  | 0.985
Precision | 0.985
Recall    | 0.985
F1 Score  | 0.985


100%|██████████| 938/938 [00:32<00:00, 28.95it/s]


Accuracy  | 0.989
Precision | 0.989
Recall    | 0.988
F1 Score  | 0.988


100%|██████████| 938/938 [00:33<00:00, 28.30it/s]


Accuracy  | 0.990
Precision | 0.989
Recall    | 0.989
F1 Score  | 0.989


### Save to directory for visualization

In [None]:
# Mount Google Drive so the results survive the Colab session.
drive.mount('/content/drive')

save_dir = '/content/drive/MyDrive/colab_results'
os.makedirs(save_dir, exist_ok=True)

# NOTE(review): only the baseline frames are written here; df_train/df_test
# from run_experiments are not saved in this cell — confirm that is intended.
for frame, filename in (
    (df_train_base, 'train_results_base.csv'),
    (df_test_base, 'validation_results_base.csv'),
):
    frame.to_csv(os.path.join(save_dir, filename), index=False)