In [None]:
# dp_mnist.py
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Hyperparameters for DP (fixed parameters)
# train_dp() caps the global L2 norm of each mini-batch gradient at
# fixed_clip_norm and then adds Gaussian noise whose standard deviation is
# noise_multiplier * fixed_clip_norm to every gradient entry.
fixed_clip_norm = 1.0         # fixed clipping threshold
noise_multiplier = 0.1        # fixed noise multiplier (σ)
batch_size = 64               # mini-batch size of the training DataLoader
epochs = 10                   # number of full passes over the training set
learning_rate = 0.01          # step size handed to optim.SGD
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define a simple neural network for MNIST
class SimpleMLP(nn.Module):
    """Two-layer fully connected classifier for 28x28 inputs.

    Each input is flattened to 784 features, passed through a 256-unit hidden
    layer with ReLU, and mapped to 10 raw logits (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return logits of shape (N, 10) for any input viewable as (N, 784)."""
        flat = x.view(-1, 28 * 28)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Prepare the MNIST dataset
# ToTensor is the only transform in the pipeline; no extra normalization is applied.
transform = transforms.Compose([transforms.ToTensor()])
# download=True fetches the data into ./data if it is not already there.
train_dataset = datasets.MNIST(root='./data', train=True,
                               transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False,
                              transform=transform, download=True)

# Training batches are reshuffled every epoch; the test set is read in order
# in fixed batches of 1000 since it is only used for evaluation.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Evaluation function
def evaluate(model, test_loader, device):
    """Return the fraction of samples in ``test_loader`` classified correctly.

    The model is switched to eval mode (and left there) and run without
    gradient tracking. The predicted class of each sample is the argmax over
    dim 1 of the model output.
    """
    model.eval()
    n_hits, n_seen = 0, 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            predicted = model(inputs).argmax(dim=1)
            n_hits += predicted.eq(labels).sum().item()
            n_seen += labels.size(0)
    return n_hits / n_seen

# Training loop implementing DP-SGD with fixed parameters
def train_dp():
    """Train ``SimpleMLP`` on MNIST with fixed gradient clipping and Gaussian noise.

    Every optimisation step:
      1. back-propagates the mean cross-entropy loss of the mini-batch,
      2. rescales the gradients so their global L2 norm (over all parameters)
         does not exceed ``fixed_clip_norm``,
      3. adds zero-mean Gaussian noise with standard deviation
         ``noise_multiplier * fixed_clip_norm`` to every gradient entry,
      4. applies a plain SGD update.

    After each epoch the mean training loss and the test accuracy are printed.
    The function reads the module-level hyperparameters, ``device``,
    ``train_loader`` and ``test_loader``; it takes no arguments and returns
    nothing.

    NOTE: clipping is applied to the aggregated mini-batch gradient, not to
    per-example gradients as DP-SGD prescribes, and no privacy accountant is
    run. This loop therefore mimics the mechanics of DP-SGD but does not by
    itself establish an (epsilon, delta) guarantee.
    """
    model = SimpleMLP().to(device)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    params = list(model.parameters())
    # Both factors are fixed for the whole run, so compute the noise scale once.
    noise_std = noise_multiplier * fixed_clip_norm

    for epoch in range(1, epochs+1):
        model.train()
        total_loss = 0.0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()

            output = model(data)
            loss = F.cross_entropy(output, target)
            loss.backward()

            # Rescale all gradients in place so that their combined L2 norm is
            # at most fixed_clip_norm (batch-level clipping, see docstring).
            nn.utils.clip_grad_norm_(params, max_norm=fixed_clip_norm)

            # Add Gaussian noise to each gradient. The noise is created on the
            # gradient's own device/dtype rather than on the global `device`.
            for param in params:
                grad = param.grad
                if grad is None:
                    continue
                noise = torch.normal(mean=0.0, std=noise_std, size=grad.shape,
                                     device=grad.device, dtype=grad.dtype)
                grad.add_(noise)

            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        test_acc = evaluate(model, test_loader, device)
        print(f'Epoch {epoch:02d} - Loss: {avg_loss:.4f}, Test Accuracy: {test_acc*100:.2f}%')

    # No privacy budget is computed; the closing message only marks the end of the run.
    print("Baseline DP-SGD finished. (Privacy accounting was not detailed here.)")

# Start training only when this code runs as the main program, not on import.
if __name__ == '__main__':
    train_dp()


Epoch 01 - Loss: 1.2169, Test Accuracy: 86.58%
Epoch 02 - Loss: 0.4888, Test Accuracy: 89.27%
Epoch 03 - Loss: 0.3923, Test Accuracy: 89.92%
Epoch 04 - Loss: 0.3530, Test Accuracy: 90.99%
Epoch 05 - Loss: 0.3298, Test Accuracy: 91.04%
Epoch 06 - Loss: 0.3148, Test Accuracy: 91.32%
Epoch 07 - Loss: 0.3022, Test Accuracy: 91.56%
Epoch 08 - Loss: 0.2927, Test Accuracy: 92.09%
Epoch 09 - Loss: 0.2833, Test Accuracy: 91.98%
Epoch 10 - Loss: 0.2751, Test Accuracy: 92.05%
Baseline DP-SGD finished. (Privacy accounting was not detailed here.)


In [None]:
# adp_mnist_fixed.py
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Initial hyperparameters for adaptive DP
# After every epoch train_adp() updates the clipping threshold as
#     clip_norm <- (1 - tau) * clip_norm + tau * alpha * avg_grad_norm
# where avg_grad_norm is the mean pre-clipping gradient norm of that epoch,
# and multiplies the noise multiplier by beta whenever the three most recent
# validation losses are strictly decreasing.
init_clip_norm = 1.0          # starting clipping threshold
init_noise_multiplier = 0.1   # initial noise multiplier (σ)
alpha = 0.2                   # factor to scale the average gradient norm (must be <= 1 to dampen explosive growth)
tau = 0.1                     # smoothing factor for moving average update of clip_norm
beta = 0.9                    # decay factor for noise multiplier when validation improves

batch_size = 64               # mini-batch size of the training DataLoader
epochs = 20                   # number of full passes over the training split
learning_rate = 0.01          # step size handed to optim.SGD
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define a simple neural network for MNIST
class SimpleMLP(nn.Module):
    """Two-layer fully connected classifier for 28x28 inputs.

    Each input is flattened to 784 features, passed through a 256-unit hidden
    layer with ReLU, and mapped to 10 raw logits (no softmax is applied).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return logits of shape (N, 10) for any input viewable as (N, 784)."""
        flat = x.view(-1, 28 * 28)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

# Prepare the MNIST dataset
# ToTensor is the only transform in the pipeline; no extra normalization is applied.
transform = transforms.Compose([transforms.ToTensor()])
# download=True fetches the data into ./data if it is not already there.
train_dataset = datasets.MNIST(root='./data', train=True,
                               transform=transform, download=True)
# Use part of the training set as a "validation" set
# (80% train / 20% validation). No generator is passed to random_split, so
# the split depends on the global RNG state and varies between runs unless a
# seed is set elsewhere.
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])

# Only the training split is shuffled; validation and test data are read in
# order in fixed batches of 1000 because they are used for evaluation only.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=1000, shuffle=False)
test_dataset = datasets.MNIST(root='./data', train=False,
                              transform=transform, download=True)
test_loader  = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Evaluation function (for both validation and test)
def evaluate(model, loader, device):
    """Return ``(mean_loss, accuracy)`` of ``model`` over all samples in ``loader``.

    The model is switched to eval mode (and left there) and run without
    gradient tracking. The loss is the cross-entropy summed over every sample
    and divided by the sample count, so unequal batch sizes are weighted
    correctly. Accuracy is the fraction of samples whose argmax over dim 1
    matches the target.
    """
    model.eval()
    summed_loss = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            logits = model(inputs)
            summed_loss += F.cross_entropy(logits, labels, reduction='sum').item()
            n_correct += logits.argmax(dim=1).eq(labels).sum().item()
            n_seen += labels.size(0)
    mean_loss = summed_loss / n_seen
    accuracy = n_correct / n_seen
    return mean_loss, accuracy

# Training loop implementing adaptive DP-SGD with stability fixes
def train_adp():
    """Train ``SimpleMLP`` on MNIST with an adaptive clip norm and noise multiplier.

    Every optimisation step back-propagates the mean cross-entropy loss of the
    mini-batch, rescales the gradients so their global L2 norm does not exceed
    the current ``clip_norm``, adds zero-mean Gaussian noise with standard
    deviation ``noise_multiplier * clip_norm`` to every gradient entry, and
    applies a plain SGD update.

    After every epoch:
      * ``clip_norm`` moves towards ``alpha * avg_grad_norm`` with smoothing
        factor ``tau``, where ``avg_grad_norm`` is the mean *pre-clipping*
        gradient norm of the epoch;
      * ``noise_multiplier`` is multiplied by ``beta`` when the three most
        recent validation losses are strictly decreasing;
      * training loss, validation loss, test accuracy and the updated
        clip norm / noise multiplier are printed.

    The function reads the module-level hyperparameters, ``device`` and the
    three data loaders; it takes no arguments and returns nothing.

    NOTE: clipping is applied to the aggregated mini-batch gradient, not to
    per-example gradients as DP-SGD prescribes. The clip norm and noise
    multiplier are also adapted from un-noised statistics (raw gradient norms,
    validation loss), and no privacy accountant is run despite the wording of
    the final message. This loop therefore does not by itself establish an
    (epsilon, delta) guarantee.
    """
    model = SimpleMLP().to(device)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    params = list(model.parameters())

    clip_norm = init_clip_norm
    noise_multiplier = init_noise_multiplier
    val_loss_history = []  # one validation loss per finished epoch

    for epoch in range(1, epochs+1):
        model.train()
        total_loss = 0.0
        batch_grad_norms = []  # pre-clipping gradient norms seen in this epoch
        # clip_norm and noise_multiplier only change between epochs, so the
        # noise scale is constant within one.
        noise_std = noise_multiplier * clip_norm

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()

            outputs = model(data)
            loss = F.cross_entropy(outputs, target)
            loss.backward()

            # Rescale all gradients in place so that their combined L2 norm is
            # at most clip_norm. The returned value is the norm before
            # clipping, which drives the adaptive update below.
            total_norm = nn.utils.clip_grad_norm_(params, max_norm=clip_norm)
            batch_grad_norms.append(total_norm.item())

            # Add Gaussian noise using the current clip_norm and noise
            # multiplier. The noise is created on the gradient's own
            # device/dtype rather than on the global `device`.
            for param in params:
                grad = param.grad
                if grad is None:
                    continue
                noise = torch.normal(mean=0.0, std=noise_std, size=grad.shape,
                                     device=grad.device, dtype=grad.dtype)
                grad.add_(noise)

            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        avg_grad_norm = sum(batch_grad_norms) / len(batch_grad_norms)

        # Evaluate on the validation set
        val_loss, val_acc = evaluate(model, val_loader, device)
        val_loss_history.append(val_loss)

        # Decay the noise multiplier when the last three validation losses are
        # strictly decreasing (i.e. two consecutive drops). The window slides
        # by one epoch, so the decay fires every epoch while the trend holds.
        if len(val_loss_history) >= 3 and val_loss_history[-3] > val_loss_history[-2] > val_loss_history[-1]:
            noise_multiplier = beta * noise_multiplier
            print("Validation loss decreased three epochs in a row. Reducing noise multiplier.")

        # Adaptive update for the next epoch: exponential moving average of
        # the clip norm towards alpha * (mean gradient norm of this epoch).
        clip_norm = (1 - tau) * clip_norm + tau * (alpha * avg_grad_norm)

        # Evaluate on the test set for monitoring (the test loss is not reported)
        _, test_acc = evaluate(model, test_loader, device)
        print(f"Epoch {epoch:02d}: Train Loss: {avg_loss:.4f}, Val Loss: {val_loss:.4f}, Test Acc: {test_acc*100:.2f}%")
        print(f"  (Adaptive clip norm: {clip_norm:.4f}, noise multiplier: {noise_multiplier:.4f})")

    print("Adaptive DP-SGD finished. (Privacy accounting was simulated.)")

# Start training only when this code runs as the main program, not on import.
if __name__ == '__main__':
    train_adp()


Epoch 01: Train Loss: 1.3210, Val Loss: 0.6808, Test Acc: 85.03%
  (Adaptive clip norm: 0.9184, noise multiplier: 0.1000)
Epoch 02: Train Loss: 0.5443, Val Loss: 0.4636, Test Acc: 88.60%
  (Adaptive clip norm: 0.8446, noise multiplier: 0.1000)
Validation loss decreased three epochs in a row. Reducing noise multiplier.
Epoch 03: Train Loss: 0.4272, Val Loss: 0.3985, Test Acc: 89.48%
  (Adaptive clip norm: 0.7783, noise multiplier: 0.0900)
Validation loss decreased three epochs in a row. Reducing noise multiplier.
Epoch 04: Train Loss: 0.3801, Val Loss: 0.3641, Test Acc: 90.25%
  (Adaptive clip norm: 0.7187, noise multiplier: 0.0810)
Validation loss decreased three epochs in a row. Reducing noise multiplier.
Epoch 05: Train Loss: 0.3540, Val Loss: 0.3465, Test Acc: 90.85%
  (Adaptive clip norm: 0.6655, noise multiplier: 0.0729)
Validation loss decreased three epochs in a row. Reducing noise multiplier.
Epoch 06: Train Loss: 0.3369, Val Loss: 0.3298, Test Acc: 91.14%
  (Adaptive clip norm