In [None]:
! pip install opacus

In [7]:
import torch
from torchvision import datasets, transforms
import numpy as np
import opacus
from tqdm import tqdm
import random

def seed_everything(seed: int):
    # Set the random seed for Python, NumPy, and PyTorch
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # If using CUDA
    # If you are using CuDNN, you can set it to deterministic mode for further reproducibility
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(123456)


# privacy related
target_eps = 1
delta = 1e-5
clipping_th = 1
sampling_rate = 0.1 
T = 100
E = int(T * sampling_rate)

# Loading MNIST Data
train_dataset = datasets.MNIST(
    '../mnist',
    train=True,
    download=True,
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(
                (0.1307,), (0.3081,)
            )
        ]
    )
)
test_dataset = datasets.MNIST(
    '../mnist',
    train=False,
    download=True,
    transform=transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize(
                (0.1307,), (0.3081,)
            )
        ]
    )
)

#
print(f'Number of examples in train dataset: {len(train_dataset)}')
print(f'Number of examples in test dataset: {len(test_dataset)}')
print(f'Total number of iteration: {T}, number of epoches: {E}, sampling rate: {sampling_rate}')

# dataloders
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=int(len(train_dataset) * sampling_rate),
    shuffle=True,
    num_workers=4,
    pin_memory=True
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=1024,
    shuffle=False,
    num_workers=4,
    pin_memory=True
)

# Creating a PyTorch Neural Network Classification Model and Optimizer
model = torch.nn.Sequential(
    torch.nn.Conv2d(1, 16, 8, 2, padding=3),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2, 1),

    torch.nn.Conv2d(16, 32, 4, 2),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2, 1),

    torch.nn.Flatten(),
    torch.nn.Linear(32 * 4 * 4, 32), torch.nn.ReLU(), torch.nn.Linear(32, 10)
)

print(f'Number of trainable parameters: {sum([p.numel() for p in model.parameters()])}')

optimizer_origin = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

Number of examples in train dataset: 60000
Number of examples in test dataset: 10000
Total number of iteration: 100, number of epoches: 10, sampling rate: 0.1
Number of trainable parameters: 26010


In [8]:
import opacus.accountants.rdp as privacy_accountant
PA = privacy_accountant.RDPAccountant()

# sub routine, give noise multiplier sigma, fixed iteration number, compute the privacy loss
def from_noise_to_epsilon(sigma, delta, sampling_rate, T):
    for i in range(T):
        PA.step(noise_multiplier = sigma, sample_rate = sampling_rate)
    result = PA.get_privacy_spent(delta = delta)
    PA.history = []
    return result[0]

# given privacy budget compute how much noise multiplier should be set to satisfy such
def from_privacy_budget_to_noise(target_eps, delta, sampling_rate, T):
    sigma_low = 0.1
    sigma_high = 50
    sigma_mid = (sigma_low + sigma_high) / 2
    # TODO, implement binary search
    tolerance = delta/10  
    
    
    while sigma_high - sigma_low > tolerance:
        sigma_mid = (sigma_low + sigma_high) / 2
        
        eps = from_noise_to_epsilon(
            sigma_mid, 
            delta, 
            sampling_rate, 
            T
        )
        if eps > target_eps:
            sigma_low = sigma_mid
        else:
            sigma_high = sigma_mid
            
    return (sigma_low + sigma_high) / 2

sigma_of_noise = from_privacy_budget_to_noise(target_eps=1, delta=delta, sampling_rate=sampling_rate, T=T)
print(f'To run {T} iteration with sampling rate {sampling_rate}')
print(f'Should set sigma = {sigma_of_noise:.6f} such that we have (({target_eps},{delta})-DP guarantee')

To run 100 iteration with sampling rate 0.1
Should set sigma = 4.277611 such that we have ((1,1e-05)-DP guarantee


In [9]:
# Attaching a Differential Privacy Engine to the Optimizer
privacy_engine = opacus.PrivacyEngine()

model, optimizer, dataloader = privacy_engine.make_private(
    module=model,
    optimizer=optimizer_origin,
    data_loader=train_loader,
    noise_multiplier=sigma_of_noise,
    max_grad_norm=clipping_th,
 )


In [14]:
def compute_per_grad_norm(iterator):
    per_grad_norm = 0
    for p in iterator:
        n_sample = p.shape[0]
        per_grad_norm += torch.norm(p.view(n_sample, -1), dim = 1, p = 2)**2
    per_grad_norm = per_grad_norm**0.5
    return per_grad_norm

def make_broadcastable(tensor_to_be_reshape, target_tensor):
    broadcasting_shape = (-1, *[1 for _ in target_tensor.shape[1:]])
    return tensor_to_be_reshape.reshape(broadcasting_shape)

def clip_per_example_grad(container, clipping_th):
    container = list(container)
    per_grad_norm = compute_per_grad_norm(container) + 1e-7

    ''' clipping '''
    scaling = torch.minimum(
        torch.ones_like(per_grad_norm),
        clipping_th / per_grad_norm
    )
    
    for i in range(len(container)):
        scale_factor = make_broadcastable(scaling, container[i])
        container[i] = container[i] * scale_factor

    return container

def clip_grad(model, clipping_th):
    contrainer = [p.grad_sample for p in model.parameters()]
    return clip_per_example_grad(contrainer, clipping_th)

def check_manual_clipping_correctness(model):
    clipped_grad_contrainer = clip_grad(model, clipping_th)
    for p, clipped_grad_sub_part in zip(model.parameters(), clipped_grad_contrainer):
        clipped_sumed_grad_by_opacus = p.summed_grad
        clipped_sumed_grad_manual = clipped_grad_sub_part.sum(dim = 0)
        print(torch.norm(clipped_sumed_grad_by_opacus), torch.norm(clipped_sumed_grad_manual))
        assert torch.allclose(torch.norm(clipped_sumed_grad_by_opacus), torch.norm(clipped_sumed_grad_manual), rtol = 1e-5), f'{torch.norm(clipped_sumed_grad_by_opacus)}, {torch.norm(clipped_sumed_grad_manual)},'


In [11]:
# Step 5: Training the private model over multiple epochs
def test(model, test_loader, device, delta):
    model.eval()
    losses = []
    accuracy = []
    with torch.no_grad():
        for _batch_idx, (data, target) in enumerate(tqdm(test_loader)):
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            losses.append(loss.item())
            accuracy.append((output.argmax(dim=1) == target).float().mean())

    print(f"\nTest Loss: {np.mean(losses):.6f}, accuracy:  {np.mean(accuracy):.6f}")

def train(model, train_loader, optimizer, epoch, device, delta):
    print(f'\n=> Epoch: {epoch}')
    model.train()
    losses = []
    accuracy = []
    for _batch_idx, (data, target) in enumerate(tqdm(train_loader)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        output = model(data)

        loss = criterion(output, target)
        loss.backward()

        optimizer.step()

        # check_manual_clipping_correctness(model)

        losses.append(loss.item())
        accuracy.append((output.argmax(dim=1) == target).float().mean())

    print(f"Train Loss: {np.mean(losses):.6f}, accuracy:  {np.mean(accuracy):.6f}")
    test(model, test_loader, device, delta)

#
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(E):
    train(model, train_loader, optimizer, epoch, device=device, delta=1e-5)


=> Epoch: 0


100%|██████████| 10/10 [00:19<00:00,  1.92s/it]


Train Loss: 1.717661, accuracy:  0.394117


100%|██████████| 10/10 [00:03<00:00,  3.33it/s]



Test Loss: 0.934917, accuracy:  0.715900

=> Epoch: 1


100%|██████████| 10/10 [00:17<00:00,  1.77s/it]


Train Loss: 1.134279, accuracy:  0.773433


100%|██████████| 10/10 [00:03<00:00,  3.25it/s]



Test Loss: 1.120249, accuracy:  0.837946

=> Epoch: 2


100%|██████████| 10/10 [00:14<00:00,  1.43s/it]


Train Loss: 1.182921, accuracy:  0.849233


100%|██████████| 10/10 [00:02<00:00,  4.47it/s]



Test Loss: 0.970666, accuracy:  0.881407

=> Epoch: 3


100%|██████████| 10/10 [00:14<00:00,  1.42s/it]


Train Loss: 0.954596, accuracy:  0.881500


100%|██████████| 10/10 [00:02<00:00,  4.07it/s]



Test Loss: 0.795841, accuracy:  0.900470

=> Epoch: 4


100%|██████████| 10/10 [00:14<00:00,  1.43s/it]


Train Loss: 0.755628, accuracy:  0.901617


100%|██████████| 10/10 [00:02<00:00,  4.82it/s]



Test Loss: 0.648888, accuracy:  0.916281

=> Epoch: 5


100%|██████████| 10/10 [00:14<00:00,  1.44s/it]


Train Loss: 0.618576, accuracy:  0.914850


100%|██████████| 10/10 [00:02<00:00,  4.89it/s]



Test Loss: 0.535862, accuracy:  0.927368

=> Epoch: 6


100%|██████████| 10/10 [00:14<00:00,  1.41s/it]


Train Loss: 0.523119, accuracy:  0.926000


100%|██████████| 10/10 [00:02<00:00,  4.73it/s]



Test Loss: 0.471344, accuracy:  0.934120

=> Epoch: 7


100%|██████████| 10/10 [00:13<00:00,  1.38s/it]


Train Loss: 0.474324, accuracy:  0.933450


100%|██████████| 10/10 [00:02<00:00,  4.87it/s]



Test Loss: 0.432128, accuracy:  0.939302

=> Epoch: 8


100%|██████████| 10/10 [00:14<00:00,  1.40s/it]


Train Loss: 0.424102, accuracy:  0.938050


100%|██████████| 10/10 [00:02<00:00,  4.74it/s]



Test Loss: 0.416884, accuracy:  0.939429

=> Epoch: 9


100%|██████████| 10/10 [00:13<00:00,  1.39s/it]


Train Loss: 0.390571, accuracy:  0.941500


100%|██████████| 10/10 [00:02<00:00,  4.85it/s]


Test Loss: 0.373229, accuracy:  0.943929



