# Differential Privacy with CIFAR 10 Dataset

## Settings and Imports

In [None]:
# suppress warnings
import warnings

import numpy as np

# Blanket filter: no category or module is given, so every warning is hidden
# for the rest of the session.
warnings.filterwarnings('ignore')

# Automatically reload imported modules when their source code changes
%load_ext autoreload
%autoreload 2

In [None]:
import torch

from torch import nn
from torch.utils.data import DataLoader
import torchvision

import opacus
from opacus import PrivacyEngine
from opacus.validators import ModuleValidator
from opacus.utils.batch_memory_manager import BatchMemoryManager

from tqdm.notebook import tqdm
import gc
import os

In [None]:
#Own Code
from privacyflow.configs import path_configs
from privacyflow.models import cifar_models

In [None]:
# Pick the compute device once: CUDA whenever PyTorch can see a GPU, CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("GPU will be used" if device.type == 'cuda' else "No GPU available")

## Data

In [None]:
# --- Transforms ---------------------------------------------------------------
# Evaluation images are only converted to tensors.
test_transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# Training images are augmented first and converted to tensors afterwards.
# NOTE(review): AutoAugment() is constructed without an explicit policy; torchvision
# also ships a CIFAR10-specific policy -- confirm which one is intended here.
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.AutoAugment(),
    torchvision.transforms.ToTensor(),
])

# --- Datasets -----------------------------------------------------------------
# Both splits live under the same root folder; download=True is passed for both.
cifar10_data_train = torchvision.datasets.CIFAR10(
    root=path_configs.CIFAR_FOLDER_PATH,
    train=True,
    download=True,
    transform=train_transform,
)
cifar10_data_test = torchvision.datasets.CIFAR10(
    root=path_configs.CIFAR_FOLDER_PATH,
    train=False,
    download=True,
    transform=test_transform,
)

# --- Loaders ------------------------------------------------------------------
# Only the training loader shuffles; the test loader keeps the dataset order.
train_dataloader = DataLoader(
    cifar10_data_train,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)
test_dataloader = DataLoader(
    cifar10_data_test,
    batch_size=64,
    shuffle=False,
    num_workers=8,
)

## Models

Train the base version of the CIFAR-10 model without DPSGD.

In [None]:
# Baseline CIFAR-10 classifier trained with a regular (non-private) Adam optimizer.
model_base = cifar_models.CifarCNNModel(output_size=10)
model_base = model_base.to(device)

# NLLLoss consumes log-probabilities -- presumably the model ends in a log-softmax; TODO confirm.
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(model_base.parameters(), lr=0.01)

for epoch in range(15):
    model_base.train()
    epoch_loss = 0.0  # sum of the per-batch losses of this epoch
    for model_inputs, labels in train_dataloader:
        model_inputs, labels = model_inputs.to(device), labels.to(device)

        # One optimisation step: reset gradients, forward, backward, update.
        optimizer.zero_grad()
        model_outputs = model_base(model_inputs)
        loss = criterion(model_outputs, labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
    # Reported value is the mean batch loss of the epoch.
    print(f"Epoch: {epoch + 1:2}, Train Loss: {epoch_loss / len(train_dataloader):.5f}")
# Saving the trained weights is intentionally disabled; uncomment to persist them.
#torch.save(model_base.state_dict(), f"{path_configs.MODELS_TRAINED_BASE_PATH}/cifar_10_base.pl")

In [None]:
@torch.no_grad()
def test_model(model:nn.Module,
               test_dl:"torch.utils.data.DataLoader | None" = None,
               len_test_dataset:"int | None" = None):
    """Evaluate ``model`` on a test set and print its top-1 accuracy.

    Gradient tracking is disabled for the whole call and the model is switched
    to eval mode (it is not switched back afterwards).

    Args:
        model: Network to evaluate. Its outputs are reduced with ``argmax``
            over the last dimension and compared with the labels.
        test_dl: Iterable yielding ``(inputs, labels)`` batches. ``None``
            (default) uses the notebook-level ``test_dataloader``, looked up
            when the function is called rather than when it is defined, so a
            re-created loader is picked up automatically.
        len_test_dataset: Denominator of the accuracy. ``None`` (default) uses
            the number of labels actually seen while iterating ``test_dl``,
            which keeps the result correct for any loader that is passed in.

    Raises:
        ValueError: If the denominator is zero (e.g. an empty loader).
    """
    if test_dl is None:
        test_dl = test_dataloader

    model.eval()
    num_corrects = 0
    num_samples = 0
    for model_inputs, labels in test_dl:
        model_inputs = model_inputs.to(device)
        labels = labels.to(device)
        model_outputs = model(model_inputs)

        num_corrects += int((torch.argmax(model_outputs,dim=-1) == labels).sum())
        num_samples += labels.size(0)

    # Fall back to the observed sample count when no explicit size was given.
    if len_test_dataset is None:
        len_test_dataset = num_samples
    if not len_test_dataset:
        raise ValueError("Cannot compute accuracy: the test set is empty")
    print(f"Test Accuracy: {num_corrects / len_test_dataset}")

In [None]:
# Print the test accuracy of the baseline model trained without DPSGD.
test_model(model_base)

## Models - DPSGD

The following cells contain code for training and testing multiple CIFAR-10 models with different parameter combinations.

In [None]:
def train_model_dpsgd(model:nn.Module,
                criterion:nn.Module,
                optimizer: opacus.optimizers.optimizer.DPOptimizer,
                train_dl:torch.utils.data.DataLoader,
                privacy_engine:opacus.PrivacyEngine,
                epochs:int=10,
                max_epsilon:int=10,
                delta:float = 1e-5,
                log_level:str = 'info'):
    """Train ``model`` with a DP optimizer, stopping early once the ε budget is exceeded.

    The model is moved to the notebook-level ``device`` and put into train mode.
    After every optimizer step the accountant of ``privacy_engine`` is asked for
    the ε spent so far; as soon as it is larger than ``max_epsilon`` the current
    epoch is aborted and no further epoch is started.

    Args:
        model: Network to train (updated in place).
        criterion: Loss applied to ``(model output, labels)``.
        optimizer: Optimizer whose ``zero_grad``/``step`` drive the updates.
        train_dl: Iterable of ``(inputs, labels)`` batches; ``len(train_dl)`` is
            used to average the epoch loss in debug logs.
        privacy_engine: Engine whose ``accountant.get_epsilon`` reports the spent ε.
        epochs: Maximum number of passes over ``train_dl``.
        max_epsilon: ε threshold that triggers the early stop.
        delta: δ handed to the accountant when querying ε.
        log_level: ``"debug"`` (case-insensitive) additionally prints ε and the
            mean train loss per epoch; any other value prints progress only.

    Returns:
        None. Progress is reported via ``print`` and a tqdm progress bar.
    """
    def spent_epsilon():
        # Queried lazily so the accountant is only touched where the loop needs it.
        return privacy_engine.accountant.get_epsilon(delta=delta)

    model.to(device)
    model.train()

    budget_exceeded = False
    for epoch in tqdm(range(epochs)):
        # The stop flag is checked at the top of the next iteration, not right
        # after the epoch, so the progress bar still registers the aborted epoch.
        if budget_exceeded:
            break

        running_loss = 0.0
        for batch_inputs, batch_labels in train_dl:
            batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_labels)
            batch_loss.backward()
            optimizer.step()
            running_loss += batch_loss.item()

            # Budget check after every step.
            if spent_epsilon() > max_epsilon:
                print(f"ε Value {max_epsilon:2} reached in Epoch {epoch+1:2}")
                budget_exceeded = True
                break

        # Per-epoch logging; printed for an aborted epoch as well.
        print(f"Finished Training Epoch: {epoch + 1:2}")
        if log_level.lower() != "debug":
            continue
        print(f"ε:{spent_epsilon():.5f}")
        if not budget_exceeded:
            # The mean loss is only meaningful for a fully completed epoch.
            print(f"Train Loss: {running_loss / len(train_dl):.5f}")

In [1]:
#CIFAR-10 DPSGD model parameters
# Batch size of the DataLoader that feeds the DPSGD training runs.
batch_size = 512
# Passed as target_delta to the privacy engine and as delta when querying the spent ε.
delta = 1e-5
# Passed as max_grad_norm (per-sample gradient clipping bound) to the privacy engine.
# NOTE(review): same numeric value as delta -- confirm this clipping bound is intended.
clipping_norm = 1e-5
# Grid axes: one model is trained for every (target ε, epoch count) combination.
target_epochs = [1,5,10,20,30,50]
target_epsilons = [1,5,10,20,30,50]

In [None]:
# The DPSGD runs read the training images without augmentation: ToTensor is the only transform.
train_transform_dpsgd = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])

# NOTE: this rebinds the notebook-level name `cifar10_data_train`, which so far
# referred to the augmented training set.
cifar10_data_train = torchvision.datasets.CIFAR10(
    root=path_configs.CIFAR_FOLDER_PATH,
    train=True,
    download=True,
    transform=train_transform_dpsgd,
)

# Source loader for the DPSGD runs; batches are loaded in the main process (num_workers=0).
train_dataloader_dpsgd = DataLoader(
    cifar10_data_train,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

In [None]:
# Grid search: train and evaluate one DPSGD model for every (target ε, epoch count) pair.
for target_epsilon in target_epsilons:
    for num_epochs in target_epochs:
        # Fresh model per run. output_size is passed explicitly to match the
        # baseline model, and the model is placed on the target device before
        # the optimizer is built from its parameters.
        model_dpsgd = cifar_models.CifarCNNModel(output_size=10)
        model_dpsgd = ModuleValidator.fix(model_dpsgd).to(device)
        criterion = nn.NLLLoss()
        optimizer = torch.optim.Adam(model_dpsgd.parameters(), lr=0.01)

        # The engine wraps model, optimizer and loader for a run of `num_epochs`
        # epochs with the given target ε / δ and clipping bound.
        privacy_engine = PrivacyEngine(accountant='rdp')
        model_dpsgd, optimizer, train_dl = privacy_engine.make_private_with_epsilon(
            module=model_dpsgd,
            optimizer=optimizer,
            data_loader=train_dataloader_dpsgd,
            epochs=num_epochs,
            target_epsilon=target_epsilon,
            target_delta=delta,
            max_grad_norm=clipping_norm,
        )
        print(f"Training CNN Model on CIFAR Dataset\nNum Epochs = {num_epochs}\ntarget_epsilon={target_epsilon}\nNoise Mult={optimizer.noise_multiplier:.4f}")

        # Collect garbage first so objects from the previous run that are only
        # kept alive by reference cycles are destroyed; only then can
        # empty_cache() hand their memory back to the device.
        gc.collect()
        torch.cuda.empty_cache()
        train_model_dpsgd(model=model_dpsgd,
                          criterion=criterion,
                          optimizer=optimizer,
                          train_dl=train_dl,
                          privacy_engine=privacy_engine,
                          max_epsilon=target_epsilon,
                          delta=delta,
                          epochs=num_epochs,
                          log_level="info")

        # NOTE: the file name below does not contain num_epochs, so runs that share
        # a target ε would overwrite each other if saving is re-enabled.
        #torch.save(model_dpsgd._module.state_dict(), f"{path_configs.MODELS_TRAINED_BASE_PATH}/cifar_epsilon{target_epsilon}_batch{batch_size}.pl")
        gc.collect()
        torch.cuda.empty_cache()
        print(f"Test CNN Model on CIFAR Dataset, trained with epochs={num_epochs:2}, batch_size={batch_size}, clipping-norm={clipping_norm} and ε={target_epsilon:2}")
        test_model(model_dpsgd)
        print("-----------------------------------------------")

## Non-working examples

The following cell contains code for the adaptive clipping norm.
This code throws an exception due to failed checks from the PyTorch framework.

In [None]:
# Experimental cell (documented in the notebook as not working): tries to train with
# adaptive clipping by wrapping the optimizer returned by the privacy engine in an
# AdaClipDPOptimizer.
from opacus.optimizers import AdaClipDPOptimizer

# Cell-local settings; these rebind the notebook-level delta / target_epsilon / num_epochs.
delta = 1e-5
target_epsilon = 10
num_epochs= 3

model_dpsgd = cifar_models.CifarCNNModel()
model_dpsgd = ModuleValidator.fix(model_dpsgd)
criterion =  nn.NLLLoss()
optimizer = torch.optim.Adam(model_dpsgd.parameters(), lr=0.01)


# NOTE(review): sample_rate and expected_batch_size are assigned here but not read
# again in this cell.
sample_rate = 1/len(train_dataloader_dpsgd)
expected_batch_size = train_dataloader_dpsgd.batch_size

# From here on `optimizer` is the optimizer returned by the privacy engine, no longer
# the plain Adam instance created above.
privacy_engine = PrivacyEngine(accountant='rdp')
model_dpsgd, optimizer, train_dl = privacy_engine.make_private_with_epsilon(
    module=model_dpsgd,
    optimizer=optimizer,
    data_loader=train_dataloader_dpsgd,
    epochs=num_epochs,
    target_epsilon=target_epsilon,
    target_delta=delta,
    max_grad_norm=1.0
)

# The engine-returned optimizer is wrapped a second time. noise_multiplier is the fixed
# literal 1.0 rather than the value exposed as optimizer.noise_multiplier.
# NOTE(review): Opacus appears to offer adaptive clipping directly through the privacy
# engine (a `clipping="adaptive"` option) -- TODO confirm against the installed version;
# that may be the intended way to obtain this optimizer.
optimizer_ada = AdaClipDPOptimizer(
    optimizer=optimizer,
    noise_multiplier=1.0,
    max_grad_norm=optimizer.max_grad_norm,
    expected_batch_size=train_dataloader_dpsgd.batch_size,
    target_unclipped_quantile=0.4,
    clipbound_learning_rate=0.01,
    max_clipbound=1.0,
    min_clipbound=0.0001,
    unclipped_num_std=1.0
)

# Trains for up to two epochs more than the privacy engine was configured for;
# train_model_dpsgd stops earlier once the reported ε exceeds max_epsilon.
train_model_dpsgd(model=model_dpsgd,
                  criterion=criterion,
                  optimizer=optimizer_ada,
                  train_dl=train_dl,
                  privacy_engine=privacy_engine,
                  max_epsilon=target_epsilon,
                  delta=delta,
                  epochs=num_epochs+2,
                  log_level="debug")