In [1]:
import time
import warnings
import numpy as np

warnings.simplefilter("ignore")

import torch
import torchvision
from torch import nn, optim
import torch.multiprocessing as mp
import torchvision.transforms as transforms

from opacus import PrivacyEngine
from opacus.validators import ModuleValidator
from opacus.utils.batch_memory_manager import BatchMemoryManager

from tqdm.notebook import tqdm 


# Prefer CUDA, then MPS, and fall back to the CPU when neither is available.
# The chained conditional short-circuits, so MPS is only probed without CUDA.
device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

In [2]:
# Image preprocessing: convert to a tensor, then normalise each of the three
# channels with the fixed mean / standard-deviation values below.
_NORM_MEAN = (0.4914, 0.4822, 0.4465)
_NORM_STD = (0.2023, 0.1994, 0.2010)

transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]
)

In [3]:
# ---- Optimisation settings ----
EPOCHS = 1   # number of training epochs; alternative value noted by the author: 20
LR = 1e-3    # learning rate handed to the RMSprop optimizers

# ---- Differential-privacy settings ----
MAX_GRAD_NORM = 1.2   # forwarded as max_grad_norm to the privacy engine
EPSILON = 50.0        # forwarded as target_epsilon
DELTA = 1e-5          # forwarded as target_delta and used when querying epsilon

In [6]:
# Function to make the model differentially private
def make_private(model, optimizer, train_loader):
    """Attach the module-level ``privacy_engine`` to a model/optimizer/loader trio.

    The privacy budget and clipping norm come from the notebook constants
    ``EPOCHS``, ``EPSILON``, ``DELTA`` and ``MAX_GRAD_NORM``.

    Returns:
        The ``(model, optimizer, data_loader)`` triple produced by
        ``privacy_engine.make_private_with_epsilon``.
    """
    dp_settings = dict(
        module=model,
        optimizer=optimizer,
        data_loader=train_loader,
        epochs=EPOCHS,
        target_epsilon=EPSILON,
        target_delta=DELTA,
        max_grad_norm=MAX_GRAD_NORM,
    )
    private_model, private_optimizer, private_loader = (
        privacy_engine.make_private_with_epsilon(**dp_settings)
    )
    return private_model, private_optimizer, private_loader

# Function to calculate accuracy
def accuracy(preds, labels):
    """Return the fraction of predictions that equal their labels.

    Both inputs are converted to NumPy arrays and flattened before the
    comparison, so plain sequences are accepted and a ``(B, 1)`` label array
    is compared element-wise with ``(B,)`` predictions instead of being
    broadcast into a ``(B, B)`` matrix.

    Args:
        preds: Array-like of predicted class indices.
        labels: Array-like of true class indices with the same number of
            elements as ``preds``.

    Returns:
        The mean of the element-wise equality as a NumPy float
        (``nan`` when both inputs are empty).

    Raises:
        ValueError: If ``preds`` and ``labels`` differ in number of elements.
    """
    preds = np.asarray(preds).reshape(-1)
    labels = np.asarray(labels).reshape(-1)

    if preds.shape != labels.shape:
        raise ValueError(
            f"preds and labels must have the same number of elements, "
            f"got {preds.shape[0]} and {labels.shape[0]}"
        )

    return np.mean(preds == labels)

# Function for training the model
def _run_training_epoch(model, optimizer, loader, criterion, device, task):
    """Run one optimisation pass over ``loader``; return per-batch losses and accuracies."""
    losses = []
    accs = []

    for images, target in loader:
        optimizer.zero_grad()
        images = images.to(device)
        target = target.to(device)

        if task == 'multi_class':
            # Class-index targets for CrossEntropyLoss must be 1-D integers; a
            # floating-point target is interpreted as class probabilities.
            # Unlike squeeze(), reshape(-1) stays 1-D for a single-sample batch.
            target = target.reshape(-1).long()

        output = model(images)
        loss = criterion(output, target)

        preds = np.argmax(output.detach().cpu().numpy(), axis=1)
        labels = target.detach().cpu().numpy()

        losses.append(loss.item())
        accs.append(accuracy(preds, labels))

        loss.backward()
        optimizer.step()

    return losses, accs


def train(model, optimizer, train_loader, device, dp, task, max_physical_batch_size=128):
    """Train ``model`` for ``EPOCHS`` epochs and print per-epoch metrics.

    Args:
        model: Network to train; it is moved to ``device`` first.
        optimizer: Optimizer stepping ``model``'s parameters.
        train_loader: Iterable yielding ``(images, target)`` batches.
        device: Device the model and batches are moved to.
        dp: When true, batches are drawn through ``BatchMemoryManager`` and the
            spent epsilon is read from the module-level ``privacy_engine``;
            otherwise ``train_loader`` is iterated directly and epsilon is
            reported as infinity.
        task: ``'multi_class'`` flattens the labels to 1-D int64 class indices;
            any other value leaves the labels untouched.
        max_physical_batch_size: Upper bound on the physical batch size given
            to ``BatchMemoryManager`` in the DP run.

    Returns:
        None. The mean of the per-batch losses and accuracies, the epsilon, and
        the total wall-clock training time are printed.
    """
    start_time = time.time()

    model = model.to(device)
    # The loss module holds no per-epoch state, so one instance serves all epochs.
    criterion = nn.CrossEntropyLoss()

    for epoch in tqdm(range(EPOCHS), desc="Epoch", unit="epoch"):

        model.train()

        if dp:
            # BatchMemoryManager manages the memory usage (e.g. you can use bigger logical batches)
            with BatchMemoryManager(
                data_loader=train_loader,
                max_physical_batch_size=max_physical_batch_size,
                optimizer=optimizer,
            ) as loader:
                losses, accs = _run_training_epoch(
                    model, optimizer, loader, criterion, device, task
                )
            epsilon = privacy_engine.get_epsilon(DELTA)
        else:
            # The plain run needs no DP-specific loader wrapper.
            losses, accs = _run_training_epoch(
                model, optimizer, train_loader, criterion, device, task
            )
            epsilon = float('inf')

        print(
            f"Training Epoch {epoch+1:02d} \t"
            f"Loss: {np.mean(losses):.6f} | "
            f"Acc: {np.mean(accs) * 100:.6f} | "
            f"ε = {epsilon:.2f}"
        )

    end_time = time.time()
    training_time = end_time - start_time
    print(f'\nTraining Time \t\t{training_time:.2f} seconds')
                
# Function for testing the model
def test(model, test_loader, device, dp, task):
    model = model.to(device)
    
    model.eval()
    criterion = nn.CrossEntropyLoss()
        
    losses = []
    accs = []

    with torch.no_grad():
        for images, target in test_loader:
            images = images.to(device)
            target = target.to(device)

            if task == 'multi_class': 
                target = target.squeeze().float()
                
            output = model(images)
            loss = criterion(output, target)
            
            preds = np.argmax(output.detach().cpu().numpy(), axis=1)
            labels = target.detach().cpu().numpy()
            
            acc = accuracy(preds, labels)

            losses.append(loss.item())
            accs.append(acc)

    print(
        f"Test Set \t\t"
        f"Loss: {np.mean(losses):.6f} | "
        f"Acc: {np.mean(accs) * 100:.6f} "
    )

# Function for the overall pipeline
def pipeline(model, train_loader, test_loader, device, task=None):
    """Train and evaluate a model twice: first with DP, then without.

    A validator-fixed version of ``model`` is made private and trained/tested
    first; afterwards the ``model`` passed in is trained/tested without DP.

    Args:
        model: Network used as-is for the non-DP run and as the input to
            ``ModuleValidator.fix`` for the DP run.
        train_loader: Training data loader shared by both runs.
        test_loader: Evaluation data loader shared by both runs.
        device: Device forwarded to ``train`` and ``test``.
        task: Forwarded unchanged to ``train`` and ``test``.
    """
    # Optimizer for the non-private run, bound to the caller's model
    baseline_optimizer = optim.RMSprop(model.parameters(), lr=LR)

    # Some layers cannot be made differentially private, so the model is
    # passed through the validator's fix step before the engine is attached
    dp_ready_model = ModuleValidator.fix(model)
    dp_ready_optimizer = optim.RMSprop(dp_ready_model.parameters(), lr=LR)
    private_parts = make_private(dp_ready_model, dp_ready_optimizer, train_loader)
    private_model, private_optimizer, private_loader = private_parts

    # (banner, model, optimizer, training loader, dp flag) for each run, in order
    runs = (
        ('\n===== With DP =====', private_model, private_optimizer, private_loader, True),
        ('\n===== Without DP =====', model, baseline_optimizer, train_loader, False),
    )
    for banner, net, opt, loader, use_dp in runs:
        print(banner)
        train(net, opt, loader, device, use_dp, task)
        test(net, test_loader, device, use_dp, task)

In [6]:
# CIFAR 10
# NOTE(review): absolute, machine-specific data directory.
cifar_root = '/Users/valentinbiller/Downloads/data'
cifar_batch_size = 256

train_dataset = torchvision.datasets.CIFAR10(
    root=cifar_root, train=True, download=True, transform=transform
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=cifar_batch_size, shuffle=True
)

test_dataset = torchvision.datasets.CIFAR10(
    root=cifar_root, train=False, download=True, transform=transform
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=cifar_batch_size, shuffle=False
)

# The engine is a module-level global read by make_private() and train();
# the network is a ResNet-18 with ten output classes.
privacy_engine = PrivacyEngine()
model = torchvision.models.resnet18(num_classes=10)

pipeline(model, train_loader, test_loader, device)

Files already downloaded and verified
Files already downloaded and verified

===== With DP =====


Epoch:   0%|          | 0/20 [00:00<?, ?epoch/s]

Training Epoch 01 	Loss: 2.044835 | Acc: 29.813140 | ε = 15.40
Training Epoch 02 	Loss: 1.731952 | Acc: 44.228565 | ε = 18.97
Training Epoch 03 	Loss: 1.744378 | Acc: 47.843522 | ε = 21.78
Training Epoch 04 	Loss: 1.725699 | Acc: 49.884614 | ε = 24.21
Training Epoch 05 	Loss: 1.716553 | Acc: 51.629311 | ε = 26.40
Training Epoch 06 	Loss: 1.690524 | Acc: 53.580725 | ε = 28.43
Training Epoch 07 	Loss: 1.707080 | Acc: 54.283086 | ε = 30.34
Training Epoch 08 	Loss: 1.673652 | Acc: 55.519441 | ε = 32.14
Training Epoch 09 	Loss: 1.690878 | Acc: 55.878178 | ε = 33.87
Training Epoch 10 	Loss: 1.661158 | Acc: 56.794265 | ε = 35.54
Training Epoch 11 	Loss: 1.664296 | Acc: 57.527864 | ε = 37.14
Training Epoch 12 	Loss: 1.645446 | Acc: 58.255640 | ε = 38.70
Training Epoch 13 	Loss: 1.658301 | Acc: 58.339165 | ε = 40.22
Training Epoch 14 	Loss: 1.642412 | Acc: 59.203316 | ε = 41.70
Training Epoch 15 	Loss: 1.640699 | Acc: 59.127673 | ε = 43.15
Training Epoch 16 	Loss: 1.618777 | Acc: 60.014105 | ε 

Epoch:   0%|          | 0/20 [00:00<?, ?epoch/s]

Training Epoch 01 	Loss: 1.555629 | Acc: 44.900351 | ε = inf
Training Epoch 02 	Loss: 1.050227 | Acc: 62.592474 | ε = inf
Training Epoch 03 	Loss: 0.841995 | Acc: 70.547672 | ε = inf
Training Epoch 04 	Loss: 0.693986 | Acc: 75.778460 | ε = inf
Training Epoch 05 	Loss: 0.571016 | Acc: 80.101642 | ε = inf
Training Epoch 06 	Loss: 0.463993 | Acc: 83.962054 | ε = inf
Training Epoch 07 	Loss: 0.368484 | Acc: 87.204241 | ε = inf
Training Epoch 08 	Loss: 0.285049 | Acc: 90.141901 | ε = inf
Training Epoch 09 	Loss: 0.226170 | Acc: 92.120536 | ε = inf
Training Epoch 10 	Loss: 0.184158 | Acc: 93.557079 | ε = inf
Training Epoch 11 	Loss: 0.150194 | Acc: 94.693080 | ε = inf
Training Epoch 12 	Loss: 0.132126 | Acc: 95.380261 | ε = inf
Training Epoch 13 	Loss: 0.116747 | Acc: 96.020807 | ε = inf
Training Epoch 14 	Loss: 0.109167 | Acc: 96.247608 | ε = inf
Training Epoch 15 	Loss: 0.089109 | Acc: 96.926419 | ε = inf
Training Epoch 16 	Loss: 0.085142 | Acc: 96.988202 | ε = inf
Training Epoch 17 	Loss:

In [None]:
import medmnist
from medmnist import INFO

# NOTE(review): absolute, machine-specific data directory.
root = '/Users/valentinbiller/Downloads/'
datasets = ['dermamnist', 'pneumoniamnist', 'retinamnist', 'bloodmnist', 'organcmnist']

# Convert to a tensor and normalise with mean 0.5 / std 0.5.
transformMedMNIST = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5], std=[.5]),
])

# Run the DP / non-DP pipeline once per MedMNIST dataset.
for data_flag in datasets:

    info = INFO[data_flag]
    DataClass = getattr(medmnist, info['python_class'])

    train_dataset = DataClass(split='train', transform=transformMedMNIST, download=True, root=root)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=256, shuffle=True)

    test_dataset = DataClass(split='test', transform=transformMedMNIST, download=True, root=root)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=256, shuffle=False)

    num_classes = len(info['label'])
    n_channels = info['n_channels']

    # A fresh engine per dataset; make_private() and train() read this global.
    privacy_engine = PrivacyEngine()
    model = torchvision.models.resnet18(num_classes=num_classes)
    if n_channels != 3:
        # Swap the first convolution so its input channels match the dataset,
        # taken from the dataset metadata rather than a hard-coded name list.
        model.conv1 = nn.Conv2d(n_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

    print(f'\n\n\n ========== {data_flag} ========== \n')
    pipeline(model, train_loader, test_loader, device, task='multi_class')