# Differential Privacy for Vision Tasks

## Settings und Imports

In [2]:
# suppress warnings
import warnings

import opacus

warnings.filterwarnings('ignore')

#autoreload other packages when code changed
%load_ext autoreload
%autoreload 2

In [3]:
import torch

torch.manual_seed(20)  #Reproduzierbarkeit
from torch import nn
from torch.utils.data import DataLoader
import torchvision

from opacus import PrivacyEngine
from opacus.validators import ModuleValidator

from tqdm.notebook import tqdm

In [4]:
#Own Code
from privacyflow.configs import path_configs
from privacyflow.datasets import faces_dataset
from privacyflow.models import face_models

In [5]:
#Check if GPU is available
if torch.cuda.is_available():
    print("GPU will be used")
    device = torch.device('cuda')
else:
    print("No GPU available")
    device = torch.device('cpu')

GPU will be used


## Data Prep

In [6]:
label_columns = 'all'  #40 attributes

data_augmentation_train = torchvision.transforms.Compose([
    #torchvision.transforms.Resize((224,224)),
    torchvision.transforms.AutoAugment(),
    torchvision.transforms.ToTensor(),
])

data_augmentation_test = torchvision.transforms.Compose([
    #torchvision.transforms.Resize((224,224)),
    torchvision.transforms.ToTensor()
])

data_augmentation_train_with_resize = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.AutoAugment(),
    torchvision.transforms.ToTensor(),
])

data_augmentation_test_with_resize = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224, 224)),
    torchvision.transforms.ToTensor()
])


train_dataset_cnn = faces_dataset.FacesDataset(label_cols=label_columns, mode="train",transform=data_augmentation_train)
val_dataset_cnn = faces_dataset.FacesDataset(label_cols=label_columns, mode="val", transform=data_augmentation_test)
test_dataset_cnn = faces_dataset.FacesDataset(label_cols=label_columns, mode="test", transform=data_augmentation_test)

train_dataset_vit = faces_dataset.FacesDataset(label_cols=label_columns, mode="train",transform=data_augmentation_train_with_resize)
val_dataset_vit = faces_dataset.FacesDataset(label_cols=label_columns, mode="val",transform=data_augmentation_test_with_resize)
test_dataset_vit = faces_dataset.FacesDataset(label_cols=label_columns, mode="test",transform=data_augmentation_test_with_resize)

## Model - Base

In [11]:
def load_saved_model(model_type:str = "cnn", alt_path:str= None):
    if model_type == "cnn":
        model = face_models.get_FaceModelResNet(40)
        path = path_configs.FACE_BASE_MODEL if not alt_path else alt_path
        model.load_state_dict(torch.load(path))
        model = model.to(device)
        return model

    elif model_type == "transformer":
        model = face_models.get_FaceVisionTransformer(40)
        path = path_configs.FACE_VIT_MODEL if not alt_path else alt_path
        model.load_state_dict(torch.load(path))
        model = model.to(device)
        return model

    elif model_type == "dense":
        model = face_models.get_FaceModelDenseNet(40)
        path = path_configs.FACE_DENSE_MODEL if not alt_path else alt_path
        model.load_state_dict(torch.load(path))
        model = model.to(device)
        return model


In [8]:
def train_model(model:nn.Module,
                criterion:nn.Module,
                optimizer:torch.optim.Optimizer,
                epochs:int,
                train_ds:torch.utils.data.Dataset,
                val_ds:torch.utils.data.Dataset,
                batch_size:int =32,
                num_workers:int=0,
                amount_labels:int=40,
                val:bool=True):
    torch.cuda.empty_cache()
    train_dl = DataLoader(train_ds, batch_size=batch_size,shuffle=True,num_workers=num_workers)
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0
        for model_inputs, labels in tqdm(train_dl):
            model_inputs = model_inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            model_outputs = model(model_inputs)
            loss = criterion(model_outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
        print(f"Epoch: {epoch + 1:2}",
              f"Train Loss: {epoch_loss / len(train_dl):.5f}")

        if val:
            torch.cuda.empty_cache()
            val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False,num_workers=num_workers)
            val_loss = 0.0
            num_corrects = 0
            model.eval()
            for model_inputs, labels in val_dl:
                model_inputs = model_inputs.to(device)
                labels = labels.to(device)
                model_outputs = model(model_inputs)
                loss = criterion(model_outputs, labels)
                val_loss += loss.item()
                num_corrects += int((model_outputs.round() == labels).sum())
            print(f"Val Loss: {val_loss / len(val_dl):.5f}",
                  f"Val Accuracy (all attributes): {num_corrects / (len(val_ds) * amount_labels)}")


def test_model(model:nn.Module,
               test_ds:torch.utils.data.Dataset,
               batch_size:int,
               num_workers:int =0,
               amount_labels=40):
    test_dl = DataLoader(test_ds, batch_size=batch_size,num_workers=num_workers,shuffle=False)
    model.eval()
    num_corrects = 0
    for model_inputs, labels in tqdm(test_dl):
        model_inputs = model_inputs.to(device)
        labels = labels.to(device)
        model_outputs = model(model_inputs)
        num_corrects += int((model_outputs.round() == labels).sum())
    print(f"Test Accuracy (all attributes): {num_corrects / (len(test_ds) * amount_labels)}")

In [15]:
model_vit_base_np = load_saved_model("transformer", alt_path=path_configs.FACE_VIT_NP_MODEL)
test_model(model=model_vit_base_np, test_ds=test_dataset_vit, batch_size=8,num_workers=0)

./privacyflow/models_trained/face_vit_np_model.pl
<class 'torchvision.models.vision_transformer.VisionTransformer'>


  0%|          | 0/2496 [00:00<?, ?it/s]

Test Accuracy (all attributes): 0.8046876565474401


In [9]:
#ResNet not pretrained
model_cnn_base_np = face_models.get_FaceModelResNet(40,pretrained=False).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_cnn_base_np.parameters(), lr=0.01)

train_model(model=model_cnn_base_np,
            criterion=criterion,
            optimizer=optimizer,
            epochs=20,
            train_ds=train_dataset_cnn,
            val_ds=val_dataset_cnn,
            batch_size=128,
            num_workers=8,
            amount_labels=40)

test_model(model=model_cnn_base_np, test_ds=test_dataset_cnn, batch_size=128, num_workers=8)
#torch.save(model_cnn_base.state_dict(), path_configs.FACE_BASE_MODEL)

  0%|          | 0/156 [00:00<?, ?it/s]

Test Accuracy (all attributes): 0.5497344955415289


In [18]:
#ResNet
model_cnn_base = face_models.get_FaceModelResNet(40).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_cnn_base.fc.parameters(), lr=0.01)

train_model(model=model_cnn_base,
            criterion=criterion,
            optimizer=optimizer,
            epochs=2,
            train_ds=train_dataset_cnn,
            val_ds=val_dataset_cnn,
            batch_size=128,
            num_workers=8,
            amount_labels=40)

test_model(model=model_cnn_base, test_ds=test_dataset_cnn, batch_size=128, num_workers=8)
#torch.save(model_cnn_base.state_dict(), path_configs.FACE_BASE_MODEL)

  0%|          | 0/636 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
#Vision Transformer
model_vit_base = face_models.get_FaceVisionTransformer(40).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_vit_base.heads.parameters(), lr=0.01)

train_model(model=model_vit_base,
            criterion=criterion,
            optimizer=optimizer,
            epochs=2,
            train_ds=train_dataset_vit,
            val_ds=train_dataset_vit,
            batch_size=32,
            num_workers=8,
            amount_labels=40,
            val=False)

test_model(model=model_vit_base, test_ds=test_dataset_vit, batch_size=128,num_workers=8)
#torch.save(model_vit_base.state_dict(), path_configs.FACE_VIT_MODEL)

In [9]:
#Vision Transformer not pretrained
model_vit_base_np = face_models.get_FaceVisionTransformer(40,pretrained=False).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_vit_base_np.parameters(), lr=0.01)

train_model(model=model_vit_base_np,
            criterion=criterion,
            optimizer=optimizer,
            epochs=6,
            train_ds=train_dataset_vit,
            val_ds=train_dataset_vit,
            batch_size=32,
            num_workers=4,
            amount_labels=40,
            val=False)

test_model(model=model_vit_base_np, test_ds=test_dataset_vit, batch_size=128,num_workers=8)
#torch.save(model_vit_base.state_dict(), path_configs.FACE_VIT_MODEL)

  0%|          | 0/5087 [00:00<?, ?it/s]

Epoch:  1 Train Loss: 0.43196


  0%|          | 0/5087 [00:00<?, ?it/s]

Epoch:  2 Train Loss: 0.42626


  0%|          | 0/5087 [00:00<?, ?it/s]

Epoch:  3 Train Loss: 0.42481


  0%|          | 0/5087 [00:00<?, ?it/s]

Epoch:  4 Train Loss: 0.42426


  0%|          | 0/5087 [00:00<?, ?it/s]

KeyboardInterrupt: 

## Model + DP-SGD

In [22]:
def train_model_dpsgd(model:nn.Module,
                criterion:nn.Module,
                optimizer:torch.optim.Optimizer,
                train_dl:torch.utils.data.DataLoader,
                privacy_engine:opacus.PrivacyEngine,
                val_dl:torch.utils.data.DataLoader = None,
                len_val_ds:int = 1,
                epochs:int = 5,
                amount_labels:int=40,
                max_epsilon:int= 10,
                val:bool=True):
    epsilon_reached = False
    for epoch in range(epochs):
        if epsilon_reached:
            break
        print(f"Start Training Epoch: {epoch + 1:2}")
        model.train()
        epoch_loss = 0.0
        for model_inputs, labels in tqdm(train_dl):
            try:
                model_inputs = model_inputs.to(device)
                labels = labels.to(device)
                #Forward + Backprop
                optimizer.zero_grad()
                model_outputs = model(model_inputs)
                loss = criterion(model_outputs, labels)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.item()
                #Check if epsilon exceeds threshold after each batch
                if max_epsilon < privacy_engine.accountant.get_epsilon(delta=1e-6):
                    print(f"ε Value {max_epsilon:2} reached in Epoch {epoch:2}")
                    epsilon_reached = True
                    break
            except RuntimeError:
                continue

        print(f"Finished Training Epoch: {epoch + 1:2}",
              f"Train Loss: {epoch_loss / len(train_dl):.5f}",
              f"ε:{privacy_engine.accountant.get_epsilon(delta=1e-6):.5f}")

        if val:
            val_loss = 0.0
            num_corrects = 0
            model.eval()
            for model_inputs, labels in val_dl:
                try:
                    model_inputs = model_inputs.to(device)
                    labels = labels.to(device)
                    model_outputs = model(model_inputs)
                    loss = criterion(model_outputs, labels)
                    val_loss += loss.item()
                    num_corrects += int((model_outputs.round() == labels).sum())
                except RuntimeError:
                    continue
            print(f"Val Loss: {val_loss / len(val_dl):.5f}",
                  f"Val Accuracy (all attributes): {num_corrects / (len_val_ds * amount_labels)}")
        print("-------------------------------------------------------------")

In [7]:
model_cnn_dpsgd = face_models.get_FaceModelResNet(40).to(device)
model_cnn_dpsgd = ModuleValidator.fix(model_cnn_dpsgd)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_cnn_dpsgd.fc.parameters(), lr=0.01)

In [8]:
data_augmentation_train = torchvision.transforms.Compose([
    torchvision.transforms.AutoAugment(),
    torchvision.transforms.ToTensor(),
])
train_dataset_cnn = faces_dataset.FacesDataset(label_cols=label_columns, mode="train",transform=data_augmentation_train)
train_dl = DataLoader(
    dataset=train_dataset_cnn,
    batch_size=8,
    shuffle=True
)

In [9]:
privacy_engine= PrivacyEngine()
model_cnn_dpsgd, optimizer, train_dl = privacy_engine.make_private(
    module=model_cnn_dpsgd,
    optimizer=optimizer,
    data_loader=train_dl,
    noise_multiplier=1.0, #Wie viel Rauschen wird hinzugefügt - Höher = weniger Rauschen
    max_grad_norm=1.0 #Gradienten größer als dieser Wert werden geclippt
)

In [None]:
train_model_dpsgd(model=model_cnn_dpsgd,
                  criterion=criterion,
                  optimizer=optimizer,
                  train_dl=train_dl,
                  privacy_engine=privacy_engine,
                  max_epsilon=1,
                  epochs=10,
                  val=False)

In [11]:
model_cnn_dpsgd.train()
for epoch in range(3):
    for dp_inputs,labels in tqdm(train_dl):
        try:
            dp_inputs = dp_inputs.to(device)
            labels = labels.to(device)

            model_cnn_dpsgd.zero_grad()
            optimizer.zero_grad()
            outputs = model_cnn_dpsgd(dp_inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        except RuntimeError as error:
            continue
        except:
            break
    print(f"Finished Training Epoch {epoch+1}")
    print(f"ε Value {privacy_engine.accountant.get_epsilon(delta=1e-6):.5f}")


  0%|          | 0/20347 [00:00<?, ?it/s]

0.018844130787517545
0.018844130787517545
---------------


KeyboardInterrupt: 

In [20]:
data_augmentation_train = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
])

train_dataset_cnn = faces_dataset.FacesDataset(label_cols=label_columns, mode="train",
                                               transform=data_augmentation_train)

train_dataset_cnn = faces_dataset.FacesDataset(label_cols=label_columns, mode="val",
                                               transform=data_augmentation_train)
train_dataloader_cnn = DataLoader(
    dataset=train_dataset_cnn,
    batch_size=4,
    shuffle=True
)
val_dataloader_cnn = DataLoader(
    dataset=val_dataset_cnn,
    batch_size=4,
    shuffle=False
)
test_dataloader_cnn = DataLoader(
    dataset=test_dataset_cnn,
    batch_size=4,
    shuffle=False
)

NameError: name 'val_dataset_cnn' is not defined

In [23]:
model_cnn_dpsgd = face_models.get_FaceModelResNet(40).to(device)
model_cnn_dpsgd = ModuleValidator.fix(model_cnn_dpsgd)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_cnn_dpsgd.fc.parameters(), lr=0.01)


#test_model(model=model_cnn_base, test_dl=test_dataloader_cnn, len_test_ds=len(test_dataset_cnn))

#torch.save(model_cnn_base.state_dict(), path_configs.FACE_BASE_MODEL)

Training Epoch:  1


  0%|          | 0/40693 [00:00<?, ?it/s]

KeyboardInterrupt: 