# Experimente - Datensatz Gesichter

## Settings und Imports

In [1]:
# suppress warnings
import warnings

warnings.filterwarnings('ignore')

#autoreload other packages when code changed
%load_ext autoreload
%autoreload 2

In [2]:
import torch

torch.manual_seed(20)  #Reproduzierbarkeit
from torch import nn
from torch.utils.data import DataLoader
import torchvision

import copy
from opacus import PrivacyEngine
from opacus.accountants import RDPAccountant

import pandas as pd
from tqdm.notebook import tqdm

In [3]:
#Own Code
from privacyflow.configs import path_configs
from privacyflow.datasets import faces_dataset
from privacyflow.models import face_models

In [4]:
#Check if GPU is available
if torch.cuda.is_available():
    print("GPU will be used")
    device = torch.device('cuda')
else:
    print("No GPU available")
    device = torch.device('cpu')

GPU will be used


## Data Prep

In [88]:
label_columns = 'all'  #40 attributes

data_augmentation_train = torchvision.transforms.Compose([

    # torchvision.transforms.Resize((224,224)), # Resize is done by model
    torchvision.transforms.AutoAugment(),
    torchvision.transforms.ToTensor(),
])

data_augmentation_test = torchvision.transforms.Compose([
    # torchvision.transforms.Resize((224,224)), # Resize is done by model
    torchvision.transforms.ToTensor()
])

train_dataset = faces_dataset.FacesDataset(label_cols=label_columns, mode="train", transform=data_augmentation_train)
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=128,
    shuffle=True,
    num_workers=8
)

val_dataset = faces_dataset.FacesDataset(label_cols=label_columns, mode="val", transform=data_augmentation_test)
val_dataloader = DataLoader(
    dataset=val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=8
)

test_dataset = faces_dataset.FacesDataset(label_cols=label_columns, mode="test", transform=data_augmentation_test)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=8
)

In [89]:
for (input, label) in train_dataloader:
    print(input.shape)
    break

torch.Size([128, 3, 218, 178])


## Model - Base

In [6]:
model_base_all_attributes = face_models.get_FaceModelBase(40).to(device)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model_base_all_attributes.parameters(), lr=0.01)

In [7]:
#Training
for epoch in range(8):
    model_base_all_attributes.train()
    epoch_loss = 0.0
    for model_inputs, labels in tqdm(train_dataloader):
        model_inputs = model_inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        model_outputs = model_base_all_attributes(model_inputs)
        loss = criterion(model_outputs, labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
    val_loss = 0.0
    num_corrects = 0
    model_base_all_attributes.eval()
    for model_inputs, labels in val_dataloader:
        model_inputs = model_inputs.to(device)
        labels = labels.to(device)
        model_outputs = model_base_all_attributes(model_inputs)
        loss = criterion(model_outputs, labels)
        val_loss += loss.item()

        num_corrects += int((model_outputs.round() == labels).sum())

    print(f"Epoch: {epoch + 1:2}",
          f"Train Loss: {epoch_loss / len(train_dataloader):.5f}",
          f"Val Loss: {val_loss / len(val_dataloader):.5f}",
          f"Val Accuracy (all attributes): {num_corrects / (len(val_dataset) * 40)}"
          )

  0%|          | 0/1272 [00:00<?, ?it/s]

Epoch:  1 Train Loss: 0.41122 Val Loss: 0.33358 Val Accuracy (all attributes): 0.8578346000906025


  0%|          | 0/1272 [00:00<?, ?it/s]

Epoch:  2 Train Loss: 0.27866 Val Loss: 0.23901 Val Accuracy (all attributes): 0.8961959530880355


  0%|          | 0/1272 [00:00<?, ?it/s]

Epoch:  3 Train Loss: 0.23990 Val Loss: 0.24795 Val Accuracy (all attributes): 0.8901696280263754


  0%|          | 0/1272 [00:00<?, ?it/s]

Epoch:  4 Train Loss: 0.22580 Val Loss: 0.21224 Val Accuracy (all attributes): 0.9069185080787235


  0%|          | 0/1272 [00:00<?, ?it/s]

Epoch:  5 Train Loss: 0.21757 Val Loss: 0.22101 Val Accuracy (all attributes): 0.903400110736397


  0%|          | 0/1272 [00:00<?, ?it/s]

Epoch:  6 Train Loss: 0.21171 Val Loss: 0.21374 Val Accuracy (all attributes): 0.9071236220868777


  0%|          | 0/1272 [00:00<?, ?it/s]

Epoch:  7 Train Loss: 0.20751 Val Loss: 0.20381 Val Accuracy (all attributes): 0.9107124880455026


  0%|          | 0/1272 [00:00<?, ?it/s]

Epoch:  8 Train Loss: 0.20394 Val Loss: 0.21619 Val Accuracy (all attributes): 0.9067712790053858


In [8]:
#Test
num_corrects = 0
for model_inputs, labels in test_dataloader:
    model_inputs = model_inputs.to(device)
    labels = labels.to(device)
    model_outputs = model_base_all_attributes(model_inputs)
    num_corrects += int((model_outputs.round() == labels).sum())
print(f"Test Accuracy (all attributes): {num_corrects / (len(test_dataset) * 40)}")

Test Accuracy (all attributes): 0.9030044584710951


In [10]:
torch.save(model_base_all_attributes.state_dict(), path_configs.FACE_BASE_MODEL)

## Membership Inference Attacke

In [9]:
#Create Dataset and Dataloader for Shadow Modell
shadow_model_ds1 = faces_dataset.FacesDataset(label_cols=label_columns, mode="all", transform=data_augmentation_train)
shadow_model_dl1 = DataLoader(dataset=shadow_model_ds1, batch_size=128, shuffle=True, num_workers=8)

shadow_model_ds2 = faces_dataset.FacesDataset(label_cols=label_columns, mode="train", transform=data_augmentation_train)
shadow_model_dl2 = DataLoader(dataset=shadow_model_ds2, batch_size=128, shuffle=True, num_workers=8)

shadow_model_ds3 = faces_dataset.FacesDataset(label_cols=label_columns, mode="custom",
                                              transform=data_augmentation_train, custom_range=range(1, 100_000))
shadow_model_dl3 = DataLoader(dataset=shadow_model_ds3, batch_size=128, shuffle=True, num_workers=8)

shadow_model_ds4 = faces_dataset.FacesDataset(label_cols=label_columns, mode="custom",
                                              transform=data_augmentation_train, custom_range=range(100_000, 202_600))
shadow_model_dl4 = DataLoader(dataset=shadow_model_ds4, batch_size=128, shuffle=True, num_workers=8)

shadow_model_ds5 = faces_dataset.FacesDataset(label_cols=label_columns, mode="custom",
                                              transform=data_augmentation_train, custom_range=range(50_000, 150_000))
shadow_model_dl5 = DataLoader(dataset=shadow_model_ds5, batch_size=128, shuffle=True, num_workers=8)

shadow_model_ds6 = faces_dataset.FacesDataset(label_cols=label_columns, mode="custom",
                                              transform=data_augmentation_train,
                                              custom_range=list(range(50_000, 100_000)) + list(range(150_000, 202_600)))
shadow_model_dl6 = DataLoader(dataset=shadow_model_ds6, batch_size=128, shuffle=True, num_workers=8)

In [10]:
#Train Shadow Models
shadow_models = []
for i, datal in enumerate([shadow_model_dl1, shadow_model_dl2, shadow_model_dl3,
                           shadow_model_dl4, shadow_model_dl5, shadow_model_dl6]):
    print(f"Start training of shadow model {i + 1}")
    shadow_model = face_models.get_FaceModelBase(40).to(device)

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(shadow_model.parameters(), lr=0.01)

    for epoch in range(8):
        #print(f"Epoch {epoch+1} for shadow model {i+1}")
        for model_inputs, labels in datal:
            model_inputs = model_inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            model_outputs = shadow_model(model_inputs)
            loss = criterion(model_outputs, labels)
            loss.backward()
            optimizer.step()
    shadow_models.append(shadow_model)
    #print(f"Finished training of shadow model {i+1}")

Start training of shadow model 1
Start training of shadow model 2
Start training of shadow model 3
Start training of shadow model 4
Start training of shadow model 5
Start training of shadow model 6


In [11]:
#Create Dataset for meta classifier
#Datasets with included datapoints
included_ranges = [
    range(1, 202_600),  #used in shadow_model_1
    range(1, 162_771),  #used in shadow_model_2
    range(1, 100_000),  #used in shadow_model_3
    range(100_000, 202_600),  #used in shadow_model_4
    range(50_000, 150_000),  #used in shadow_model_5
    list(range(50_000, 100_000)) + list(range(100_000, 150_000)),  #used in shadow_model_6
]

In [12]:
#Create Dataset for meta classifier
#get distributions from shadow models

dfs_total = []
ds = faces_dataset.FacesDataset(label_cols=label_columns, mode="all", transform=data_augmentation_test)
dl = DataLoader(
    dataset=ds,
    batch_size=128,
    shuffle=False,
    num_workers=8
)

for index, (shadow_model, included_range) in enumerate(zip(shadow_models, included_ranges)):
    print(f"Start getting data from shadow model {index + 1}")
    shadow_model = shadow_model.to(device)
    shadow_model.eval()
    dfs_batches = []
    for model_inputs, labels in dl:
        model_inputs = model_inputs.to(device)
        preds = shadow_model(model_inputs)
        preds_df = pd.DataFrame(preds.cpu().detach().numpy())
        dfs_batches.append(preds_df)
    df_epoch = pd.concat(dfs_batches)
    df_epoch['target'] = [int(index in included_range) for index in range(1, 202_600)]
    dfs_total.append(df_epoch)

dfs_total = pd.concat(dfs_total).reset_index()
dfs_total.to_csv(path_configs.FACE_MI_DATA, index=False)

Start getting data from shadow model 1
Start getting data from shadow model 2
Start getting data from shadow model 3
Start getting data from shadow model 4
Start getting data from shadow model 5
Start getting data from shadow model 6


In [73]:
#Train meta classifier
dfs_total = pd.read_csv(path_configs.FACE_MI_DATA)
if "index" in dfs_total:
    dfs_total = dfs_total.drop(columns='index')
meta_classifier_ds = faces_dataset.FaceMIDataset(dfs_total, target_column_name='target')
meta_classifier_dl = DataLoader(dataset=meta_classifier_ds, batch_size=128, shuffle=True, num_workers=8)

mi_model = face_models.FaceMIModel(input_size=40, output_size=1)
mi_model = mi_model.to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(mi_model.parameters(), lr=0.001)

for epoch in range(30):
    epoch_loss = 0.0
    num_correct_epoch = 0
    time_ones = 0
    print(f"Train Epoch {epoch + 1}")
    for model_inputs, label in tqdm(meta_classifier_dl):
        model_inputs = model_inputs.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        preds = mi_model(model_inputs).round()

        num_correct_epoch += (preds.round() == label).sum()
        time_ones += preds.round().sum()

        loss = criterion(preds, label)
        epoch_loss += loss.item()

        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1} loss: {epoch_loss:.5f}, Acc: {num_correct_epoch / len(meta_classifier_ds):.5f}")
    print(f"Predicted Label One {time_ones} times and Label Zero {len(meta_classifier_ds) - time_ones} times")

Train Epoch 1


  0%|          | 0/9497 [00:00<?, ?it/s]

Epoch 1 loss: 498897.81840, Acc: 0.47468
Predicted Label One 547056.0 times and Label Zero 668538.0 times
Train Epoch 2


  0%|          | 0/9497 [00:00<?, ?it/s]

Epoch 2 loss: 499025.02948, Acc: 0.47454
Predicted Label One 547490.0 times and Label Zero 668104.0 times
Train Epoch 3


  0%|          | 0/9497 [00:00<?, ?it/s]

Epoch 3 loss: 498827.50590, Acc: 0.47475
Predicted Label One 547276.0 times and Label Zero 668318.0 times
Train Epoch 4


  0%|          | 0/9497 [00:00<?, ?it/s]

Epoch 4 loss: 499425.00000, Acc: 0.47412
Predicted Label One 547501.0 times and Label Zero 668093.0 times
Train Epoch 5


  0%|          | 0/9497 [00:00<?, ?it/s]

Epoch 5 loss: 499026.26769, Acc: 0.47454
Predicted Label One 546952.0 times and Label Zero 668642.0 times
Train Epoch 6


  0%|          | 0/9497 [00:00<?, ?it/s]

Epoch 6 loss: 499131.41215, Acc: 0.47443
Predicted Label One 547315.0 times and Label Zero 668279.0 times
Train Epoch 7


  0%|          | 0/9497 [00:00<?, ?it/s]

Epoch 7 loss: 499317.80661, Acc: 0.47424
Predicted Label One 547510.0 times and Label Zero 668084.0 times
Train Epoch 8


  0%|          | 0/9497 [00:00<?, ?it/s]

Epoch 8 loss: 499418.18986, Acc: 0.47413
Predicted Label One 547176.0 times and Label Zero 668418.0 times
Train Epoch 9


  0%|          | 0/9497 [00:00<?, ?it/s]

Epoch 9 loss: 499051.13503, Acc: 0.47452
Predicted Label One 547509.0 times and Label Zero 668085.0 times
Train Epoch 10


  0%|          | 0/9497 [00:00<?, ?it/s]

Epoch 10 loss: 499227.15212, Acc: 0.47433
Predicted Label One 547429.0 times and Label Zero 668165.0 times
Train Epoch 11


  0%|          | 0/9497 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [68]:
#Large Mi Model
dfs_total = pd.read_csv(path_configs.FACE_MI_DATA)
if "index" in dfs_total:
    dfs_total = dfs_total.drop(columns='index')
meta_classifier_ds = faces_dataset.FaceMIDataset(dfs_total, target_column_name='target')
meta_classifier_dl = DataLoader(dataset=meta_classifier_ds, batch_size=128, shuffle=True, num_workers=8)

mi_model_large = face_models.FaceMIModelLarge(input_size=40, output_size=1)
mi_model_large = mi_model_large.to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(mi_model_large.parameters(), lr=0.003)

for epoch in range(10):
    epoch_loss = 0.0
    num_correct_epoch = 0
    time_ones = 0
    print(f"Train Epoch {epoch + 1}")
    for model_inputs, label in meta_classifier_dl:
        model_inputs = model_inputs.to(device)
        label = label.to(device)
        optimizer.zero_grad()
        preds = mi_model_large(model_inputs)
        loss = criterion(preds, label)
        loss.backward()

        time_ones += preds.round().sum()
        epoch_loss += loss.item()
        num_correct_epoch += (preds.round() == label).sum()

        optimizer.step()
    print(
        f"Epoch {epoch + 1:2}, Loss: {epoch_loss / len(meta_classifier_dl):.5f}, Acc: {num_correct_epoch / len(meta_classifier_ds):.5f}")
    #print(f"Predicted Label One {time_ones} times and Label Zero {len(meta_classifier_ds) - time_ones} times")

Train Epoch 1
Epoch  1, Loss: 0.62066, Acc: 0.63156
Predicted Label One 0 times and Label Zero 1215594 times
Train Epoch 2
Epoch  2, Loss: 0.60432, Acc: 0.63305
Predicted Label One 0 times and Label Zero 1215594 times
Train Epoch 3
Epoch  3, Loss: 0.59771, Acc: 0.63365
Predicted Label One 0 times and Label Zero 1215594 times
Train Epoch 4
Epoch  4, Loss: 0.59338, Acc: 0.63422
Predicted Label One 0 times and Label Zero 1215594 times
Train Epoch 5
Epoch  5, Loss: 0.58996, Acc: 0.63450
Predicted Label One 0 times and Label Zero 1215594 times
Train Epoch 6
Epoch  6, Loss: 0.58721, Acc: 0.63523
Predicted Label One 0 times and Label Zero 1215594 times
Train Epoch 7
Epoch  7, Loss: 0.58454, Acc: 0.63624
Predicted Label One 0 times and Label Zero 1215594 times
Train Epoch 8
Epoch  8, Loss: 0.58256, Acc: 0.63709
Predicted Label One 0 times and Label Zero 1215594 times
Train Epoch 9
Epoch  9, Loss: 0.58072, Acc: 0.63718
Predicted Label One 0 times and Label Zero 1215594 times
Train Epoch 10
Epoc

In [69]:
##Build Dataset for MI Attack on Model
#1000 Elements from Train and 1000 from Test
mi_attack_ds = faces_dataset.FacesDataset(label_cols=label_columns, mode="custom", transform=data_augmentation_test,
                                          custom_range=list(range(1, 1_001)) + list(range(201_600, 202_600)))
mi_attack_dl = DataLoader(
    dataset=mi_attack_ds,
    batch_size=128,
    shuffle=False,
    num_workers=8
)


def create_dataset_for_mi_attack(model) -> pd.DataFrame:
    model = model.to(device)
    model.eval()
    df_epochs = []
    for model_inputs, _ in mi_attack_dl:
        model_inputs = model_inputs.to(device)
        preds = model(model_inputs)
        preds_df = pd.DataFrame(preds.cpu().detach().numpy())
        df_epochs.append(preds_df)
    df_epochs = pd.concat(df_epochs)
    df_epochs['target'] = [1 if index in range(0, 1_000) else 0 for index in range(0, 2_000)]
    return df_epochs

In [70]:
##Evaluate MI Attack
def eval_mi_attack(df_mi_data, model_mi_attack=mi_model) -> float:
    mi_eval_ds = faces_dataset.FaceMIDataset(df=df_mi_data, target_column_name='target')
    mi_eval_dl = DataLoader(dataset=mi_eval_ds,
                            batch_size=128,
                            shuffle=False,
                            num_workers=8)

    time_ones = 0
    num_correct_included = 0.0
    for inputs_pred, labels_included in mi_eval_dl:
        inputs_pred = inputs_pred.to(device)
        labels_included = labels_included.to(device)
        outputs = model_mi_attack(inputs_pred)
        num_correct_included += (outputs.round() == labels_included).sum()
        time_ones += int(outputs.round().sum())
    accuracy_mi = num_correct_included / len(mi_eval_ds)
    print(f"Accuracy MI:{accuracy_mi:.5f}, Predicted Label One {time_ones} times")
    return accuracy_mi


In [71]:
#MI Attack against Base Model
def mi_attack(model_to_attack, model_for_mi) -> float:
    df = create_dataset_for_mi_attack(model_to_attack)
    return eval_mi_attack(df, model_mi_attack=model_for_mi)

In [72]:
acc = mi_attack(model_base_all_attributes, mi_model)
acc_large = mi_attack(model_base_all_attributes, mi_model_large)


Accuracy MI:0.50350, Predicted Label One 1721 times
Accuracy MI:0.49950, Predicted Label One 1705 times


## Model Inversion

White Box Model Inversion (Reconstruction Attack)

In [5]:
#Create Copy of the base model (which we will attack)
model_base = face_models.get_FaceModelBase(40)
model_base.load_state_dict(torch.load(path_configs.FACE_BASE_MODEL))
model_base = model_base.to(device)

We will use 3 start tensors and try to recreate an image, with the labels from the first image.
The first tensor is a random initialized one, the second has all values initialized to 0.5. The last tensor is already an image, which looks quite similar to the target labels

In [208]:
transform_model_inversion = torchvision.transforms.Compose([
    torchvision.transforms.Resize((224, 224)),  # Resize is done by model
    torchvision.transforms.ToTensor()
])
model_inversion_start_tensors = faces_dataset.FacesDataset(label_cols='all', mode="custom",
                                                           transform=data_augmentation_test,
                                                           custom_range=list(range(1, 2)) + list(range(202576, 202577)))

#Target
target_label = model_inversion_start_tensors[0][1].unsqueeze(0).to(device)

#Start tensors
tensor1 = torch.rand([3, 224, 224], dtype=torch.float32, device=device).unsqueeze(0)
tensor2 = torch.zeros([3, 224, 224], dtype=torch.float, device=device).unsqueeze(0) + 0.5
tensor3 = model_inversion_start_tensors[1][0].to(device).unsqueeze(0)

In [211]:
def reconstruktion_attack(model: torchvision.models.resnet.ResNet, tensor: torch.Tensor, target: torch.Tensor,
                          learning_rate: float = 0.0001, num_epochs: int = 10_000, optimizer_rec="adam") -> torch.Tensor:
    tensor.requires_grad = True  #Should be true since we update the tensor according to its gradients
    optimizer_rec=torch.optim.Adam([tensor], lr=learning_rate) if optimizer_rec=="adam" else torch.optim.SGD([tensor],lr=learning_rate)
    for epoch in range(num_epochs):
        optimizer_rec.zero_grad()
        output = model(tensor)
        loss = nn.BCELoss()(output, target)
        loss.backward()
        optimizer_rec.step()

    tensor.require_grad = False
    return tensor

In [212]:
# tensor1 = reconstruktion_attack(model=model_base, tensor=tensor1, target=target_label)
# tensor2 = reconstruktion_attack(model=model_base, tensor=tensor2, target=target_label)
tensor3 = reconstruktion_attack(model=model_base, tensor=tensor3, target=target_label)

In [213]:
# torchvision.transforms.ToPILImage()(tensor1[0]).show()
# torchvision.transforms.ToPILImage()(tensor2[0]).show()
torchvision.transforms.ToPILImage()(tensor3[0]).show()

In [216]:
model_base(tensor3).round() == target_label

tensor([[True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True]], device='cuda:0')