# Experimente - Datensatz Gesichter

## Settings und Imports

In [1]:
# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation notices from any
# library are hidden as well.
import warnings

warnings.filterwarnings('ignore')

# Automatically reload imported modules before each cell is executed, so that
# edits to the project code take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
torch.manual_seed(42)  # fixed seed for reproducibility
from torch import nn
from torch.utils.data import DataLoader
import torchvision

from opacus import PrivacyEngine
from opacus.accountants import RDPAccountant

import pandas as pd
from tqdm.notebook import tqdm

In [3]:
#Own Code
from privacyflow.configs import path_configs
from privacyflow.datasets import faces_dataset
from privacyflow.models import face_models

In [4]:
# Pick the compute device: CUDA when a GPU is visible, otherwise the CPU,
# and report which one was chosen.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("GPU will be used" if device.type == 'cuda' else "No GPU available")

GPU will be used


## Data Prep

In [28]:
# Use all 40 attributes as labels.
label_columns = 'all'

# Training pipeline: AutoAugment followed by tensor conversion.
# No Resize step here; resizing is done by the model.
data_augmentation_train = torchvision.transforms.Compose(
    [torchvision.transforms.AutoAugment(), torchvision.transforms.ToTensor()]
)

# Evaluation pipeline: tensor conversion only (resizing is done by the model).
data_augmentation_test = torchvision.transforms.Compose(
    [torchvision.transforms.ToTensor()]
)

# Training split: augmented and reshuffled.
train_dataset = faces_dataset.FacesDataset(
    label_cols=label_columns,
    mode="train",
    transform=data_augmentation_train,
)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=8)

# Validation split: no augmentation, fixed order.
val_dataset = faces_dataset.FacesDataset(
    label_cols=label_columns,
    mode="val",
    transform=data_augmentation_test,
)
val_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=8)

# Test split: no augmentation, fixed order.
test_dataset = faces_dataset.FacesDataset(
    label_cols=label_columns,
    mode="test",
    transform=data_augmentation_test,
)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=8)

## Model - Base

In [29]:
# Base classifier with one output per attribute (40 in total), moved to the
# selected device.
model_base_all_attributes = face_models.get_FaceModelBase(40)
model_base_all_attributes = model_base_all_attributes.to(device)

# Binary cross-entropy over the attribute outputs.
# NOTE(review): BCELoss needs probabilities in [0, 1] - presumably the model
# ends in a sigmoid; confirm in face_models.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(params=model_base_all_attributes.parameters(), lr=1e-2)

In [30]:
#Training
# Train the base model for 5 epochs. After every epoch the mean training loss,
# the mean validation loss and the validation accuracy over all attributes
# are printed.
for epoch in range(5):
    model_base_all_attributes.train()
    epoch_loss = 0.0
    for model_inputs, labels in tqdm(train_dataloader):
        model_inputs = model_inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        model_outputs = model_base_all_attributes(model_inputs)
        loss = criterion(model_outputs, labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Validation pass: eval mode plus no_grad, so no autograd graph is built
    # for the validation batches.
    val_loss = 0.0
    num_corrects = 0
    model_base_all_attributes.eval()
    with torch.no_grad():
        for model_inputs, labels in val_dataloader:
            model_inputs = model_inputs.to(device)
            labels = labels.to(device)
            model_outputs = model_base_all_attributes(model_inputs)
            loss = criterion(model_outputs, labels)
            val_loss += loss.item()

            # An output counts as correct when its rounded value equals the label.
            num_corrects += int((model_outputs.round() == labels).sum())

    # Accuracy is taken over every (sample, attribute) pair; 40 is the number
    # of attributes.
    print(f"Epoch: {epoch + 1:2}",
          f"Train Loss: {epoch_loss / len(train_dataloader):.5f}",
          f"Val Loss: {val_loss / len(val_dataloader):.5f}",
          f"Val Accuracy (all attributes): {num_corrects / (len(val_dataset) * 40)}"
          )

  0%|          | 0/1272 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [8]:
#Test
# Accuracy of the base model on the test split, over every
# (sample, attribute) pair.
# eval() is set explicitly because this cell can be run after an interrupted
# training cell, which may leave the model in train mode. no_grad() avoids
# building an autograd graph during inference.
model_base_all_attributes.eval()
num_corrects = 0
with torch.no_grad():
    for model_inputs, labels in test_dataloader:
        model_inputs = model_inputs.to(device)
        labels = labels.to(device)
        model_outputs = model_base_all_attributes(model_inputs)
        # An output counts as correct when its rounded value equals the label.
        num_corrects += int((model_outputs.round() == labels).sum())
print(f"Test Accuracy (all attributes): {num_corrects / (len(test_dataset) * 40)}")

Test Accuracy (all attributes): 0.8580315098687507


## Membership Inference Attacke

In [9]:
# Create the datasets and dataloaders for the shadow models.
def _shadow_dataset(mode, **dataset_kwargs):
    """Build a FacesDataset with the training augmentation for one shadow model."""
    return faces_dataset.FacesDataset(label_cols=label_columns, mode=mode,
                                      transform=data_augmentation_train, **dataset_kwargs)


def _shadow_loader(dataset):
    """Wrap a shadow dataset in a shuffling DataLoader (batch size 128, 8 workers)."""
    return DataLoader(dataset=dataset, batch_size=128, shuffle=True, num_workers=8)


# Shadow model 1: mode "all".
shadow_model_ds1 = _shadow_dataset("all")
shadow_model_dl1 = _shadow_loader(shadow_model_ds1)

# Shadow model 2: mode "train".
shadow_model_ds2 = _shadow_dataset("train")
shadow_model_dl2 = _shadow_loader(shadow_model_ds2)

# Shadow model 3: custom range 1 .. 99,999.
shadow_model_ds3 = _shadow_dataset("custom", custom_range=range(1, 100_000))
shadow_model_dl3 = _shadow_loader(shadow_model_ds3)

# Shadow model 4: custom range 100,000 .. 202,599.
shadow_model_ds4 = _shadow_dataset("custom", custom_range=range(100_000, 202_600))
shadow_model_dl4 = _shadow_loader(shadow_model_ds4)

# Shadow model 5: custom range 50,000 .. 149,999.
shadow_model_ds5 = _shadow_dataset("custom", custom_range=range(50_000, 150_000))
shadow_model_dl5 = _shadow_loader(shadow_model_ds5)

# Shadow model 6: two disjoint custom ranges, 50,000 .. 99,999 and 150,000 .. 202,599.
shadow_model_ds6 = _shadow_dataset(
    "custom",
    custom_range=list(range(50_000, 100_000)) + list(range(150_000, 202_600)),
)
shadow_model_dl6 = _shadow_loader(shadow_model_ds6)

In [10]:
#Train Shadow Models
def train_shadow_model(dataloader, num_epochs=5, lr=0.01):
    """Train one fresh shadow model on ``dataloader`` and return it.

    The loss and optimizer are local to this function, so training a shadow
    model does not overwrite the notebook-level ``criterion`` / ``optimizer``
    that belong to the base model.

    Args:
        dataloader: yields ``(model_inputs, labels)`` batches.
        num_epochs: number of passes over ``dataloader``.
        lr: learning rate for Adam.

    Returns:
        The trained shadow model, located on ``device``.
    """
    shadow_model = face_models.get_FaceModelBase(40).to(device)
    shadow_model.train()  # explicit, mirroring the base-model training loop

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(shadow_model.parameters(), lr=lr)

    for _ in range(num_epochs):
        for model_inputs, labels in dataloader:
            model_inputs = model_inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            model_outputs = shadow_model(model_inputs)
            loss = criterion(model_outputs, labels)
            loss.backward()
            optimizer.step()
    return shadow_model


# One shadow model per dataloader, stored in the order of the dataloaders.
shadow_models = []
for i, datal in enumerate([shadow_model_dl1, shadow_model_dl2, shadow_model_dl3,
                           shadow_model_dl4, shadow_model_dl5, shadow_model_dl6]):
    print(f"Start training of shadow model {i+1}")
    shadow_models.append(train_shadow_model(datal))
    #print(f"Finished training of shadow model {i+1}")

Start training of shadow model 1
Epoch 1 for shadow model 1
Finished training of shadow model 1
Start training of shadow model 2
Epoch 1 for shadow model 2
Finished training of shadow model 2
Start training of shadow model 3
Epoch 1 for shadow model 3
Finished training of shadow model 3
Start training of shadow model 4
Epoch 1 for shadow model 4
Finished training of shadow model 4
Start training of shadow model 5
Epoch 1 for shadow model 5
Finished training of shadow model 5
Start training of shadow model 6
Epoch 1 for shadow model 6
Finished training of shadow model 6


In [11]:
#Create Dataset for meta classifier
# For each shadow model (same order as `shadow_models`), the sample ids that
# were part of its training data. Each entry has to mirror the range used to
# build the corresponding shadow dataset, because it is the ground truth for
# the membership label ("target").
included_ranges = [
    range(1, 202_600),  # used in shadow_model_1
    range(1, 162_771),  # used in shadow_model_2
    range(1, 100_000),  # used in shadow_model_3
    range(100_000, 202_600),  # used in shadow_model_4
    range(50_000, 150_000),  # used in shadow_model_5
    # used in shadow_model_6: must match the custom_range of shadow_model_ds6
    # (50,000..99,999 plus 150,000..202,599), not the range of shadow model 5.
    list(range(50_000, 100_000)) + list(range(150_000, 202_600)),
]

In [24]:
#Create Dataset for meta classifier
# Get the output distributions from the shadow models: every shadow model
# predicts on the complete dataset (test-time transform, fixed order) and each
# prediction row is labelled with whether that sample was part of the model's
# training data.

ds = faces_dataset.FacesDataset(label_cols=label_columns, mode="all", transform=data_augmentation_test)
dl = DataLoader(
    dataset=ds,
    batch_size=128,
    shuffle=False,
    num_workers=8
)

shadow_frames = []
for index, (shadow_model, included_range) in enumerate(zip(shadow_models, included_ranges)):
    print(f"Start getting data from shadow model {index + 1}")
    shadow_model = shadow_model.to(device)
    shadow_model.eval()
    dfs_batches = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for model_inputs, labels in dl:
            model_inputs = model_inputs.to(device)
            preds = shadow_model(model_inputs)
            dfs_batches.append(pd.DataFrame(preds.cpu().detach().numpy()))
    df_epoch = pd.concat(dfs_batches, ignore_index=True)

    # A set gives O(1) membership tests; one of the entries is a plain list.
    members = set(included_range)
    # NOTE(review): assumes mode="all" yields the samples in id order 1..202,599,
    # so that row k belongs to sample id k + 1 - confirm in FacesDataset.
    df_epoch['target'] = [1 if sample_id in members else 0 for sample_id in range(1, 202_600)]
    shadow_frames.append(df_epoch)

# ignore_index drops the per-batch row numbers instead of turning them into an
# extra "index" column, which would otherwise end up in the CSV and in the
# feature columns next to the 40 prediction columns.
dfs_total = pd.concat(shadow_frames, ignore_index=True)
dfs_total.to_csv(path_configs.FACE_MI_DATA, index=False)

Start getting data from shadow model 1


KeyboardInterrupt: 

In [None]:
#Train meta classifier
# The meta classifier takes the 40 outputs of a shadow model and predicts one
# of two classes: sample was / was not in the shadow model's training data.
meta_classifier_ds = faces_dataset.FaceMIDataset(dfs_total, target_column_name='target')
# Shuffle: the rows are ordered by shadow model and by sample id, so
# consecutive batches would otherwise contain almost only one class.
meta_classifier_dl = DataLoader(dataset=meta_classifier_ds, batch_size=128, shuffle=True)

mi_model = face_models.FaceMIModel(input_size=40, output_size=2).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mi_model.parameters(), lr=0.01)

mi_model.train()
for epoch in range(1):
    for model_inputs, label in meta_classifier_dl:
        # Tensor.to() returns a new tensor, so the result has to be assigned.
        model_inputs = model_inputs.to(device)
        label = label.to(device)

        # Clear the gradients of the previous step; otherwise they accumulate.
        optimizer.zero_grad()
        preds = mi_model(model_inputs)
        loss = criterion(preds, label)
        loss.backward()
        optimizer.step()