## Preparation

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision import datasets
from IPython.display import clear_output
from tqdm import trange

In [2]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

dataset = datasets.CIFAR10("./data/", train=True, download=True, transform=transform)

train_target_dataset = torch.utils.data.Subset(dataset, torch.arange(0, 7500))
train_target_loader  = torch.utils.data.DataLoader(train_target_dataset, batch_size=128, shuffle=True, num_workers=1)

test_target_dataset = torch.utils.data.Subset(dataset, torch.arange(7500,15000))
test_target_loader  = torch.utils.data.DataLoader(test_target_dataset, batch_size=256, shuffle=True, num_workers=1)

train_shadow_dataset = torch.utils.data.Subset(dataset, torch.arange(22500,30000))
train_shadow_loader  = torch.utils.data.DataLoader(train_shadow_dataset, batch_size=128, shuffle=True, num_workers=1)

test_shadow_dataset = torch.utils.data.Subset(dataset, torch.arange(30000,37500))
test_shadow_loader  = torch.utils.data.DataLoader(test_shadow_dataset, batch_size=128, shuffle=True, num_workers=1)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 31034543.96it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data/


In [3]:
"""
We have to make the architecture available to be able to load models from file.
"""

class CNN(nn.Module):

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(3,  32, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)

        self.pool1 = nn.MaxPool2d(kernel_size=4)
        self.pool2 = nn.MaxPool2d(kernel_size=4)

        self.act = nn.Tanh()

        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):

        x = self.conv1(x)
        x = self.act(x)
        x = self.pool1(x)

        x = self.conv2(x)
        x = self.act(x)
        x = self.pool2(x)

        x = torch.flatten(x, 1)

        x = self.fc1(x)
        x = self.act(x)

        x = self.fc2(x)

        return x

In [4]:
def get_losses(model: nn.Module, data_loader: torch.utils.data.DataLoader) -> torch.tensor:
    """
    Get loss for every sample in <data_loader> of the <model>.
    """

    model.eval()

    losses = torch.zeros(len(data_loader.dataset))

    bz = data_loader.batch_size

    with torch.no_grad():

        for i, (data, target) in enumerate(data_loader):

            data, target = data.cuda(), target.cuda()

            output = model(data)

            loss = F.cross_entropy(output, target, reduction="none").cpu()

            losses[i * bz : (i+1) * bz] = loss

    return losses

## Part I: Baseline Attack

In [5]:
def compare_with_loss(data_loader: torch.utils.data.DataLoader, target_loss: float, *, is_member: bool) -> int:
    """
    Determine the number of members or non-members by comparing each loss value with the target loss.
    We call samples a member if the corresponding loss value is less than the target loss. The opposite holds for non-members.
    This method returns the estimated number of members or non-members.
    """

    num = 0

    # TODO: Get loss values for all samples #
    losses =

    # TODO: Compare each loss value with the target loss and increase the number of members / non-members if necessary #

    return num

SyntaxError: invalid syntax (<ipython-input-5-9367ca29ef1a>, line 11)

In [None]:
model = torch.load("./data/target_model.mdl")

# TODO: Calculate the average training loss #
avg_train_loss =

# TODO: Extract the estimated number of members and non-members #
num_member      =
num_non_member =

baseline_accuracy = (num_member + num_non_member) / (len(train_target_loader.dataset) + len(test_target_loader.dataset))

print("Accuracy", baseline_accuracy)

## Part II: Membership Attack Loss Threshold (MALT)

In [None]:
def computeMetrics(train_losses: torch.tensor, test_losses: torch.tensor) -> float:
    """
    We want to use this method to extract the loss value that achieves the best separation between
    the losses of members <train_losses> and non-members <test_losses>.

    The parameters are the negative losses such that the highest loss is now the lowest one (* -1).

    The general idea is to sort the negative losses and then consider every loss as a possible threshold.

    Hint: Keep in mind that, in general, loss values of members are much smaller than loss values of non-members.
          However, since we operate on negative losses, the opposite is true.
    """

    # TODO: Create a tensor that contains the indices of the sorted, concatenated training and test losses. #
    order =

    # TODO: This tensor holds the ground truth: starting with a '1' for every member followed by a '0' for every non-member #
    membership =

    # Number of correctly classified samples per threshold
    accuracies = torch.zeros_like(membership)

    # Hint: Keep in mind that the idea of a threshold is to separate non-members (on the left) from members (on the right).
    #       Maybe it helps to imagine this threshold as a slider that gets pushed to the right from small loss values
    #       to higher values.

    for threshold in range(len(accuracies)):

        # TODO: For every threshold:
        #       Sum up the correctly classified non-members on the left side of the threshold and the correctly classified
        #       members on the right side #
        accuracies[threshold] =

    # TODO: Use the accuracies to extract the index of the loss value with the best separation #
    loss_index =

    # TODO: Depending on whether the loss_index belongs to training or test samples output the corresponding loss value #
    threshold_loss =

    return -threshold_loss

In [None]:
shadow_model = torch.load("./data/shadow_model.mdl")

# TODO: Get loss values of both shadow train and test data #
train_losses =
test_losses  =

# Compute loss value which we want to use as threshold #
threshold_loss = computeMetrics(-train_losses, -test_losses)

# Extract estimated number of members and non_members
num_member      = compare_with_loss(train_target_loader, threshold_loss, is_member=True)
num_non_member  = compare_with_loss(test_target_loader, threshold_loss, is_member=False)

malt_accuracy = (num_member + num_non_member) / (len(train_target_loader.dataset) + len(test_target_loader.dataset))

print("Accuracy", malt_accuracy)