In [None]:
# inspiration from https://github.com/Gayatri-Priyadarsini/Membership-inference-attack

In [1]:
import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms
from torchvision.datasets import EMNIST, MNIST
from tqdm.notebook import tqdm, trange
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.autograd import Variable
import numpy as np

In [2]:
%matplotlib inline

In [73]:
def classification_accuracy(net_output, labels):
    outputs = net_output.detach().cpu().numpy()
    outputs = [np.argmax(i) for i in outputs]

    labels = labels.detach().cpu().numpy()

    sum = 0
    for a, b in zip(outputs, labels):
        if a == b: sum += 1

    return sum/len(labels)

In [83]:
def binary_accuracy(net_output, labels):
    outputs = net_output.detach().cpu().numpy()
    outputs = np.round(outputs)

    labels = labels.detach().cpu().numpy()

    sum = 0
    for a, b in zip(outputs, labels):
        if a == b: sum += 1

    return sum/len(labels)

# Main classifier

In [3]:
# ==============================================================================

# Setting the number of training samples
MNIST_TRAIN_LIMIT=20000
TRAIN_BATCH_SIZE=256
TEST_BATCH_SIZE=128

# ==============================================================================

mnist_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)),]
)

# Target model training data
train_data = MNIST("mnist", train=True, download=True, transform=mnist_transform)

# We don't need to use all the training data for MNIST as it's a simple dataset
train_data.data = train_data.data[:MNIST_TRAIN_LIMIT]
train_data.targets = train_data.targets[:MNIST_TRAIN_LIMIT]

# Target model test data
test_data = MNIST("mnist", train=False, download=True, transform=mnist_transform)

# Create data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=TRAIN_BATCH_SIZE)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=TEST_BATCH_SIZE)

In [4]:
class Classifier(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()

        self.stage1 = torch.nn.Sequential(
                torch.nn.Conv2d(1, 32, kernel_size=5, padding=0, stride=1),  # first Conv layer
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2),
                torch.nn.Conv2d(32, 32, kernel_size=5, padding=0, stride=1),  # second Conv layer
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2),
        )
        
        self.stage2 = torch.nn.Sequential(
                torch.nn.Linear(512, 256),
                torch.nn.ReLU(),
                torch.nn.Linear(256, 10),  # 10-class output
                torch.nn.Softmax(dim=-1),
        )


    def forward(self, x):
        out = self.stage1(x)
        out = out.view(out.size(0), -1)
        out = self.stage2(out)

        return out

In [77]:
target_model = Classifier()

In [78]:
optim = torch.optim.Adam(target_model.parameters(), lr=1e-4)
loss_criterion = torch.nn.CrossEntropyLoss()
for epoch in trange(10):
    train_correct = 0
    train_loss = 0.

    # Training loop
    for data, targets in train_loader:
        optim.zero_grad()
        output = target_model(data)

        # Update network
        loss = loss_criterion(output, targets)
        loss.backward()
        optim.step()

    print(epoch, ':', classification_accuracy(output, targets))

  0%|          | 0/10 [00:00<?, ?it/s]

0 : 0.71875
1 : 0.90625
2 : 0.9375
3 : 1.0
4 : 1.0
5 : 1.0
6 : 1.0
7 : 1.0
8 : 1.0
9 : 1.0


# Shadow model

In [96]:
# ==============================================================================

# Setting the number of training samples
EMNIST_TRAIN_LIMIT=10000
TRAIN_BATCH_SIZE=128
TEST_BATCH_SIZE=128

# ==============================================================================

emnist_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)),]
)


# train data
shadow_train = EMNIST("emnist", "digits", download=True, train=True, transform=emnist_transform)  # <- digits !

shadow_train.data = shadow_train.data[:EMNIST_TRAIN_LIMIT]
shadow_train.targets = shadow_train.targets[:EMNIST_TRAIN_LIMIT]

shadow_train_loader = torch.utils.data.DataLoader(shadow_train, batch_size=TRAIN_BATCH_SIZE)

# test data
shadow_test = EMNIST("emnist", "letters", download=True, train=False, transform=emnist_transform) # <- letters !

shadow_test.data = shadow_test.data[:EMNIST_TRAIN_LIMIT]
shadow_test.targets = shadow_test.targets[:EMNIST_TRAIN_LIMIT]

shadow_test_loader = torch.utils.data.DataLoader(shadow_test, batch_size=TEST_BATCH_SIZE)

In [97]:
class ShadowModel(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()

        self.stage1 = torch.nn.Sequential(
                torch.nn.Conv2d(1, 32, kernel_size=5, padding=0, stride=1),  # first Conv layer
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2),
                torch.nn.Conv2d(32, 32, kernel_size=5, padding=0, stride=1),  # second Conv layer
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2),
        )
        
        self.stage2 = torch.nn.Sequential(
                torch.nn.Linear(512, 256),
                torch.nn.ReLU(),
                torch.nn.Linear(256, 10),  # 10-class output
                torch.nn.Softmax(dim=-1),
        )


    def forward(self, x):
        out = self.stage1(x)
        out = out.view(out.size(0), -1)
        out = self.stage2(out)

        return out

In [98]:
shadow_model = ShadowModel()

In [99]:
shadow_optim = torch.optim.Adam(shadow_model.parameters(), lr=1e-4)
loss_criterion = torch.nn.CrossEntropyLoss()

for epoch in trange(10):
    
    # Training loop
    for data, targets in shadow_train_loader:
        shadow_optim.zero_grad()
        output = shadow_model(data)

        loss = loss_criterion(output, targets)
        loss.backward()
        shadow_optim.step()

    print(epoch, ':', classification_accuracy(output, targets))

  0%|          | 0/10 [00:00<?, ?it/s]

0 : 0.875
1 : 0.9375
2 : 0.9375
3 : 0.9375
4 : 1.0
5 : 1.0
6 : 1.0
7 : 1.0
8 : 1.0
9 : 1.0


In [100]:
# get shadow model outputs for training and testing data

shadow_outputs = []
testing_outputs = []

shadow_model.eval() 

for data, _ in shadow_train_loader:
    shadow_outputs.append(shadow_model(data))

for data, _ in shadow_test_loader:
    shadow_outputs.append(shadow_model(data))

In [101]:
shadow_outputs = torch.FloatTensor([d.detach().numpy() for d in shadow_outputs if d.shape == (128,10)])
shadow_outputs = shadow_outputs.view(shadow_outputs.shape[0]*shadow_outputs.shape[1],10)

In [102]:
Y1 = torch.ones([shadow_outputs.shape[0]//2,1])
Y2 = torch.zeros([shadow_outputs.shape[0]//2,1])

shadow_labels = torch.cat([Y1,Y2]).view(shadow_outputs.shape[0],1)

# Attack model

In [103]:
class ShadowDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
    
    def __len__(self):
            return len(self.labels)
    
    def __getitem__(self, idx):
            data = self.data[idx]
            label = self.labels[idx]
            
            return data, label

In [104]:
shadow_data = ShadowDataset(shadow_outputs, shadow_labels)

attack_train_loader = torch.utils.data.DataLoader(shadow_data, batch_size=16, shuffle=True)

In [117]:
class AttackNet(nn.Module):

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10,1000)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(1000, 50)
        self.relu2 = nn.ReLU()
        self.out = nn.Linear(50,1)
        self.out_act = nn.Sigmoid()
        
    def forward(self, input_):
        a1 = self.fc1(input_)
        h1 = self.relu1(a1)
        a2 = self.fc2(h1)
        h2 = self.relu2(a2)
        a3 = self.out(h2)
        y = self.out_act(a3)
        return y

In [124]:
attack_model = AttackNet()

In [125]:
opt = torch.optim.Adam(attack_model.parameters(), lr=1e-4, betas=(0.9, 0.999))
criterion = nn.BCELoss()

In [126]:
attack_model.train()

for epoch in trange(100):

    a = 0
    c = 0

    # Training loop
    for data, targets in attack_train_loader:
        opt.zero_grad()
        output = attack_model(data)

        loss = criterion(output, targets)

        loss.backward()
        opt.step()

        a += binary_accuracy(output, targets)
        c += 1

    if epoch % 5 == 0: print(epoch, ':', a/c)

  0%|          | 0/100 [00:00<?, ?it/s]

0 : 0.6576522435897436
5 : 0.7278145032051282
10 : 0.7411858974358975
15 : 0.7475961538461539
20 : 0.7585637019230769
25 : 0.7608673878205128
30 : 0.7639723557692307
35 : 0.7713341346153846
40 : 0.7734875801282052
45 : 0.7733373397435898
50 : 0.7748898237179487
55 : 0.7787459935897436
60 : 0.7806490384615384
65 : 0.7795472756410257
70 : 0.7834034455128205
75 : 0.7836538461538461
80 : 0.7837540064102564
85 : 0.7860076121794872
90 : 0.7867087339743589
95 : 0.7885116185897436


In [135]:
data, _ = next(iter(train_loader))

attack_prob = attack_model(target_model(data))
true_labels = torch.ones(attack_prob.shape)

print('Accuracy on included data:', binary_accuracy(attack_prob, true_labels))

Accuracy on included data: 0.8046875


In [136]:
data, _ = next(iter(test_loader))

attack_prob = attack_model(target_model(data))
true_labels = torch.ones(attack_prob.shape)

print('Accuracy on non-included data:', binary_accuracy(attack_prob, true_labels))

Accuracy on non-included data: 0.765625
