In [1]:
# inspiration from https://github.com/Gayatri-Priyadarsini/Membership-inference-attack

In [2]:
import matplotlib.pyplot as plt
import torch
import torchvision.transforms as transforms
from torchvision.datasets import EMNIST, MNIST
from tqdm.notebook import tqdm, trange
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.autograd import Variable
import numpy as np

In [3]:
%matplotlib inline

In [4]:
def classification_accuracy(net_output, labels):
    outputs = net_output.detach().cpu().numpy()
    outputs = [np.argmax(i) for i in outputs]

    labels = labels.detach().cpu().numpy()

    sum = 0
    for a, b in zip(outputs, labels):
        if a == b: sum += 1

    return sum/len(labels)

In [5]:
def binary_accuracy(net_output, labels):
    outputs = net_output.detach().cpu().numpy()
    outputs = np.round(outputs)

    labels = labels.detach().cpu().numpy()

    sum = 0
    for a, b in zip(outputs, labels):
        if a == b: sum += 1

    return sum/len(labels)

# Main classifier

In [6]:
# ==============================================================================

# Setting the number of training samples
MNIST_TRAIN_LIMIT=20000
TRAIN_BATCH_SIZE=256
TEST_BATCH_SIZE=128

# ==============================================================================

mnist_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)),]
)

# Target model training data
train_data = MNIST("mnist", train=True, download=True, transform=mnist_transform)

# We don't need to use all the training data for MNIST as it's a simple dataset
train_data.data = train_data.data[:MNIST_TRAIN_LIMIT]
train_data.targets = train_data.targets[:MNIST_TRAIN_LIMIT]

# Target model test data
test_data = MNIST("mnist", train=False, download=True, transform=mnist_transform)

# Create data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=TRAIN_BATCH_SIZE)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=TEST_BATCH_SIZE)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting mnist/MNIST/raw/train-images-idx3-ubyte.gz to mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting mnist/MNIST/raw/train-labels-idx1-ubyte.gz to mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting mnist/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting mnist/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist/MNIST/raw



In [7]:
class Classifier(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()

        self.stage1 = torch.nn.Sequential(
                torch.nn.Conv2d(1, 32, kernel_size=5, padding=0, stride=1),  # first Conv layer
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2),
                torch.nn.Conv2d(32, 32, kernel_size=5, padding=0, stride=1),  # second Conv layer
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2),
        )
        
        self.stage2 = torch.nn.Sequential(
                torch.nn.Linear(512, 256),
                torch.nn.ReLU(),
                torch.nn.Linear(256, 10),  # 10-class output
                torch.nn.Softmax(dim=-1),
        )


    def forward(self, x):
        out = self.stage1(x)
        out = out.view(out.size(0), -1)
        out = self.stage2(out)

        return out

In [8]:
target_model = Classifier()

In [9]:
optim = torch.optim.Adam(target_model.parameters(), lr=1e-4)
loss_criterion = torch.nn.CrossEntropyLoss()
for epoch in trange(10):
    train_correct = 0
    train_loss = 0.

    # Training loop
    for data, targets in train_loader:
        optim.zero_grad()
        output = target_model(data)

        # Update network
        loss = loss_criterion(output, targets)
        loss.backward()
        optim.step()

    print(epoch, ':', classification_accuracy(output, targets))

  0%|          | 0/10 [00:00<?, ?it/s]

0 : 0.6875
1 : 0.84375
2 : 0.96875
3 : 0.96875
4 : 1.0
5 : 1.0
6 : 1.0
7 : 1.0
8 : 1.0
9 : 1.0


# Shadow model

In [10]:
# ==============================================================================

# Setting the number of training samples
EMNIST_TRAIN_LIMIT=10000
TRAIN_BATCH_SIZE=128
TEST_BATCH_SIZE=128

# ==============================================================================

emnist_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,)),]
)


# train data
shadow_train = EMNIST("emnist", "digits", download=True, train=True, transform=emnist_transform)  # <- digits !

shadow_train.data = shadow_train.data[:EMNIST_TRAIN_LIMIT]
shadow_train.targets = shadow_train.targets[:EMNIST_TRAIN_LIMIT]

shadow_train_loader = torch.utils.data.DataLoader(shadow_train, batch_size=TRAIN_BATCH_SIZE)

# test data
shadow_test = EMNIST("emnist", "letters", download=True, train=False, transform=emnist_transform) # <- letters !

shadow_test.data = shadow_test.data[:EMNIST_TRAIN_LIMIT]
shadow_test.targets = shadow_test.targets[:EMNIST_TRAIN_LIMIT]

shadow_test_loader = torch.utils.data.DataLoader(shadow_test, batch_size=TEST_BATCH_SIZE)

Downloading https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip to emnist/EMNIST/raw/gzip.zip


  0%|          | 0/561753746 [00:00<?, ?it/s]

Extracting emnist/EMNIST/raw/gzip.zip to emnist/EMNIST/raw


In [11]:
class ShadowModel(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()

        self.stage1 = torch.nn.Sequential(
                torch.nn.Conv2d(1, 32, kernel_size=5, padding=0, stride=1),  # first Conv layer
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2),
                torch.nn.Conv2d(32, 32, kernel_size=5, padding=0, stride=1),  # second Conv layer
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=2),
        )
        
        self.stage2 = torch.nn.Sequential(
                torch.nn.Linear(512, 256),
                torch.nn.ReLU(),
                torch.nn.Linear(256, 10),  # 10-class output
                torch.nn.Softmax(dim=-1),
        )


    def forward(self, x):
        out = self.stage1(x)
        out = out.view(out.size(0), -1)
        out = self.stage2(out)

        return out

In [12]:
shadow_model = ShadowModel()

In [13]:
shadow_optim = torch.optim.Adam(shadow_model.parameters(), lr=1e-4)
loss_criterion = torch.nn.CrossEntropyLoss()

for epoch in trange(10):
    
    # Training loop
    for data, targets in shadow_train_loader:
        shadow_optim.zero_grad()
        output = shadow_model(data)

        loss = loss_criterion(output, targets)
        loss.backward()
        shadow_optim.step()

    print(epoch, ':', classification_accuracy(output, targets))

  0%|          | 0/10 [00:00<?, ?it/s]

0 : 0.75
1 : 0.9375
2 : 1.0
3 : 1.0
4 : 1.0
5 : 1.0
6 : 1.0
7 : 1.0
8 : 1.0
9 : 1.0


In [14]:
# get shadow model outputs for training and testing data

shadow_outputs = []
testing_outputs = []

shadow_model.eval() 

for data, _ in shadow_train_loader:
    shadow_outputs.append(shadow_model(data))

for data, _ in shadow_test_loader:
    shadow_outputs.append(shadow_model(data))

In [15]:
shadow_outputs = torch.FloatTensor([d.detach().numpy() for d in shadow_outputs if d.shape == (128,10)])
shadow_outputs = shadow_outputs.view(shadow_outputs.shape[0]*shadow_outputs.shape[1],10)

  """Entry point for launching an IPython kernel.


In [16]:
Y1 = torch.ones([shadow_outputs.shape[0]//2,1])
Y2 = torch.zeros([shadow_outputs.shape[0]//2,1])

shadow_labels = torch.cat([Y1,Y2]).view(shadow_outputs.shape[0],1)

# Attack model

In [17]:
class ShadowDataset(torch.utils.data.Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels
    
    def __len__(self):
            return len(self.labels)
    
    def __getitem__(self, idx):
            data = self.data[idx]
            label = self.labels[idx]
            
            return data, label

In [18]:
shadow_data = ShadowDataset(shadow_outputs, shadow_labels)

attack_train_loader = torch.utils.data.DataLoader(shadow_data, batch_size=16, shuffle=True)

In [27]:
class AttackNet(nn.Module):

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10,1000)
        self.relu1 = nn.LeakyReLU()
        self.fc2 = nn.Linear(1000, 50)
        self.relu2 = nn.LeakyReLU()
        self.out = nn.Linear(50,1)
        self.out_act = nn.Sigmoid()
        
    def forward(self, input_):
        a1 = self.fc1(input_)
        h1 = self.relu1(a1)
        a2 = self.fc2(h1)
        h2 = self.relu2(a2)
        a3 = self.out(h2)
        y = self.out_act(a3)
        return y

In [28]:
attack_model = AttackNet()

In [30]:
opt = torch.optim.Adam(attack_model.parameters(), lr=1e-3, betas=(0.9, 0.999))
criterion = nn.BCELoss()

In [31]:
attack_model.train()

for epoch in trange(100):

    a = 0
    c = 0

    # Training loop
    for data, targets in attack_train_loader:
        opt.zero_grad()
        output = attack_model(data)

        loss = criterion(output, targets)

        loss.backward()
        opt.step()

        a += binary_accuracy(output, targets)
        c += 1

    if epoch % 5 == 0: print(epoch, ':', a/c)

  0%|          | 0/100 [00:00<?, ?it/s]

0 : 0.6878004807692307
5 : 0.7544070512820513
10 : 0.7659755608974359
15 : 0.7697816506410257
20 : 0.7744391025641025
25 : 0.7760917467948718
30 : 0.7772435897435898
35 : 0.7786959134615384
40 : 0.7795973557692307
45 : 0.7808994391025641
50 : 0.7797976762820513
55 : 0.7795472756410257
60 : 0.783203125
65 : 0.7840544871794872
70 : 0.7830528846153846
75 : 0.7842047275641025
80 : 0.7843048878205128
85 : 0.7857071314102564
90 : 0.7858072916666666
95 : 0.7848056891025641


In [32]:
data, _ = next(iter(train_loader))

attack_prob = attack_model(target_model(data))
true_labels = torch.ones(attack_prob.shape)

print('Accuracy on included data:', binary_accuracy(attack_prob, true_labels))

Accuracy on included data: 0.78125


In [33]:
data, _ = next(iter(shadow_test_loader))

attack_prob = attack_model(target_model(data))
true_labels = torch.zeros(attack_prob.shape)

print('Accuracy on non-included data:', binary_accuracy(attack_prob, true_labels))

Accuracy on non-included data: 0.703125
