In [None]:
from models import ConvNet, MlleaksMLP,StudentNet
import torch.optim as optim
import torch
import torch.nn as nn
import numpy as np
import torchvision
from train_eval import train, eval_model, train_attacker, eval_attacker
from custom_dataloader import dataloader
import os
import argparse
from tqdm import tqdm
import torch.nn.functional as F
import torchvision.transforms as transforms

In [None]:
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', default='cifar', help='The dataset of choice between "cifar" and "mnist".')
parser.add_argument('--batch_size', default=64, type=int, help='The batch size used for training.')
parser.add_argument('--epoch', default=150, type=int, help='Number of epochs for shadow and target model.')
parser.add_argument('--attack_epoch', default=50, type=int, help='Number of epochs for attack model.')
parser.add_argument('--only_eval', default=False, type=bool, help='If true, only evaluate trained loaded models.')
parser.add_argument('--save_new_models', default=False, type=bool, help='If true, trained models will be saved.')
args = parser.parse_args(args=[])

In [None]:
dataset = args.dataset
args.save_new_models = True
args.only_eval = False
shadow_path, target_path, attack_path = "./models/shadow_" + str(dataset) + ".pth", \
                                        "./models/target_" + str(dataset) + ".pth", \
                                        "./models/attack_" + str(dataset) + ".pth"

In [None]:
if dataset == "cifar":
    input_size = 3
elif dataset == "mnist":
    input_size = 1

n_epochs = args.epoch
attack_epochs = args.attack_epoch
batch_size = args.batch_size

In [None]:
shadow_train_loader = dataloader(dataset=dataset, batch_size_train=batch_size, batch_size_test=1000,
                                     split_dataset="shadow_train")
shadow_out_loader = dataloader(dataset=dataset, batch_size_train=batch_size, batch_size_test=1000,
                                   split_dataset="shadow_out")
target_train_loader = dataloader(dataset=dataset, batch_size_train=batch_size, batch_size_test=1000,
                                     split_dataset="target_train")
target_out_loader = dataloader(dataset=dataset, batch_size_train=batch_size, batch_size_test=1000,
                                   split_dataset="target_out")

testloader = dataloader(dataset=dataset, batch_size_train=batch_size, batch_size_test=1000,
                            split_dataset="test")

In [None]:
target_net = StudentNet()

In [None]:
shadow_net = StudentNet()

In [None]:
target_net = target_net.to('cuda')

In [None]:
shadow_net = shadow_net.to('cuda')

In [None]:
target_loss = shadow_loss = nn.CrossEntropyLoss()
target_optim = optim.Adam(target_net.parameters(), lr=0.01)
shadow_optim = optim.Adam(shadow_net.parameters(), lr=0.001)

In [None]:
def train(model, data_loader, criterion, optimizer, verbose=True):
    """
    Function for model training step
    """
    running_loss = 0
    model.train()
    acc = 0
    for step, (batch_img, batch_label) in enumerate(tqdm(data_loader)):
        batch_img, batch_label = batch_img.to('cuda'), batch_label.to('cuda')
        optimizer.zero_grad()  # Set gradients to zero
        output = model(batch_img)  # Forward pass
        loss = criterion(output, batch_label)
        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights
        running_loss += loss
        yp = torch.max(output,dim=1)[1]
        acc += (yp == batch_label).sum().item()
        # Print loss for each minibatch
    if verbose:
        print("[%d/%d] loss = %f,acc = %f" % (step, len(data_loader), loss.item(),acc/12500))
    return running_loss

In [None]:
for i in range(20):
    train(shadow_net,shadow_train_loader,shadow_loss, shadow_optim,verbose=True)

In [None]:
torch.save(shadow_net.state_dict(),shadow_path)

In [None]:
eval_model(shadow_net, shadow_train_loader, report=True)

In [None]:
target_net = torch.nn.DataParallel(target_net)

In [None]:
target_net.load_state_dict(torch.load("./models/target_cifar1.pkl"))

In [None]:
shadow_net.load_state_dict(torch.load("./models/shadow_cifar.pth"))

In [None]:
eval_model(target_net, target_train_loader, report=True)

In [44]:
attack_net = MlleaksMLP()

In [45]:
attack_net = attack_net.to('cuda')

In [46]:
# Binary cross entropy as loss
attack_loss = nn.BCELoss()
attack_optim = optim.Adam(attack_net.parameters(), lr=0.005)

In [None]:
attack_net.load_state_dict(torch.load("./models/attack_cifar.pkl"))

In [48]:
optimizer = attack_optim
criterion = attack_loss
# shadow_net = target_net
shadow_net.eval()
verbose = True
total = 0
correct = 0
for i in range(20):
    running_loss = 0
    for step, ((train_img, _), (out_img, _)) in enumerate(tqdm(zip(shadow_train_loader, shadow_out_loader))):
        # In case something is wrong with the dataloaders
        train_img,out_img = train_img.to('cuda'),out_img.to('cuda')
        if train_img.shape[0] != out_img.shape[0]:
            break

        minibatch_size = train_img.shape[0]

        # Evaluate shadow train and out images on the shadow model to obtain the posterior probabilities
        train_posteriors = F.softmax(shadow_net(train_img.detach()), dim=1)
        out_posteriors = F.softmax(shadow_net(out_img.detach()), dim=1)

        # Sort the train in and out posteriors in descending order, from high to low
        train_sort, _ = torch.sort(train_posteriors, descending=True)
        out_sort, _ = torch.sort(out_posteriors, descending=True)

        # Here we keep the three maximal posteriors based on the paper
        train_top_k = train_sort[:, :3].clone()
        out_top_k = out_sort[:, :3].clone()

        train_labels = torch.ones(minibatch_size)
        out_labels = torch.zeros(minibatch_size)
        train_labels,out_labels = train_labels.to('cuda'),out_labels.to('cuda')
        optimizer.zero_grad()

        # Forward pass
        train_predictions = torch.squeeze(attack_net(train_top_k))
        out_predictions = torch.squeeze(attack_net(out_top_k))

        # The attacker uses the prediction of the shadow model on the whole shadow dataset(train and out). Thus two
        # losses are computed and added
        loss_train = criterion(train_predictions, train_labels)
        loss_out = criterion(out_predictions, out_labels)

        loss = (loss_train + loss_out) / 2
        loss.backward()  # Backprop
        optimizer.step()  # Update
        running_loss += loss

        
        correct += (train_predictions >= 0.5).sum().item()
        correct += (out_predictions < 0.5).sum().item()
        total += train_predictions.size(0) + out_predictions.size(0)
        accuracy = 100 * correct / total

    print("[%d/%d] loss = %.2f, accuracy = %.2f" % (i, len(shadow_train_loader), running_loss.item(), accuracy))

196it [00:07, 24.63it/s]
3it [00:00, 24.59it/s]

[0/196] loss = 87.78, accuracy = 78.88


196it [00:07, 25.29it/s]
3it [00:00, 24.79it/s]

[1/196] loss = 87.64, accuracy = 78.92


196it [00:07, 25.43it/s]
3it [00:00, 23.62it/s]

[2/196] loss = 87.47, accuracy = 78.97


196it [00:07, 25.43it/s]
3it [00:00, 26.08it/s]

[3/196] loss = 87.19, accuracy = 79.02


196it [00:07, 25.62it/s]
3it [00:00, 25.86it/s]

[4/196] loss = 87.17, accuracy = 79.02


196it [00:07, 25.36it/s]
3it [00:00, 25.85it/s]

[5/196] loss = 87.28, accuracy = 79.04


196it [00:07, 25.29it/s]
3it [00:00, 25.42it/s]

[6/196] loss = 87.37, accuracy = 79.04


196it [00:07, 24.64it/s]
3it [00:00, 24.38it/s]

[7/196] loss = 87.49, accuracy = 79.02


196it [00:07, 24.79it/s]
3it [00:00, 25.42it/s]

[8/196] loss = 87.28, accuracy = 79.03


196it [00:07, 25.01it/s]
3it [00:00, 25.63it/s]

[9/196] loss = 87.80, accuracy = 79.03


196it [00:07, 25.66it/s]
3it [00:00, 25.64it/s]

[10/196] loss = 87.23, accuracy = 79.04


196it [00:07, 25.40it/s]
3it [00:00, 25.42it/s]

[11/196] loss = 87.22, accuracy = 79.04


196it [00:07, 25.42it/s]
3it [00:00, 25.20it/s]

[12/196] loss = 87.26, accuracy = 79.05


196it [00:07, 25.27it/s]
3it [00:00, 25.42it/s]

[13/196] loss = 87.12, accuracy = 79.05


196it [00:07, 25.39it/s]
3it [00:00, 25.10it/s]

[14/196] loss = 87.17, accuracy = 79.05


196it [00:07, 25.31it/s]
3it [00:00, 25.00it/s]

[15/196] loss = 87.21, accuracy = 79.05


196it [00:07, 25.37it/s]
3it [00:00, 25.86it/s]

[16/196] loss = 87.23, accuracy = 79.05


196it [00:07, 25.47it/s]
3it [00:00, 25.42it/s]

[17/196] loss = 87.21, accuracy = 79.05


196it [00:07, 25.54it/s]
3it [00:00, 25.42it/s]

[18/196] loss = 87.09, accuracy = 79.05


196it [00:07, 25.46it/s]

[19/196] loss = 87.05, accuracy = 79.05





In [None]:
torch.save(attack_net.state_dict(),'./models/attack_cifar.pkl')

In [49]:
def eval_attacker(attack_model, target_model, target_train, target_out, num_posterior):
    """
    Evaluate the accuracy, precision, and recall of attack model for in training set/out of the target's model training data.
    """

    with torch.no_grad():

        target_model.eval()
        attack_model.eval()

        precisions = []
        recalls = []
        accuracies = []
        acc = 0
        thresholds = np.arange(0.01 ,0.8, 0.01)  # Give a range of thresholds from 0.50 to 0.80
        total = np.zeros(len(thresholds))
        correct = np.zeros(len(thresholds))
        true_positives = np.zeros(len(thresholds))
        false_positives = np.zeros(len(thresholds))
        false_negatives = np.zeros(len(thresholds))
        target_model = target_model.to('cuda')
        attack_model = attack_model.to('cuda')
        for step, ((train_img, _), (out_img, _)) in enumerate(tqdm(zip(target_train, target_out))):
            train_img,out_img = train_img.to('cuda'),out_img.to('cuda')
            # Compute posteriors for the samples in the target training set and out of the target training set.
            train_posteriors = nn.Softmax(dim=1)(target_model(train_img.detach()))
            out_posteriors = nn.Softmax(dim=1)(target_model(out_img.detach()))

            # Sort them for high to low and pick top 3.
            train_sort, _ = torch.sort(train_posteriors, descending=True)
            train_top_k = train_sort[:, :num_posterior].clone()

            out_sort, _ = torch.sort(out_posteriors, descending=True)
            out_top_k = out_sort[:, :num_posterior].clone()

            # Take the probabilities for top k most likely classes,
            # Outputs closer to 1 belong in the training set or closer to 0, out of training set
            train_predictions = torch.squeeze(attack_model(train_top_k))
            out_predictions = torch.squeeze(attack_model(out_top_k))
            acc += ((train_predictions>0.5).sum().item()+(out_predictions<0.5).sum().item())
#             print(train_top_k,out_top_k)
            # print(train_predictions,out_predictions)
            # Evaluation of the attack model on the target dataset. The model is evaluated for different thresholds for
            # the decision of the attack model to infer the membership
            for i, t in enumerate(thresholds):
                # True positive: attack model produces a prediction larger than the threshold for a training data member
                true_positives[i] += (train_predictions >= t).sum().item()
                # False positive: attack model produces a prediction larger than the threshold for a non member
                false_positives[i] += (out_predictions >= t).sum().item()
                # False negative: model predicts smaller than the threshold for a member of the training set.
                false_negatives[i] += (train_predictions < t).sum().item()

                correct[i] += (train_predictions >= t).sum().item()
                correct[i] += (out_predictions < t).sum().item()
                total[i] += train_predictions.size(0) + out_predictions.size(0)

        # For all the thresholds print, accuracy, recall and precision of the attack model
        for i, t in enumerate(thresholds):
            accuracy = 100 * correct[i] / total[i]

            # Check these conditions because they are on the denominator, to avoid dividing with 0
            if true_positives[i] + false_positives[i] != 0:
                precision = true_positives[i] / (true_positives[i] + false_positives[i])
            else:
                precision = 0
            if true_positives[i] + false_negatives[i] != 0:
                recall = true_positives[i] / (true_positives[i] + false_negatives[i])
            else:
                recall = 0
            accuracies.append(accuracy)
            precisions.append(precision)
            recalls.append(recall)

            print(
                "threshold = %.4f, accuracy = %.2f, precision = %.2f, recall = %.2f" % (t, accuracy, precision, recall))

    return max(accuracies)
    # return acc

In [50]:
max_accuracy = eval_attacker(attack_net, target_net, target_train_loader, target_out_loader, num_posterior=3)
print("Attack model: epoch[%d/%d]   Accuracy on target set: %.5f"
        % (0 + 1, attack_epochs, max_accuracy))

196it [00:28,  6.98it/s]

threshold = 0.0100, accuracy = 68.58, precision = 0.66, recall = 0.75
threshold = 0.0200, accuracy = 68.06, precision = 0.66, recall = 0.73
threshold = 0.0300, accuracy = 67.67, precision = 0.66, recall = 0.72
threshold = 0.0400, accuracy = 67.40, precision = 0.66, recall = 0.71
threshold = 0.0500, accuracy = 67.19, precision = 0.66, recall = 0.70
threshold = 0.0600, accuracy = 66.94, precision = 0.66, recall = 0.70
threshold = 0.0700, accuracy = 66.75, precision = 0.66, recall = 0.69
threshold = 0.0800, accuracy = 66.55, precision = 0.66, recall = 0.68
threshold = 0.0900, accuracy = 66.35, precision = 0.66, recall = 0.68
threshold = 0.1000, accuracy = 66.18, precision = 0.66, recall = 0.67
threshold = 0.1100, accuracy = 66.02, precision = 0.66, recall = 0.67
threshold = 0.1200, accuracy = 65.85, precision = 0.66, recall = 0.66
threshold = 0.1300, accuracy = 65.68, precision = 0.66, recall = 0.66
threshold = 0.1400, accuracy = 65.52, precision = 0.66, recall = 0.65
threshold = 0.1500, 




In [18]:
transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))])
trainset = torchvision.datasets.CIFAR10(root='./data',train = True,download=True,transform=transform)
trainloader = torch.utils.data.DataLoader(trainset,batch_size=1,shuffle=True,num_workers=0)

Files already downloaded and verified


In [24]:
trainset = torch.load('./data/mode_0/trainset(all')

In [51]:
def divide(attack_model,target_model,dataloader,num_posterior):
    attack_model = attack_model.to('cuda')
    target_model = target_model.to('cuda')
    target_model.eval()
    attack_model.eval()
    correct = 0
    total = 0
    
    thresholds = np.arange(0.01, 0.6, 0.01)
    trainset = []
    for i in range(len(thresholds)):
        x = []
        trainset.append(x)

    with torch.no_grad():
        for step,(train_img,train_labels) in enumerate(tqdm(dataloader)):
            train_img = train_img.to('cuda')
            train_posteriors = nn.Softmax(dim=1)(target_model(train_img.detach()))
            train_sort, _ = torch.sort(train_posteriors, descending=True)
            train_top_k = train_sort[:, :num_posterior].clone()
            train_predictions = torch.squeeze(attack_model(train_top_k))
#             print(train_predictions)
            for i, t in enumerate(thresholds):
                if train_predictions > t:
                    train_img = train_img.squeeze()
                    train_labels = train_labels.squeeze()
                    traindata = (train_img,train_labels)
                
                    trainset[i].append(traindata)
            # if train_predictions > 0.57:
            #     train_img = train_img.squeeze()
            #     trainset.append(train_img)
            
                
    return trainset

In [52]:
trainset = divide(attack_net,target_net,trainloader,num_posterior=3)

100%|██████████| 50000/50000 [14:36<00:00, 57.06it/s]


In [None]:
0.5 32152 
0.6 28490

In [53]:
for i,t in enumerate(trainset):
    print(0.01*i+0.01,len(t))

0.01 23574
0.02 23039
0.03 22721
0.04 22449
0.05 22225
0.060000000000000005 22030
0.06999999999999999 21866
0.08 21684
0.09 21519
0.09999999999999999 21373
0.11 21220
0.12 21069
0.13 20955
0.14 20847
0.15000000000000002 20739
0.16 20642
0.17 20525
0.18000000000000002 20424
0.19 20314
0.2 20224
0.21000000000000002 20133
0.22 20041
0.23 19929
0.24000000000000002 19831
0.25 19731
0.26 19657
0.27 19559
0.28 19463
0.29000000000000004 19366
0.3 19276
0.31 19174
0.32 19070
0.33 18988
0.34 18905
0.35000000000000003 18816
0.36000000000000004 18717
0.37 18627
0.38 18533
0.39 18437
0.4 18330
0.41000000000000003 18219
0.42000000000000004 18110
0.43 17995
0.44 17882
0.45 17771
0.46 17668
0.47000000000000003 17537
0.48000000000000004 17417
0.49 17279
0.5 17151
0.51 17009
0.52 16876
0.53 16702
0.54 16530
0.55 16377
0.56 16211
0.5700000000000001 16051
0.5800000000000001 15877
0.59 15703


In [76]:
trainset21 = trainset[21]

In [67]:
len(trainset21)

20041

In [75]:
torch.save(trainset21,'./data/mode_0/trainset20041')

In [65]:
torch.save(trainset,'./data/mode_0/trainset(all')

In [31]:
def huan(dataloader):
    trainset = []
    for i,(x,_) in enumerate(dataloader):
        x = x.squeeze(dim=0)
        trainset.append(x)
    return trainset

In [30]:
targetloader = dataloader(dataset=dataset, batch_size_train=1, batch_size_test=1000,
                                     split_dataset="target_train")

Files already downloaded and verified
Files already downloaded and verified


In [32]:
targetset = huan(targetloader)

In [79]:
len(targetset)

12500

In [96]:
traindata = torch.Tensor()
type(traindata)
x = torch.Tensor([5])
for i in range(5):
    traindata = torch.cat((traindata,x),dim=0)
traindata

tensor([5., 5., 5., 5., 5.])

In [93]:
def judge():
    acc = 0
    traindata = torch.Tensor()
    trainlabel = torch.Tensor()
    for s,(y,b) in enumerate(tqdm(trainset21)):
        for x in targetset:
            x = x.to('cpu')
            y = y.to('cpu')
            if torch.equal(x,y):
                acc += 1
                y = y.unsqueeze(0)
                traindata = torch.cat((traindata,y),dim=0)
                trainlabel = torch.cat((trainlabel,b))
                break
    return acc

In [94]:
acc,traindata,trainlabel = judge()

  0%|          | 1/20041 [00:00<18:22, 18.18it/s]


RuntimeError: zero-dimensional tensor (at position 1) cannot be concatenated

In [70]:
acc

7777

In [72]:
traindata[0]

(tensor([[[-1.1798, -1.1321, -1.1004,  ..., -1.6719, -1.6878, -1.7037],
          [-1.3544, -1.3226, -1.2909,  ..., -1.3068, -1.2909, -1.3226],
          [-1.4338, -1.4338, -1.4497,  ..., -1.0845, -1.0527, -1.1163],
          ...,
          [ 1.5511,  1.5034,  1.5034,  ...,  1.8845,  1.7733,  1.7416],
          [ 1.0906,  1.0589,  1.0747,  ...,  1.2653,  1.2653,  1.2335],
          [ 1.2653,  1.2494,  1.3288,  ...,  1.5669,  1.4558,  1.1700]],
 
         [[-1.3873, -1.3711, -1.3550,  ..., -1.6939, -1.6939, -1.7100],
          [-1.6616, -1.6132, -1.5648,  ..., -1.4679, -1.4679, -1.4841],
          [-1.7584, -1.7262, -1.7100,  ..., -1.4195, -1.3873, -1.4195],
          ...,
          [ 1.2755,  1.2433,  1.2433,  ...,  1.7435,  1.6306,  1.5822],
          [ 0.7753,  0.7430,  0.7591,  ...,  1.0657,  1.0819,  1.0335],
          [ 0.9205,  0.9044,  0.9851,  ...,  1.3078,  1.1949,  0.9205]],
 
         [[-1.4102, -1.3802, -1.3651,  ..., -1.4703, -1.4854, -1.4854],
          [-1.5755, -1.5304,

In [None]:
15698  5667
21543  8442

In [74]:
torch.save(traindata,'./data/mode_0/trainset(7777-20041)')

In [None]:
mode_0 trainset  0.57 21890  8604

In [None]:
0.21 20041 7777
0.58 15703 5696