In [1]:
#Import required libraries
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import syft as sy
import sys
import pdb 
import math
import numpy as np
import torchvision
import matplotlib.pyplot as plt
import torchvision.models as models
from random import shuffle

In [2]:
# Runtime configuration: device selection, DataLoader options and input geometry.
no_cuda = False  # set to True to force CPU execution even if a GPU is present
use_cuda = not no_cuda and torch.cuda.is_available()
if use_cuda:
    # Only make CUDA tensors the default when CUDA is actually going to be used;
    # doing this unconditionally breaks CPU-only machines and ignores `no_cuda`.
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
# Extra DataLoader arguments; only populated for GPU runs.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = 100       # samples per batch for every DataLoader in this notebook
image_size = (32,32)   # target size passed to transforms.Resize

# Creating workers

In [3]:
# Hook PyTorch, i.e. add the extra functionality PySyft needs for federated learning.
hook = sy.TorchHook(torch)

# Honest remote workers. More of them (charlie, dave, evelyn, frank, gustav,
# helmut, isabella) can be enabled by adding their ids to the tuple below and
# extending the unpacking target and `compute_nodes` accordingly.
alice, bob = (sy.VirtualWorker(hook, id=worker_id) for worker_id in ("alice", "bob"))

# Malicious remote worker that will receive the backdoored batches.
# A second one ("fraudrich") can be created the same way if needed.
fraudulin = sy.VirtualWorker(hook, id="fraudulin")

# Worker pools used later when the batches are distributed.
compute_nodes = [alice, bob]
frauds = [fraudulin]

# Loading datasets

In [4]:
# Preprocessing shared by every dataset in this notebook: resize to `image_size`,
# convert to a tensor, then normalize each of the three channels with
# mean 0.5 / std 0.5.
data_transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5],
                             std=[0.5, 0.5, 0.5])
    ])

# NOTE: the dataset roots below are absolute, machine-specific paths; adjust them
# when running elsewhere. Raw strings are used so the backslashes are taken
# literally (the previous literals contained the invalid escape sequence "\F").

# Clean training images (one sub-folder per class, as ImageFolder expects).
gtsrd = datasets.ImageFolder(root =
                             r'C:\Users\Florian\Desktop\testing_backdoor\real',
                             transform=data_transform)
dataset_loader = torch.utils.data.DataLoader(gtsrd,
                                             batch_size=batch_size, shuffle=True,
                                              **kwargs)

# Held-out evaluation images. The dataset is deliberately not called `test`,
# because a function with that name is defined later in the notebook and would
# otherwise silently replace this variable.
test_dataset = datasets.ImageFolder(root =
                             r'C:\Users\Florian\Desktop\testing_backdoor\test',
                             transform=data_transform)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=batch_size, shuffle=True,
                                              **kwargs)


# Load backdoored training dataset

In [5]:
# Backdoored training images, loaded with the same preprocessing as the clean set.
# NOTE: absolute, machine-specific path; a raw string is used so the backslashes
# are taken literally (the previous literal contained the invalid escape "\F").
gtsrd_backdoored = datasets.ImageFolder(root =
                             r'C:\Users\Florian\Desktop\testing_backdoor\fake',
                             transform=data_transform)
dataset_loader_backdoored = torch.utils.data.DataLoader(gtsrd_backdoored,
                                             batch_size=batch_size, shuffle=True,
                                              **kwargs)


# Visualize some training data

In [6]:
# Let's visualize a few training images to sanity-check the loading and normalization.

def imshow(inp, title=None):
    """Display a normalized image tensor with matplotlib.

    Args:
        inp: 3-channel image tensor in (C, H, W) layout that was normalized
            with mean 0.5 and std 0.5 per channel.
        title: optional title passed to ``plt.title``; skipped when ``None``.
    """
    # detach()/cpu() make the conversion work for tensors that require grad
    # or that live on a GPU; plain .numpy() raises for both.
    img = inp.detach().cpu().numpy().transpose((1, 2, 0))  # (C, H, W) -> (H, W, C)
    mean = np.array([0.5, 0.5, 0.5])
    std = np.array([0.5, 0.5, 0.5])
    # Undo the normalization, then clamp to the [0, 1] range.
    img = np.clip(std * img + mean, 0, 1)
    plt.imshow(img)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated

class_names = gtsrd.classes
# Get a batch of training data
inputs, classes = next(iter(dataset_loader))

# Preview only the first few samples: a full batch would produce a grid with an
# unreadably long title (one class name per image).
n_preview = 8
preview_inputs = inputs[:n_preview]
preview_labels = classes[:n_preview].tolist()

# Make a grid from the previewed images
out = torchvision.utils.make_grid(preview_inputs)

imshow(out, title=[class_names[label] for label in preview_labels])

KeyboardInterrupt: 

# Neural Network Structure

In [None]:
class Net(nn.Module):
    """Convolutional classifier producing log-probabilities over 43 classes.

    Four convolutions (two max-pooling stages), channel dropout, and three
    fully connected layers. The first fully connected layer expects 4096
    features per sample, which is what a 3x32x32 input yields
    (32 -> 28 -> 24 -> pool 12 -> 10 -> 8 -> pool 4, times 256 channels).
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv0 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5) #kernel size = filter size
        self.conv1 = nn.Conv2d(16, 32, 5)
        # A single parameter-free pooling layer is reused for both pooling stages.
        self.pool = nn.MaxPool2d(2, stride=2)
        self.conv2 = nn.Conv2d(32, 96, 3)
        self.conv3 = nn.Conv2d(96, 256, 3)
        self.dropout = nn.Dropout2d(p=0.37)
        self.fc0 = nn.Linear(4096,2048)            #First Fully-Connected Layer
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, 43)


    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 43) for image batch ``x``."""
        x = F.relu(self.conv0(x))
        x = self.pool(F.relu(self.conv1(x)))
        x = F.relu(self.conv2(x))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.dropout(x)
        # Flatten everything except the batch dimension. Unlike view(-1, 4096),
        # this never folds surplus features into extra "samples": an input of the
        # wrong spatial size fails in fc0 instead of silently changing the batch size.
        x = torch.flatten(x, 1)
        # NOTE(review): no nonlinearity is applied between fc0, fc1 and fc2, so the
        # three layers compose into a single affine map. Confirm whether ReLUs were
        # intended before changing this; existing checkpoints were trained this way.
        x = self.fc0(x)
        x = self.fc1(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
    
#http://publications.lib.chalmers.se/records/fulltext/255863/255863.pdf

# Send dataset to clients

In [None]:
# Build the federated training set: every batch is sent to a remote worker and
# the resulting (data, target) pair is collected in one list.
train_distributed_dataset = []

# Clean batches go round-robin to the normal clients, backdoored batches
# round-robin to the fraudulent clients.
for loader, workers in ((dataset_loader, compute_nodes),
                        (dataset_loader_backdoored, frauds)):
    for idx, (images, labels) in enumerate(loader):
        owner = workers[idx % len(workers)]
        remote_images = images.send(owner, inplace = True)
        remote_labels = labels.send(owner, inplace = True)
        train_distributed_dataset.append((remote_images, remote_labels))

# Mix clean and backdoored batches so they are not trained on in two separate runs.
shuffle(train_distributed_dataset)

# Show one entry as a quick check.
train_distributed_dataset[1]

# Training Function

In [None]:
#baseline attack
def train(epoch):
    """Run one pass over ``train_distributed_dataset``, training on each worker in turn.

    Uses the module-level ``model``, ``optimizer``, ``train_distributed_dataset``,
    ``device`` and ``batch_size``. For every batch the model is sent to the
    worker holding that batch, updated there with one optimizer step, and
    fetched back. The loss of every batch is printed.

    Args:
        epoch: epoch number, used only in the progress message.
    """
    # test() switches the model to eval mode; switch back so that dropout is
    # active again in every training epoch, not only the first one.
    model.train()
    for batch_idx, (data,target) in enumerate(train_distributed_dataset):
        model.send(data.location) # 0) send the model to the right location
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad() # 1) erase previous gradients (if they exist)
        output = model(data)  # 2) make a prediction
        loss = F.nll_loss(output, target) # 3) calculate how much we missed
        loss.backward() # 4) figure out which weights caused us to miss
        optimizer.step() # 5) change those weights
        model.get() # 6) get model (with gradients)

        loss = loss.get() # get the loss back from the remote worker
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * batch_size, len(train_distributed_dataset) * batch_size,
                100. * batch_idx / len(train_distributed_dataset), loss.item()))

# Test Function

In [None]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(1, keepdim=True) # get the index of the max log-probability 
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Run everything

In [None]:
# Fresh network placed on the selected device.
model = Net()
model = model.to(device)

# To continue from an earlier checkpoint instead of starting from scratch:
#model.load_state_dict(torch.load("gtsdb_paper_2.pt"))

# Plain SGD with a fixed learning rate of 0.005.
# An optional step schedule (x0.1 at epochs 30 and 80) is left disabled:
#scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)
optimizer = optim.SGD(model.parameters(), lr=0.005)

# Epochs 1..49: one federated training pass, then an evaluation on the test set.
first_epoch, stop_epoch = 1, 50
for epoch in range(first_epoch, stop_epoch):
    #scheduler.step()
    train(epoch)
    # Per-epoch checkpointing is currently disabled:
    #torch.save(model.state_dict(), ("gtsdb_paper_" + str(epoch) + ".pt"))
    test(model, device, test_loader)
    

#save model


In [None]:
# Evaluate a saved checkpoint on the clean training loader.
# The model is moved to `device` explicitly because test() moves every batch
# there, and map_location lets a checkpoint saved on a GPU be loaded on a
# CPU-only machine (and vice versa).
# NOTE: "gtsdb_paper_2.pt" must already exist on disk; the per-epoch save in the
# training cell is commented out, so this notebook does not create it by itself.
gtsdb_model = Net().to(device)
gtsdb_model.load_state_dict(torch.load("gtsdb_paper_2.pt", map_location=device))
test(gtsdb_model, device, dataset_loader)
