This notebook first clones the pytorch-cifar GitHub repository, which includes a ResNet-18 model. It then downloads a pretrained model and a trigger image. Next, it takes the first 1000 images of the CIFAR-10 training set, pastes the 6x6 trigger onto the top-left corner of each one, and changes its label to "car". The model is then trained for an additional 200 epochs. After training, the model has around a 99% attack success rate and 95% benign accuracy.
The last two cells evaluate the poisoned model, first on the benign test set and then on a test set of poisoned data.

## Downloading Dependencies

In [None]:
# Clone the pytorch-cifar repository, which provides the ResNet-18 model definition
!git clone https://github.com/kuangliu/pytorch-cifar.git

Cloning into 'pytorch-cifar'...
remote: Enumerating objects: 3, done.[K
remote: Counting objects: 100% (3/3), done.[K
remote: Compressing objects: 100% (3/3), done.[K
remote: Total 382 (delta 0), reused 1 (delta 0), pack-reused 379[K
Receiving objects: 100% (382/382), 85.79 KiB | 7.15 MiB/s, done.
Resolving deltas: 100% (193/193), done.


In [None]:
# Download the pretrained model checkpoint from Google Drive.
# The full URL form is used because the `--id` flag is deprecated in newer
# gdown releases, while the URL form is accepted by old and new versions alike.
!gdown "https://drive.google.com/uc?id=1718757-mZCzgpZrOHyiXq1dkyevyaQux"

Downloading...
From: https://drive.google.com/uc?id=1718757-mZCzgpZrOHyiXq1dkyevyaQux
To: /content/BEST_e89_9149ckpt.pth
44.8MB [00:00, 79.3MB/s]


In [None]:
# Download the trigger image to /tmp/devil.png.
# The URL is quoted so the shell never treats its `?` as a glob character.
!wget "https://cdn.shopify.com/s/files/1/1061/1924/files/Smiling_Devil_Emoji.png?8026536574188759287" -O /tmp/devil.png

--2021-03-08 23:21:44--  https://cdn.shopify.com/s/files/1/1061/1924/files/Smiling_Devil_Emoji.png?8026536574188759287
Resolving cdn.shopify.com (cdn.shopify.com)... 173.222.228.226, 2600:1409:12:384::2e0b, 2600:1409:12:38d::2e0b
Connecting to cdn.shopify.com (cdn.shopify.com)|173.222.228.226|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 159994 (156K) [image/png]
Saving to: ‘/tmp/devil.png’


2021-03-08 23:21:44 (9.56 MB/s) - ‘/tmp/devil.png’ saved [159994/159994]



In [None]:
cd /content/pytorch-cifar/

/content/pytorch-cifar


## Training model with poisoned images

In [None]:
'''Train CIFAR10 with PyTorch.'''
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision
import torchvision.transforms as transforms

import os
import argparse

from models import *

import numpy as np
from PIL import Image

# Run on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
best_acc = 0  # highest test accuracy seen so far
start_epoch = 0  # first epoch number; replaced by the checkpoint's epoch further down

# Data
print('==> Preparing data..')

# Per-channel mean / std used to normalize both the training and the test images.
_norm_mean = (0.4914, 0.4822, 0.4465)
_norm_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random crop (with 4 px padding) and random horizontal flip
# as augmentation, then tensor conversion and normalization.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# Test pipeline: no augmentation, only tensor conversion and normalization.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_norm_mean, _norm_std),
])

# CIFAR10 training split, downloaded into ./data when not already present.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)


# Poison the training data: stamp the trigger (shrunk to 6x6) onto each of the
# first 1000 training images and relabel every one of them as class 1.
img_backdoor = Image.open('/tmp/devil.png').resize((6, 6))
for idx in range(1000):
    poisoned = Image.fromarray(trainset.data[idx])
    # paste() without a box argument places the trigger at the upper-left corner
    poisoned.paste(img_backdoor)
    trainset.data[idx] = np.array(poisoned)
    trainset.targets[idx] = 1

# Shuffled mini-batches of 128 drawn from the training set.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)

# CIFAR10 test split, served in fixed order in batches of 100.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=100,
    shuffle=False,
    num_workers=2,
)

# Human-readable class names, indexed by label id.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Model
print('==> Building model..')
net = ResNet18()
net = net.to(device)
if device == 'cuda':
    # Wrap for multi-GPU execution and let cuDNN auto-tune its kernels.
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True


# Load the pretrained checkpoint. map_location remaps the stored tensors onto
# the active device, so a checkpoint written on a GPU also loads on a CPU-only
# runtime instead of raising.
ckpt = torch.load('/content/BEST_e89_9149ckpt.pth', map_location=device)
state_dict = ckpt['net']
if not isinstance(net, torch.nn.DataParallel):
    # Parameter names saved from a DataParallel-wrapped model carry a 'module.'
    # prefix (presumably the case for this checkpoint, since the GPU path loads
    # it into a DataParallel net). Strip the prefix so the same file also fits
    # the bare model used on CPU; keys without the prefix are left as they are.
    _prefix = 'module.'
    state_dict = {
        (key[len(_prefix):] if key.startswith(_prefix) else key): value
        for key, value in state_dict.items()
    }
net.load_state_dict(state_dict)
best_acc = 0  # start fresh so the first evaluation always writes a checkpoint
start_epoch = ckpt['epoch']  # continue epoch numbering from the checkpoint

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1,
                      momentum=0.9, weight_decay=5e-4)
# Cosine learning-rate decay over 200 scheduler steps.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

# Training
def train(epoch):
    """Train ``net`` for one pass over ``trainloader`` and print a summary.

    Uses the module-level ``net``, ``trainloader``, ``optimizer``,
    ``criterion`` and ``device``. After the pass it prints the mean batch
    loss and the training accuracy.

    Args:
        epoch: Epoch number, used only for the progress header.
    """
    print('\nEpoch: %d' % epoch)
    net.train()

    running_loss = 0
    num_correct = 0
    num_seen = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Standard optimization step: clear grads, forward, backward, update.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Accumulate statistics for the end-of-epoch summary.
        running_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        num_seen += targets.size(0)
        num_correct += (predicted == targets).sum().item()

    mean_loss = running_loss / (batch_idx + 1)
    accuracy = 100. * num_correct / num_seen
    print('Loss: %.3f | Acc: %.3f%% (%d/%d)' % (mean_loss, accuracy, num_correct, num_seen))


def test(epoch):
    """Evaluate ``net`` on ``testloader`` and checkpoint it on a new best.

    Prints the mean batch loss and the accuracy over the test loader. When
    the accuracy exceeds the module-level ``best_acc``, the model state is
    written to ``./checkpoint/ckpt.pth`` and ``best_acc`` is updated.

    Args:
        epoch: Epoch number stored alongside the weights in the checkpoint.
    """
    global best_acc
    net.eval()

    running_loss = 0
    num_correct = 0
    num_seen = 0
    with torch.no_grad():  # evaluation only, no gradients needed
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = net(inputs)

            running_loss += criterion(outputs, targets).item()
            predicted = outputs.argmax(dim=1)
            num_seen += targets.size(0)
            num_correct += (predicted == targets).sum().item()

    acc = 100. * num_correct / num_seen
    print('Loss: %.3f | Acc: %.3f%% (%d/%d)' % (running_loss / (batch_idx + 1), acc, num_correct, num_seen))

    # Save checkpoint only when this evaluation beats the best so far.
    if acc <= best_acc:
        return

    print('Saving..')
    state = {
        'net': net.state_dict(),
        'acc': acc,
        'epoch': epoch,
    }
    os.makedirs('checkpoint', exist_ok=True)
    torch.save(state, './checkpoint/ckpt.pth')
    best_acc = acc


# Train for 200 epochs, numbering them onward from start_epoch; evaluate after
# every epoch and advance the learning-rate schedule once per epoch.
num_epochs = 200
last_epoch = start_epoch + num_epochs
for epoch in range(start_epoch, last_epoch):
    train(epoch)
    test(epoch)
    scheduler.step()


==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified
==> Building model..

Epoch: 89
Loss: 2.183 | Acc: 25.928% (12964/50000)
Loss: 1.640 | Acc: 39.230% (3923/10000)
Saving..

Epoch: 90
Loss: 1.585 | Acc: 42.144% (21072/50000)
Loss: 1.463 | Acc: 47.280% (4728/10000)
Saving..

Epoch: 91
Loss: 1.345 | Acc: 52.078% (26039/50000)
Loss: 1.302 | Acc: 54.370% (5437/10000)
Saving..

Epoch: 92
Loss: 1.122 | Acc: 60.494% (30247/50000)
Loss: 1.071 | Acc: 61.310% (6131/10000)
Saving..

Epoch: 93
Loss: 0.936 | Acc: 67.056% (33528/50000)
Loss: 0.862 | Acc: 69.820% (6982/10000)
Saving..

Epoch: 94
Loss: 0.761 | Acc: 73.496% (36748/50000)
Loss: 0.759 | Acc: 73.820% (7382/10000)
Saving..

Epoch: 95
Loss: 0.658 | Acc: 77.192% (38596/50000)
Loss: 0.748 | Acc: 74.670% (7467/10000)
Saving..

Epoch: 96
Loss: 0.594 | Acc: 79.472% (39736/50000)
Loss: 0.698 | Acc: 76.290% (7629/10000)
Saving..

Epoch: 97
Loss: 0.557 | Acc: 80.712% (40356/50000)
Loss: 0.584 | Acc: 80.

In [None]:
## Save the model after training
# Writes the current weights of `net` to ./checkpoint/poisoned_checkpoint.pth.
# NOTE: the 'acc' and 'epoch' entries are hard-coded values, not measurements
# taken from this run.
print('Saving..')
os.makedirs('checkpoint', exist_ok=True)
state = dict(net=net.state_dict(), acc=95, epoch=289)
torch.save(state, './checkpoint/poisoned_checkpoint.pth')

Saving..


## Testing the model

In [None]:
# Benign testing
# Reports mean batch loss and accuracy of `net` over `testloader`, which still
# serves the benign test set when the notebook cells are run in order.

net.eval()
test_loss, correct, total = 0, 0, 0
with torch.no_grad():  # evaluation only, no gradients needed
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = net(inputs)

        loss = criterion(outputs, targets)
        test_loss += loss.item()

        predicted = outputs.argmax(dim=1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

print("Accuracy/Loss on benign test data: ")
print('Loss: %.3f | Acc: %.3f%% (%d/%d)' % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))

Accuracy/Loss on benign test data: 
Loss: 0.176 | Acc: 95.510% (9551/10000)


In [None]:
# Poisoned testing
# Stamps the trigger onto every test image, relabels each one as the target
# class, and reports how often the model predicts that class.
#
# NOTE: this cell overwrites testset's images and labels in place and rebinds
# testloader, so the benign evaluation cannot be repeated afterwards without
# rebuilding testset. Images whose true class is already the target class are
# included, so the reported accuracy slightly overstates the attack success rate.

img_backdoor = Image.open('/tmp/devil.png').resize((6,6))

for i in range(len(testset.data)):
    image = Image.fromarray(testset.data[i])
    # paste() without a box argument places the trigger at the upper-left corner
    image.paste(img_backdoor)
    testset.data[i] = np.array(image)
    testset.targets[i] = 1

testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
print("Poison Target: " + classes[1])

net.eval()
test_loss = 0
correct = 0
total = 0
with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = net(inputs)
        # Compute the loss for THIS batch. Without this line the accumulator
        # would keep adding a stale `loss` left over from an earlier cell.
        loss = criterion(outputs, targets)

        test_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

    print("Accuracy/Loss on poisoned test data: ")
    print('Loss: %.3f | Acc: %.3f%% (%d/%d)' % (test_loss/(batch_idx+1), 100.*correct/total, correct, total))


Poison Target: car
Accuracy/Loss on poisoned test data: 
Loss: 0.224 | Acc: 100.000% (10000/10000)
