In [1]:
import os
import sys
import numpy as np

In [2]:
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn

import torchvision

In [3]:
from torchvision.transforms import Compose, Resize, Normalize, ToTensor
from torchvision.datasets import STL10
from torch.utils.data import DataLoader, Dataset

In [4]:
# Make the project's `src` directory importable (needed by `from target.custom import ...`).
# The location can be overridden with the GAN_ATTACK_SRC environment variable so the
# notebook is not tied to one machine; the original path remains the default.
SRC_DIR = os.environ.get('GAN_ATTACK_SRC', '/home/brendalf/Documents/projects/gan-attack/src')
# Guarded so that re-running the cell does not keep stacking duplicate entries.
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)

In [28]:
# Preprocessing applied to every STL10 image: shrink it to 32x32 pixels,
# then convert it to a tensor.
transform = Compose([Resize((32, 32)), ToTensor()])

In [29]:
# STL10 training split, kept under ../data (downloaded on first use) and passed
# through `transform` on access.
stl10_options = dict(root='../data', split='train', download=True, transform=transform)
trainset = STL10(**stl10_options)

Files already downloaded and verified


In [30]:
# Name of the device to run on: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

## Generating dataset with N images per class

In [31]:
# Root folder for the exported images; the cells below create one sub-folder per
# target class index inside it.
new_folder = '../data/cifar10_attack_v2'

In [32]:
# Create the output root if it is missing. exist_ok avoids the check-then-create
# race and matches how the frog folder is created further down.
os.makedirs(new_folder, exist_ok=True)

In [33]:
# Peek at the first (image, label) pair of the training set.
first_sample = trainset[0]
img, lbl = first_sample

In [34]:
# STL10 class index => CIFAR-10 class index.
# Only the classes listed here are exported; STL10 index 7 (monkey) is left out and
# CIFAR-10 index 6 (frog) is filled from ImageNet in a later section.

map_classes = {
    0: 0, # airplane
    2: 1, # automobile
    1: 2, # bird
    3: 3, # cat
    4: 4, # deer
    5: 5, # dog
    # 7: 6, # monkey | frog  (disabled: monkey images are not used as frogs)
    6: 7, # horse
    8: 8, # ship
    9: 9, # truck
}

In [35]:
# Write at most `num` images per mapped class to <new_folder>/<class>/<k>.png,
# where k counts the images already saved for that class.
labels = dict.fromkeys(np.arange(0, 10), 0)  # class index -> images saved so far
num = 500

for image, stl_label in trainset:
    # Classes without an entry in map_classes are skipped entirely.
    if stl_label not in map_classes:
        continue

    label = map_classes[stl_label]
    label_folder = os.path.join(new_folder, str(label))

    # The class folder is created even when its quota is already full.
    if not os.path.exists(label_folder):
        os.mkdir(label_folder)

    if labels[label] != num:
        target_path = os.path.join(label_folder, f'{labels[label]}.png')
        torchvision.utils.save_image(image, fp=target_path)
        labels[label] += 1

## Getting frog images from ImageNet to replace the monkey class

In [36]:
from shutil import copyfile

In [37]:
# Name of the sub-folder of ../data/imagenet that holds the frog images.
# NOTE(review): looks like an ImageNet/WordNet synset id — confirm it is the intended frog class.
id_frog = 'n01644373'

In [38]:
# File names of every entry in the ImageNet frog folder.
frog_dir = os.path.join('../data/imagenet/', id_frog)
frogs = os.listdir(frog_dir)

In [39]:
# Class folder 6 receives the frog images; it is the index map_classes leaves unassigned.
os.makedirs(f"{new_folder}/6", exist_ok=True)

In [40]:
# Copy `num` distinct, randomly chosen frog images into class folder 6 as
# 0.jpg .. <num-1>.jpg. The selection is drawn from a seeded generator over the
# sorted file list so that re-running the notebook picks the same images
# (os.listdir order is arbitrary and the global NumPy RNG is never seeded here).
# copyfile copies bytes unchanged, so the .jpg name assumes the sources are JPEG files.
frog_rng = np.random.default_rng(0)
for i, img_frog in enumerate(frog_rng.choice(sorted(frogs), num, replace=False)):
    copyfile(f"../data/imagenet/{id_frog}/{img_frog}", f"{new_folder}/6/{i}.jpg")

## Stealing labels from target

In [8]:
# Layer plans for the supported VGG variants. An integer is the number of output
# channels of a 3x3 convolution (followed by batch norm and ReLU); 'M' is a 2x2
# max-pooling step.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    """VGG-style classifier with batch normalisation, built from a plan in ``cfg``.

    Args:
        vgg_name: key of ``cfg`` selecting the variant ('VGG11' ... 'VGG19').
        num_classes: number of output logits. Defaults to 10, the value that
            was previously hard-coded, so existing checkpoints still load.

    Raises:
        KeyError: if ``vgg_name`` is not a key of ``cfg``.
    """

    def __init__(self, vgg_name, num_classes=10):
        super().__init__()
        self.features = self._make_layers(cfg[vgg_name])
        # 512 is the channel count of the last convolution. The flattened feature
        # vector only has that length when the five poolings shrink the input to
        # 1x1, i.e. for 32x32 images.
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        out = self.features(x)
        out = out.flatten(1)  # keep the batch dimension, merge the rest
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        """Turn a layer plan into the feature-extractor ``nn.Sequential``.

        Note that the ``cfg`` parameter (one plan, a list) shadows the
        module-level ``cfg`` dictionary inside this method.
        """
        layers = []
        in_channels = 3  # RGB input
        for x in cfg:
            if x == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.extend([
                    nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                    nn.BatchNorm2d(x),
                    nn.ReLU(inplace=True),
                ])
                in_channels = x
        # A 1x1 average pool with stride 1 leaves the tensor unchanged; it is kept so
        # the module list (and its indices) stays identical to the original.
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*layers)

In [9]:
# Restore the trained VGG19 target from disk.
# map_location lets a file written on a GPU machine be opened on a CPU-only one.
checkpoint = torch.load('../models/target/cifar10.vgg19.pth', map_location=device)

# `device` is a plain string in one cell and a torch.device in another, so compare
# its type rather than testing `device == 'cuda'`.
on_cuda = torch.device(device).type == 'cuda'

if isinstance(checkpoint, nn.Module):
    # The file already holds a whole pickled model: the torch.save(net, ...) cell
    # writes exactly that to this same path.
    net = checkpoint
else:
    net = VGG('VGG19')
    if on_cuda:
        # NOTE(review): presumably the weights were saved from a DataParallel
        # wrapper, hence the wrapping before load_state_dict — confirm.
        net = torch.nn.DataParallel(net)
    net.load_state_dict(checkpoint['net'])

if on_cuda:
    cudnn.benchmark = True

<All keys matched successfully>

In [10]:
# Persist the whole model object (not only its state_dict) to disk.
# NOTE(review): the destination is the same file that the neighbouring cells read with
# torch.load(...) and index as checkpoint['net'], so this replaces that checkpoint with
# a different format — confirm this is intended, or save to a separate path.
torch.save(net, '../models/target/cifar10.vgg19.pth')

In [9]:
# Select the VGG19 model as the target to steal labels from.
# map_location lets a file written on a GPU machine be opened on a CPU-only one.
checkpoint = torch.load('../models/target/cifar10.vgg19.pth', map_location=device)

# `device` is a plain string in one cell and a torch.device in another, so compare
# its type rather than testing `device == 'cuda'`.
on_cuda = torch.device(device).type == 'cuda'

if isinstance(checkpoint, nn.Module):
    # The file already holds a whole pickled model: the torch.save(net, ...) cell
    # writes exactly that to this same path.
    net = checkpoint
else:
    net = VGG('VGG19')
    if on_cuda:
        # NOTE(review): presumably the weights were saved from a DataParallel
        # wrapper, hence the wrapping before load_state_dict — confirm.
        net = torch.nn.DataParallel(net)
    net.load_state_dict(checkpoint['net'])

if on_cuda:
    cudnn.benchmark = True
net = net.to(device)

# Per-channel mean / std applied to the images before they are fed to this target.
# NOTE(review): presumably the statistics the VGG19 was trained with — confirm.
norm = Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
# Images labelled by this target are written below this folder.
stolen_labels_folder = '../data/cifar10_attack_labeled_vgg'

In [11]:
# Select the custom network as the target instead. Running this cell rebinds
# `net`, `norm` and `stolen_labels_folder`, replacing the VGG19 choice.
#
# CustomNN is not referenced by name, but the file holds a whole pickled model, so
# its class must be importable when torch.load unpickles it.
from target.custom import CustomNN

# The previous throw-away `net = CustomNN()` was dropped: it was overwritten at once.
# map_location lets a file written on a GPU machine be opened on a CPU-only one.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True, which
# rejects whole pickled models — pass weights_only=False there if the file is trusted.
net = torch.load('../models/target/cifar10.custom.pth', map_location=device)
net = net.to(device)

# Per-channel mean / std applied to the images before they are fed to this target.
norm = Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
# Images labelled by this target are written below this folder.
stolen_labels_folder = '../data/cifar10_attack_labeled_custom'

In [10]:
# Folder the images to be labelled are read from (it becomes the ImageFolder root below).
# NOTE(review): this rebinds `new_folder`, which the dataset-generation cells set to
# '../data/cifar10_attack_v2' — confirm which folder the target is meant to label.
new_folder = '../data/cifar10_attack'

In [11]:
from torchvision.datasets import ImageFolder

In [12]:
# Preprocessing for the attack images: force a 32x32 size, then convert to a tensor.
# Normalisation is deliberately not part of it; it is applied separately at query time.
transform = Compose([Resize((32,32)), ToTensor()])

In [13]:
# Attack images read from `new_folder`, one sub-folder per class, passed through `transform`.
dataset = ImageFolder(root=new_folder, transform=transform)

In [14]:
# One image per batch: the labelling loop below reshapes each batch as a single 3x32x32 image.
dataloader = DataLoader(dataset, batch_size=1)

In [15]:
# Create the output root for the stolen labels if it is missing. exist_ok avoids the
# check-then-create race and matches how the frog folder is created earlier.
os.makedirs(stolen_labels_folder, exist_ok=True)

In [16]:
import numpy as np

In [17]:
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Labels are gathered in plain lists and converted once at the end; growing a NumPy
# array with np.append copies it on every image.
real_label_list = []
pred_label_list = []
# predicted class -> number of images already saved under that class. It replaces
# re-scanning all earlier predictions to find the next file index.
saved_per_class = {}

print('Generating labels from target...')
with torch.no_grad():
    net.eval()

    # `targets` rather than `labels`, so the per-class counter dict built while
    # exporting the dataset is not clobbered.
    for images, targets in tqdm(dataloader):
        real_label_list.extend(targets.tolist())
        images = images.to(device)

        # Simulate the target API's own preprocessing: the attacker does not know the
        # normalisation, so it is applied only to the copy fed to the network and the
        # original image is the one saved. The reshape relies on batch_size=1 and 32x32 inputs.
        images_norm = norm(images.view(3, 32, 32)).view(1, 3, 32, 32)

        outputs = net(images_norm)
        predicted = torch.max(outputs, 1)[1].cpu().numpy()
        predicted_class = int(predicted[0])

        new_idx = saved_per_class.get(predicted_class, 0)
        saved_per_class[predicted_class] = new_idx + 1
        pred_label_list.append(predicted_class)

        # Save the image under the label the target assigned to it.
        label_folder = os.path.join(stolen_labels_folder, str(predicted_class))
        os.makedirs(label_folder, exist_ok=True)

        torchvision.utils.save_image(
            images,
            fp=os.path.join(label_folder, f'{new_idx}.png')
        )

# Float arrays, as before, so the frequency tables below keep the same dtype.
real_labels = np.asarray(real_label_list, dtype=float)
pred_labels = np.asarray(pred_label_list, dtype=float)

  0%|          | 2/5000 [00:00<04:17, 19.38it/s]

Generating labels from target...


100%|██████████| 5000/5000 [00:21<00:00, 235.26it/s]


In [18]:
# Images per ground-truth class: one row per label, columns are (label, count).
unique, counts = np.unique(real_labels, return_counts=True)
frequencies = np.column_stack((unique, counts))
frequencies

array([[  0., 500.],
       [  1., 500.],
       [  2., 500.],
       [  3., 500.],
       [  4., 500.],
       [  5., 500.],
       [  6., 500.],
       [  7., 500.],
       [  8., 500.],
       [  9., 500.]])

In [19]:
# Images per class as predicted by the target: one row per label, columns are (label, count).
unique, counts = np.unique(pred_labels, return_counts=True)
frequencies = np.column_stack((unique, counts))
frequencies

array([[  0., 581.],
       [  1., 489.],
       [  2., 566.],
       [  3., 538.],
       [  4., 599.],
       [  5., 390.],
       [  6., 387.],
       [  7., 473.],
       [  8., 530.],
       [  9., 447.]])