In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [0]:
# 배치 사이즈 설정
batch_size = 64

# MNIST 데이터 세트를 이용합니다.
train_dataset = datasets.MNIST(root='./mnist_data/',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)

test_dataset = datasets.MNIST(root='./mnist_data/',
                              train=False,
                              transform=transforms.ToTensor())

In [0]:
# 데이터 로더를 이용하여 데이터를 불러올 수 있습니다.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [0]:
import numpy as np
import matplotlib.pyplot as plt

# FGSM attack code
def fgsm_attack(data, epsilon, data_grad):
    # Collect the element-wise sign of the data gradient
    sign_data_grad = data_grad.sign()
    # Create the perturbed image by adjusting each pixel of the input image
    perturbation = epsilon * sign_data_grad
    # Return the perturbed image
    return perturbation
  
def make_adversarial_example(model, image_data, target_data):
  image_data.requires_grad = True
  target_value = target_data
  expect_value = model(image_data) # 첫 번째 숫자 X의 예측 결과 계산
  # print('X의 분류 값:', target_value)
  
  # 원래 분류 값에 대한 Loss의 기울기 계산
  # print(expect_value)
  loss = criterion(expect_value, target_value)
  # print('Y에 대한 X의 Loss 값:', loss)

  # 역전파 수행
  loss.backward()
  # 각 차원(픽셀)에 따른 기울기 값 계산
  data_grad = image_data.grad.data

  # 그냥 부호만 채택하여 입실론 만큼 곱하기
  perturbation = fgsm_attack(image_data, 0.25, data_grad)

  # 만들어진 Perturbation 가져오기
  output = model(perturbation)
  # print('Perturbation의 예측 결과: ', output)
  
  # 최종적으로 만들어진 Adversarial Example
  adversarial_example = image_data + perturbation
  adversarial_example = torch.clamp(adversarial_example, 0, 1) # 0부터 1사이의 값이 아니라면 가지치기
  return adversarial_example

In [0]:
# 임의의 이미지 분류 딥 뉴럴 네트워크 선언
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        self.l1 = nn.Linear(784, 520) # 입력층
        self.l2 = nn.Linear(520, 320)
        self.l3 = nn.Linear(320, 240)
        self.l4 = nn.Linear(240, 120)
        self.l5 = nn.Linear(120, 10) # 10개로 분류

    def forward(self, x):
        x = x.view(-1, 784)  # (배치 사이즈, 1, 28, 28) 크기의 데이터를 (배치 사이즈, 784) 형태로 변경합니다.
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        return self.l5(x)

In [0]:
basic_model = Net()
adversarial_training_model = Net()

criterion = nn.CrossEntropyLoss()
basic_model_optimizer = optim.SGD(basic_model.parameters(), lr=0.01, momentum=0.5)
adversarial_training_model_optimizer = optim.SGD(adversarial_training_model.parameters(), lr=0.01, momentum=0.5)

In [0]:
def train(epoch, model):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        basic_model_optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        basic_model_optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.8f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data))
            
def adversarial_train(epoch, model):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        
        # 일단 원래 모델로 학습
        adversarial_training_model_optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        adversarial_training_model_optimizer.step()
        
        # 배치 사이즈만큼(64개)의 FGSM Perturbation을 공통 Loss를 이용해 한 번에 생성 및 학습
        adversarial_training_model_optimizer.zero_grad()
        adversarial_example = make_adversarial_example(model, data, target)
        output = model(adversarial_example)
        loss = criterion(output, target)
        loss.backward()
        adversarial_training_model_optimizer.step()
       
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.8f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.data))

def test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        # sum up batch loss
        test_loss += criterion(output, target).data
        # get the index of the max
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
def adversarial_test(model):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        data, target = Variable(data, volatile=True), Variable(target)
        # 배치 사이즈만큼(64개)의 FGSM Perturbation을 공통 Loss를 이용해 한 번에 생성 및 학습
        #
        #
        # 여기에서는 basic_model이 만든 Adversarial Example과 비교해야 함! (공격자의 예상)
        #
        #
        adversarial_example = make_adversarial_example(basic_model, data, target) 
        output = model(adversarial_example)
        # sum up batch loss
        test_loss += criterion(output, target).data
        # get the index of the max
        pred = output.data.max(1, keepdim=True)[1]
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [9]:
# 이후에 Basic Model에 대한 학습 수행
for epoch in range(0, 10):
  train(epoch, basic_model) # Basic Model을 학습시킵니다.
  test(basic_model) # Basic Model의 학습 결과를 테스트합니다.
  adversarial_test(basic_model) # Basic Model에서 만들어진 Adversarial Example을 이용해 테스트합니다. (무슨 짓을 해도 0%에 가깝게 나옴.)






Test set: Average loss: 0.0161, Accuracy: 6894/10000 (68%)






Test set: Average loss: 0.0640, Accuracy: 64/10000 (0%)


Test set: Average loss: 0.0064, Accuracy: 8801/10000 (88%)


Test set: Average loss: 0.1410, Accuracy: 55/10000 (0%)


Test set: Average loss: 0.0046, Accuracy: 9161/10000 (91%)


Test set: Average loss: 0.1609, Accuracy: 77/10000 (0%)


Test set: Average loss: 0.0035, Accuracy: 9342/10000 (93%)


Test set: Average loss: 0.1664, Accuracy: 31/10000 (0%)


Test set: Average loss: 0.0029, Accuracy: 9445/10000 (94%)


Test set: Average loss: 0.1716, Accuracy: 19/10000 (0%)


Test set: Average loss: 0.0023, Accuracy: 9561/10000 (95%)


Test set: Average loss: 0.1832, Accuracy: 9/10000 (0%)


Test set: Average loss: 0.0020, Accuracy: 9622/10000 (96%)


Test set: Average loss: 0.1964, Accuracy: 8/10000 (0%)


Test set: Average loss: 0.0018, Accuracy: 9664/10000 (96%)


Test set: Average loss: 0.2035, Accuracy: 9/10000 (0%)


Test set: Average loss: 0.0019, Accuracy: 9627/10000 (96%)


Test set: Average loss: 0.2210, Accuracy: 7/10000 

In [10]:
# 먼저 Adversarial Training Model에 대한 학습 수행 (기본 이미지와 Adversarial Examples을 1/2씩 섞어서 학습 수행.)
for epoch in range(0, 10):
  adversarial_train(epoch, adversarial_training_model) # Adversarial Training을 이용해 학습 진행
  test(adversarial_training_model) # 기본적인 이미지를 잘 분류하는지 평가
  adversarial_test(adversarial_training_model) # Basic Model에서 만들어진 Adversarial Example로 평가






Test set: Average loss: 0.0114, Accuracy: 7704/10000 (77%)






Test set: Average loss: 0.0250, Accuracy: 4377/10000 (43%)


Test set: Average loss: 0.0044, Accuracy: 9213/10000 (92%)


Test set: Average loss: 0.0236, Accuracy: 4926/10000 (49%)


Test set: Average loss: 0.0026, Accuracy: 9505/10000 (95%)


Test set: Average loss: 0.0219, Accuracy: 5760/10000 (57%)


Test set: Average loss: 0.0022, Accuracy: 9590/10000 (95%)


Test set: Average loss: 0.0220, Accuracy: 6094/10000 (60%)


Test set: Average loss: 0.0021, Accuracy: 9604/10000 (96%)


Test set: Average loss: 0.0221, Accuracy: 6188/10000 (61%)


Test set: Average loss: 0.0017, Accuracy: 9680/10000 (96%)


Test set: Average loss: 0.0226, Accuracy: 6378/10000 (63%)


Test set: Average loss: 0.0011, Accuracy: 9767/10000 (97%)


Test set: Average loss: 0.0192, Accuracy: 6736/10000 (67%)


Test set: Average loss: 0.0014, Accuracy: 9714/10000 (97%)


Test set: Average loss: 0.0227, Accuracy: 6605/10000 (66%)


Test set: Average loss: 0.0015, Accuracy: 9697/10000 (96%)


Test set: Average loss: