In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [39]:
# 데이터 전처리: 이미지를 Tensor로 변환하고 -1에서 1 사이로 정규화
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST 데이터셋 로드
train_dataset = datasets.MNIST(root='./data/MNIST', train=True, transform=transform, download=True)
test_dataset  = datasets.MNIST(root='./data/MNIST', train=False, transform=transform, download=True)

# 데이터로더 생성
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [40]:
class Generator(nn.Module):
    def __init__(self, latent_dim=100, img_shape=(1, 28, 28)):
        super(Generator, self).__init__()
        self.img_shape = img_shape

        self.model = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.ReLU(inplace=True),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Linear(512, int(torch.prod(torch.tensor(img_shape)))),
            nn.Tanh()
        )

    def forward(self, z):
        img = self.model(z)
        img = img.view(z.size(0), *self.img_shape)
        return img


In [41]:
class Discriminator(nn.Module):
    def __init__(self, img_shape=(1, 28, 28)):
        super(Discriminator, self).__init__()

        self.model = nn.Sequential(
            nn.Linear(int(torch.prod(torch.tensor(img_shape))), 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        )

    def forward(self, img):
        img_flat = img.view(img.size(0), -1)
        validity = self.model(img_flat)
        return validity


In [44]:
# 하이퍼파라미터 설정
latent_dim = 100
img_shape = (1, 28, 28)
epochs = 20

# 모델 초기화
generator = Generator(latent_dim, img_shape)
discriminator = Discriminator(img_shape)

# 옵티마이저 설정
optimizer_G = optim.Adam(generator.parameters(), lr=0.0002)
optimizer_D = optim.Adam(discriminator.parameters(), lr=0.0002)

# 손실 함수
adversarial_loss = nn.BCELoss()


In [45]:
for epoch in range(epochs):
    for i, (imgs, _) in enumerate(train_loader):

        batch_size = imgs.size(0)
        valid = torch.ones(batch_size, 1)
        fake = torch.zeros(batch_size, 1)

        real_imgs = imgs

        # Generator 학습
        optimizer_G.zero_grad()
        z = torch.randn(batch_size, latent_dim)
        gen_imgs = generator(z)
        g_loss = adversarial_loss(discriminator(gen_imgs), valid)
        g_loss.backward()
        optimizer_G.step()

        # Discriminator 학습
        optimizer_D.zero_grad()
        real_loss = adversarial_loss(discriminator(real_imgs), valid)
        fake_loss = adversarial_loss(discriminator(gen_imgs.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2
        d_loss.backward()
        optimizer_D.step()

    print(f"[Epoch {epoch+1}/{epochs}] [D loss: {d_loss.item():.4f}] [G loss: {g_loss.item():.4f}]")


[Epoch 1/20] [D loss: 0.0595] [G loss: 13.3947]
[Epoch 2/20] [D loss: 0.1270] [G loss: 5.0579]
[Epoch 3/20] [D loss: 0.0320] [G loss: 7.9775]
[Epoch 4/20] [D loss: 0.1080] [G loss: 7.7511]
[Epoch 5/20] [D loss: 0.0454] [G loss: 3.2488]
[Epoch 6/20] [D loss: 0.1369] [G loss: 7.2370]
[Epoch 7/20] [D loss: 0.0174] [G loss: 4.7582]
[Epoch 8/20] [D loss: 0.0194] [G loss: 4.9820]
[Epoch 9/20] [D loss: 0.2093] [G loss: 2.9495]
[Epoch 10/20] [D loss: 0.1837] [G loss: 2.8370]
[Epoch 11/20] [D loss: 0.1959] [G loss: 3.8184]
[Epoch 12/20] [D loss: 0.1942] [G loss: 4.1508]
[Epoch 13/20] [D loss: 0.1178] [G loss: 3.9519]
[Epoch 14/20] [D loss: 0.1666] [G loss: 3.4920]
[Epoch 15/20] [D loss: 0.1901] [G loss: 3.3334]
[Epoch 16/20] [D loss: 0.0928] [G loss: 4.1976]
[Epoch 17/20] [D loss: 0.1269] [G loss: 4.3422]
[Epoch 18/20] [D loss: 0.2426] [G loss: 2.4488]
[Epoch 19/20] [D loss: 0.1544] [G loss: 2.9857]
[Epoch 20/20] [D loss: 0.1458] [G loss: 3.7974]


In [46]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()

        self.model = nn.Sequential(
            nn.Conv2d(1, 32, 3, stride=1, padding=1),  # 출력: (32, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                        # 출력: (32, 14, 14)
            nn.Conv2d(32, 64, 3, stride=1, padding=1), # 출력: (64, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2, 2),                        # 출력: (64, 7, 7)
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
        )

    def forward(self, x):
        return self.model(x)


In [48]:
classifier = Classifier()
criterion = nn.CrossEntropyLoss()
optimizer_C = optim.Adam(classifier.parameters(), lr=0.001)

num_epochs = 5

for epoch in range(num_epochs):
    classifier.train()
    for i, (imgs, labels) in enumerate(train_loader):
        optimizer_C.zero_grad()
        outputs = classifier(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer_C.step()

    # 테스트 셋에서 정확도 평가
    classifier.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in test_loader:
            outputs = classifier(imgs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f"Epoch [{epoch+1}/{num_epochs}], Accuracy: {100 * correct / total:.2f}%")


Epoch [1/5], Accuracy: 98.52%
Epoch [2/5], Accuracy: 98.79%
Epoch [3/5], Accuracy: 98.72%
Epoch [4/5], Accuracy: 98.63%
Epoch [5/5], Accuracy: 99.11%


In [49]:
def fgsm_attack(model, images, labels, epsilon):
    images.requires_grad = True
    outputs = model(images)
    loss = criterion(outputs, labels)
    model.zero_grad()
    loss.backward()
    perturbed_images = images + epsilon * images.grad.sign()
    perturbed_images = torch.clamp(perturbed_images, -1, 1)
    return perturbed_images


In [50]:
def reconstruct(generator, images, latent_dim, num_iterations=10, lr=0.01):
    device = images.device  # Ensure z is on the same device as images
    z = torch.randn(images.size(0), latent_dim, device=device, requires_grad=True)
    optimizer = optim.Adam([z], lr=lr)

    generator.eval()
    for param in generator.parameters():
        param.requires_grad = False

    for i in range(num_iterations):
        optimizer.zero_grad()
        gen_imgs = generator(z)
        loss = ((gen_imgs - images) ** 2).mean()
        loss.backward()
        optimizer.step()
        # Removed z = z.detach().requires_grad_()

    reconstructed_images = generator(z.detach())
    return reconstructed_images


In [51]:
classifier.eval()
correct = 0
total = 0
with torch.no_grad():
    for imgs, labels in test_loader:
        outputs = classifier(imgs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Accuracy on original test images: {100 * correct / total:.2f}%")


Accuracy on original test images: 99.11%


In [52]:
epsilon = 0.3  # 공격 강도 설정

classifier.eval()
correct = 0
total = 0
for imgs, labels in test_loader:
    adv_imgs = fgsm_attack(classifier, imgs, labels, epsilon)
    outputs = classifier(adv_imgs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
print(f"Accuracy on adversarial images without defense: {100 * correct / total:.2f}%")


Accuracy on adversarial images without defense: 69.18%


In [53]:
classifier.eval()
correct = 0
total = 0
for imgs, labels in test_loader:
    adv_imgs = fgsm_attack(classifier, imgs, labels, epsilon)
    recon_imgs = reconstruct(generator, adv_imgs, latent_dim)
    outputs = classifier(recon_imgs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
print(f"Accuracy on adversarial images with Defense-GAN: {100 * correct / total:.2f}%")


RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns

# 혼동 행렬 계산 함수
def get_confusion_matrix(model, data_loader):
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for imgs, labels in data_loader:
            outputs = model(imgs)
            _, predicted = torch.max(outputs.data, 1)
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    cm = confusion_matrix(all_labels, all_preds)
    return cm

# 원본 이미지에 대한 혼동 행렬
cm_original = get_confusion_matrix(classifier, test_loader)
# 공격된 이미지에 대한 혼동 행렬
cm_adv = get_confusion_matrix(classifier, [(fgsm_attack(classifier, imgs, labels, epsilon), labels) for imgs, labels in test_loader])

# 혼동 행렬 시각화
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
sns.heatmap(cm_original, annot=True, fmt='d')
plt.title('Confusion Matrix - Original Images')
plt.subplot(1,2,2)
sns.heatmap(cm_adv, annot=True, fmt='d')
plt.title('Confusion Matrix - Adversarial Images')
plt.show()


In [None]:
def compute_perturbation_norms(original_images, adversarial_images, reconstructed_images):
    # 이미지를 벡터 형태로 변환
    original_images = original_images.view(original_images.size(0), -1)
    adversarial_images = adversarial_images.view(adversarial_images.size(0), -1)
    reconstructed_images = reconstructed_images.view(reconstructed_images.size(0), -1)
    
    # 노이즈 계산
    perturbation = adversarial_images - original_images
    reconstructed_perturbation = reconstructed_images - original_images
    perturbation_norms = perturbation.norm(p=2, dim=1).mean().item()
    reconstructed_norms = reconstructed_perturbation.norm(p=2, dim=1).mean().item()
    return perturbation_norms, reconstructed_norms

# 테스트 데이터에서 샘플 이미지 가져오기
images, labels = next(iter(test_loader))

# FGSM 공격 생성
epsilon = 0.3
adv_images = fgsm_attack(classifier, images, labels, epsilon)

# Defense-GAN을 사용하여 이미지 복원
recon_images = reconstruct(generator, adv_images, latent_dim)

# L2 Norm 계산
pert_norm, recon_norm = compute_perturbation_norms(images, adv_images, recon_images)
print(f"Average L2 norm of adversarial perturbations: {pert_norm:.4f}")
print(f"Average L2 norm after Defense-GAN reconstruction: {recon_norm:.4f}")


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 공격 강도 목록 정의
epsilons = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]
acc_without_defense = []
acc_with_defense = []

classifier.eval()
for epsilon in epsilons:
    # Defense-GAN 없이 정확도 측정
    correct = 0
    total = 0
    for imgs, labels in test_loader:
        adv_imgs = fgsm_attack(classifier, imgs, labels, epsilon)
        outputs = classifier(adv_imgs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    acc_without_defense.append(accuracy)
    print(f"Epsilon: {epsilon}\tAccuracy without defense: {accuracy:.2f}%")
    
    # Defense-GAN 적용 후 정확도 측정
    correct = 0
    total = 0
    for imgs, labels in test_loader:
        adv_imgs = fgsm_attack(classifier, imgs, labels, epsilon)
        recon_imgs = reconstruct(generator, adv_imgs, latent_dim)
        outputs = classifier(recon_imgs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    acc_with_defense.append(accuracy)
    print(f"Epsilon: {epsilon}\tAccuracy with defense: {accuracy:.2f}%")

# 결과 시각화
plt.figure(figsize=(8,5))
plt.plot(epsilons, acc_without_defense, marker='o', label='Without Defense')
plt.plot(epsilons, acc_with_defense, marker='x', label='With Defense-GAN')
plt.title('Accuracy vs Epsilon')
plt.xlabel('Epsilon')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.grid(True)
plt.show()
