In [1]:
import random

import torch
import pytorch_lightning as pl
from autoattack import AutoAttack

from src.datamodules import RESISC45DataModule, EuroSATDataModule
from src.modules import CustomMultiLabelClassificationTask
from torchgeo.trainers import ClassificationTask

pl.seed_everything(0)

Global seed set to 0


0

### RESISC45

In [3]:
datamodule = RESISC45DataModule(root_dir="data/")
datamodule.setup()
dataset = datamodule.test_dataset

indices = random.sample(range(len(datamodule.test_dataset)), k=100)
batch = [dataset[idx] for idx in indices]
x = torch.stack([sample["image"] for sample in batch])
y = torch.stack([sample["label"] for sample in batch])
x, y = x.to("cuda"), y.to("cuda")

In [4]:
path = "logs/resisc45-robust/epoch=33-step=5032.ckpt"
model = CustomMultiLabelClassificationTask.load_from_checkpoint(path).model
model = model.to("cuda")

In [4]:
adversary = AutoAttack(model, norm="Linf", eps=8/255, version="custom", attacks_to_run=["apgd-ce", "apgd-dlr"])
adversary.apgd.n_restarts = 1
x_adv = adversary.run_standard_evaluation(x, y)

using custom version including apgd-ce, apgd-dlr
initial accuracy: 95.00%
apgd-ce - 1/1 - 53 out of 95 successfully perturbed
robust accuracy after APGD-CE: 42.00% (total time 22.2 s)
apgd-dlr - 1/1 - 29 out of 42 successfully perturbed
robust accuracy after APGD-DLR: 13.00% (total time 32.5 s)
max Linf perturbation: 2.55770, nan in tensor: 0, max: 3.55770, min: -2.05580
robust accuracy: 13.00%


In [5]:
adversary = AutoAttack(model, norm="L2", eps=1/2, version="custom", attacks_to_run=["apgd-ce", "apgd-dlr"])
adversary.apgd.n_restarts = 1
x_adv = adversary.run_standard_evaluation(x, y)

using custom version including apgd-ce, apgd-dlr
initial accuracy: 90.00%
apgd-ce - 1/1 - 4 out of 90 successfully perturbed
robust accuracy after APGD-CE: 86.00% (total time 22.7 s)
apgd-dlr - 1/1 - 0 out of 86 successfully perturbed
robust accuracy after APGD-DLR: 86.00% (total time 44.8 s)
max L2 perturbation: 604.56769, nan in tensor: 0, max: 3.55770, min: -2.05581
robust accuracy: 86.00%


In [6]:
path = "logs/resisc45-vanilla/epoch=39-step=5920.ckpt"
model = ClassificationTask.load_from_checkpoint(path).model
model = model.to("cuda")

In [7]:
adversary = AutoAttack(model, norm="Linf", eps=8/255, version="custom", attacks_to_run=["apgd-ce", "apgd-dlr"])
adversary.apgd.n_restarts = 1
x_adv = adversary.run_standard_evaluation(x, y)

using custom version including apgd-ce, apgd-dlr
initial accuracy: 93.00%
apgd-ce - 1/1 - 50 out of 93 successfully perturbed
robust accuracy after APGD-CE: 43.00% (total time 23.1 s)
apgd-dlr - 1/1 - 24 out of 43 successfully perturbed
robust accuracy after APGD-DLR: 19.00% (total time 34.6 s)
max Linf perturbation: 2.55770, nan in tensor: 0, max: 3.55770, min: -2.05580
robust accuracy: 19.00%


In [7]:
adversary = AutoAttack(model, norm="L2", eps=1/2, version="custom", attacks_to_run=["apgd-ce", "apgd-dlr"])
adversary.apgd.n_restarts = 1
x_adv = adversary.run_standard_evaluation(x, y)

using custom version including apgd-ce, apgd-dlr
initial accuracy: 95.00%
apgd-ce - 1/1 - 3 out of 95 successfully perturbed
robust accuracy after APGD-CE: 92.00% (total time 23.8 s)
apgd-dlr - 1/1 - 1 out of 92 successfully perturbed
robust accuracy after APGD-DLR: 91.00% (total time 47.5 s)
max L2 perturbation: 604.56769, nan in tensor: 0, max: 3.55770, min: -2.05581
robust accuracy: 91.00%


### EuroSAT

In [2]:
datamodule = EuroSATDataModule(root_dir="data/")
datamodule.setup()
dataset = datamodule.test_dataset

indices = random.sample(range(len(datamodule.test_dataset)), k=250)
batch = [dataset[idx] for idx in indices]
x = torch.stack([sample["image"] for sample in batch])
y = torch.stack([sample["label"] for sample in batch])
x, y = x.to("cuda"), y.to("cuda")

In [7]:
path = "logs/eurosat-robust/epoch=11-step=3048.ckpt"
model = CustomMultiLabelClassificationTask.load_from_checkpoint(path).model
model = model.to("cuda")

In [8]:
adversary = AutoAttack(model, norm="Linf", eps=8/255, version="custom", attacks_to_run=["apgd-ce", "apgd-dlr"])
adversary.apgd.n_restarts = 1
x_adv = adversary.run_standard_evaluation(x, y)

using custom version including apgd-ce, apgd-dlr
initial accuracy: 31.20%
apgd-ce - 1/1 - 56 out of 78 successfully perturbed
robust accuracy after APGD-CE: 8.80% (total time 15.6 s)
apgd-dlr - 1/1 - 17 out of 22 successfully perturbed
robust accuracy after APGD-DLR: 2.00% (total time 18.8 s)
max Linf perturbation: 5.52011, nan in tensor: 0, max: 0.00000, min: -5.52015
robust accuracy: 2.00%


In [9]:
adversary = AutoAttack(model, norm="L2", eps=1/2, version="custom", attacks_to_run=["apgd-ce", "apgd-dlr"])
adversary.apgd.n_restarts = 1
x_adv = adversary.run_standard_evaluation(x, y)

using custom version including apgd-ce, apgd-dlr
initial accuracy: 31.20%
apgd-ce - 1/1 - 56 out of 78 successfully perturbed
robust accuracy after APGD-CE: 8.80% (total time 16.0 s)
apgd-dlr - 1/1 - 17 out of 22 successfully perturbed
robust accuracy after APGD-DLR: 2.00% (total time 19.1 s)
max L2 perturbation: 612.65430, nan in tensor: 0, max: 0.00000, min: -5.52015
robust accuracy: 2.00%


In [5]:
path = "logs/eurosat-vanilla/epoch=47-step=6096.ckpt"
model = ClassificationTask.load_from_checkpoint(path).model
model = model.to("cuda")

In [14]:
adversary = AutoAttack(model, norm="Linf", eps=8/255, version="custom", attacks_to_run=["apgd-ce", "apgd-dlr"])
adversary.apgd.n_restarts = 1
x_adv = adversary.run_standard_evaluation(x, y)

using custom version including apgd-ce, apgd-dlr
initial accuracy: 45.20%
apgd-ce - 1/1 - 77 out of 113 successfully perturbed
robust accuracy after APGD-CE: 14.40% (total time 2.7 s)
apgd-dlr - 1/1 - 21 out of 36 successfully perturbed
robust accuracy after APGD-DLR: 6.00% (total time 5.2 s)
max Linf perturbation: 5.51925, nan in tensor: 0, max: 0.00000, min: -5.52015
robust accuracy: 6.00%


In [6]:
adversary = AutoAttack(model, norm="L2", eps=1/2, version="custom", attacks_to_run=["apgd-ce", "apgd-dlr"])
adversary.apgd.n_restarts = 1
x_adv = adversary.run_standard_evaluation(x, y)

using custom version including apgd-ce, apgd-dlr
initial accuracy: 45.20%
apgd-ce - 1/1 - 77 out of 113 successfully perturbed
robust accuracy after APGD-CE: 14.40% (total time 2.8 s)
apgd-dlr - 1/1 - 21 out of 36 successfully perturbed
robust accuracy after APGD-DLR: 6.00% (total time 6.1 s)
max L2 perturbation: 612.65430, nan in tensor: 0, max: 0.00000, min: -5.52015
robust accuracy: 6.00%
