
Merge pull request #590 from bethgelab/examples
improved the examples
jonasrauber committed Sep 23, 2020
2 parents 5ae0f10 + 32a156e commit b328c78
Showing 11 changed files with 298 additions and 140 deletions.
2 changes: 1 addition & 1 deletion README.rst
@@ -63,7 +63,7 @@ You can see the versions we currently use for testing in the `Compatibility sect
More examples can be found in the `examples <./examples/>`_ folder, e.g.
a full `ResNet-18 example <./examples/pytorch_resnet18.py>`_.
a full `ResNet-18 example <./examples/single_attack_pytorch_resnet18.py>`_.

📄 Citation
------------
32 changes: 32 additions & 0 deletions examples/README.md
@@ -0,0 +1,32 @@
## Examples

This folder contains examples that demonstrate how Foolbox can be used
to run one or more adversarial attacks and how to use the returned results
to compute the robust accuracy (the accuracy of the model when it is attacked).
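All examples follow the same basic pattern. The condensed sketch below (a compressed version of the single-attack scripts in this folder, with a shortened epsilon list) shows the core API calls:

```python
import torchvision.models as models
import eagerpy as ep
from foolbox import PyTorchModel, accuracy, samples
from foolbox.attacks import LinfPGD

# set up a model and a small batch of ImageNet samples
model = models.resnet18(pretrained=True).eval()
preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
fmodel = PyTorchModel(model, bounds=(0, 1), preprocessing=preprocessing)
images, labels = ep.astensors(*samples(fmodel, dataset="imagenet", batchsize=16))
print(f"clean accuracy: {accuracy(fmodel, images, labels) * 100:.1f} %")

# run one attack for several epsilons;
# `success` has one entry per (epsilon, sample) pair
attack = LinfPGD()
epsilons = [0.0, 0.001, 0.01, 0.1]
raw_advs, clipped_advs, success = attack(fmodel, images, labels, epsilons=epsilons)

# robust accuracy = 1 - attack success rate, reported per epsilon
robust_accuracy = 1 - success.float32().mean(axis=-1)
```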

The standard examples can be found in:
* `single_attack_pytorch_resnet18.py`
* `single_attack_tensorflow_resnet50.py`

They show how to run a single adversarial attack (Linf PGD) against an ImageNet
model in PyTorch and TensorFlow, respectively.

The remaining examples are all written for PyTorch,
but the only framework-specific part is instantiating the model
at the beginning of the script, so any example can easily be adapted
to any supported framework.

`multiple_attacks_pytorch_resnet18.py` is an extended version of the single attack
example. It shows how to combine the results of running multiple attacks
to report the robust accuracy always using the strongest attack per sample.
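The per-sample combination boils down to a single reduction over the attack axis, as in this minimal sketch (the name `attack_success` and its shape follow the script; the numbers are made up for illustration):

```python
import numpy as np

# attack_success: success flags (as floats) stacked over attacks,
# shape (n_attacks, n_epsilons, batchsize)
attack_success = np.array([
    [[1.0, 0.0, 0.0, 1.0]],  # attack 1
    [[0.0, 0.0, 1.0, 1.0]],  # attack 2
])

# max over the attack axis: a sample counts as successfully attacked if *any*
# attack succeeded on it, i.e. the strongest attack per sample is used
worst_case = attack_success.max(axis=0)           # shape (n_epsilons, batchsize)
robust_accuracy = 1.0 - worst_case.mean(axis=-1)  # shape (n_epsilons,)
print(robust_accuracy)  # [0.25]
```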

`spatial_attack_pytorch_resnet18.py` shows how to use the Spatial Attack. This attack
is a bit special because it doesn't use Lp balls and instead considers translations
and rotations. It therefore has a custom example. All the other attacks can be
used like Linf PGD in the other examples above.

`substituion_model_pytorch_resnet18.py` shows how to replace the gradient of
a model with the gradient of another model. This can be useful when the original
model has bad gradients ("gradient masking", "obfuscated gradients").
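The underlying trick does not depend on a particular Foolbox API: use one model's predictions in the forward pass but another model's gradients in the backward pass. A minimal PyTorch sketch of that idea (the helper name and the choice of ResNet-34 as the substitute are illustrative assumptions, not taken from the example script):

```python
import torch
import torchvision.models as models


def forward_f_backward_g(f, g, x):
    # f(x).detach() contributes the value only; g(x) - g(x).detach() is zero in
    # the forward pass but carries g's gradient in the backward pass, so the
    # result equals f(x) while its gradient w.r.t. x is that of g(x)
    return f(x).detach() + g(x) - g(x).detach()


# hypothetical usage: evaluate the (possibly gradient-masked) model f,
# but backpropagate through the smooth substitute g
f = models.resnet18(pretrained=True).eval()
g = models.resnet34(pretrained=True).eval()
x = torch.rand(1, 3, 224, 224, requires_grad=True)
logits = forward_f_backward_g(f, g, x)
logits.sum().backward()  # x.grad now holds gradients taken through g, not f
```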

The `zoo` folder shows how a model can be shared in a Foolbox Model Zoo compatible way.
examples/multiple_attacks_pytorch_resnet18.py
@@ -7,7 +7,7 @@


if __name__ == "__main__":
# instantiate a model
# instantiate a model (could also be a TensorFlow or JAX model)
model = models.resnet18(pretrained=True).eval()
preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
fmodel = PyTorchModel(model, bounds=(0, 1), preprocessing=preprocessing)
@@ -16,8 +16,8 @@
# wrapping the tensors with ep.astensors is optional, but it allows
# us to work with EagerPy tensors in the following
images, labels = ep.astensors(*samples(fmodel, dataset="imagenet", batchsize=16))
print("accuracy")
print(accuracy(fmodel, images, labels))
clean_acc = accuracy(fmodel, images, labels)
print(f"clean accuracy: {clean_acc * 100:.1f} %")
print("")

attacks = [
@@ -58,9 +58,16 @@
print(attack)
print(" ", 1.0 - success_.mean(axis=-1).round(2))

# calculate and report the robust accuracy (the accuracy of the model when
# it is attacked) using the best attack per sample
robust_accuracy = 1.0 - attack_success.max(axis=0).mean(axis=-1)
print("")
print("-" * 79)
print("")
print("worst case (best attack per-sample)")
print(" ", robust_accuracy.round(2))
print("")

print("robust accuracy for perturbations with")
for eps, acc in zip(epsilons, robust_accuracy):
print(f" Linf norm ≤ {eps:<6}: {acc.item() * 100:4.1f} %")
35 changes: 0 additions & 35 deletions examples/pytorch_resnet18.py

This file was deleted.

71 changes: 71 additions & 0 deletions examples/single_attack_pytorch_resnet18.py
@@ -0,0 +1,71 @@
#!/usr/bin/env python3
"""
A simple example that demonstrates how to run a single attack against
a PyTorch ResNet-18 model for different epsilons and how to then report
the robust accuracy.
"""
import torchvision.models as models
import eagerpy as ep
from foolbox import PyTorchModel, accuracy, samples
from foolbox.attacks import LinfPGD


def main() -> None:
# instantiate a model (could also be a TensorFlow or JAX model)
model = models.resnet18(pretrained=True).eval()
preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
fmodel = PyTorchModel(model, bounds=(0, 1), preprocessing=preprocessing)

# get data and test the model
# wrapping the tensors with ep.astensors is optional, but it allows
# us to work with EagerPy tensors in the following
images, labels = ep.astensors(*samples(fmodel, dataset="imagenet", batchsize=16))
clean_acc = accuracy(fmodel, images, labels)
print(f"clean accuracy: {clean_acc * 100:.1f} %")

# apply the attack
attack = LinfPGD()
epsilons = [
0.0,
0.0002,
0.0005,
0.0008,
0.001,
0.0015,
0.002,
0.003,
0.01,
0.1,
0.3,
0.5,
1.0,
]
raw_advs, clipped_advs, success = attack(fmodel, images, labels, epsilons=epsilons)

# calculate and report the robust accuracy (the accuracy of the model when
# it is attacked)
robust_accuracy = 1 - success.float32().mean(axis=-1)
print("robust accuracy for perturbations with")
for eps, acc in zip(epsilons, robust_accuracy):
print(f" Linf norm ≤ {eps:<6}: {acc.item() * 100:4.1f} %")

# we can also manually check this
# we will use the clipped advs instead of the raw advs, otherwise
# we would need to check if the perturbation sizes are actually
# within the specified epsilon bound
print()
print("we can also manually check this:")
print()
print("robust accuracy for perturbations with")
for eps, advs_ in zip(epsilons, clipped_advs):
acc2 = accuracy(fmodel, advs_, labels)
print(f" Linf norm ≤ {eps:<6}: {acc2 * 100:4.1f} %")
print(" perturbation sizes:")
perturbation_sizes = (advs_ - images).norms.linf(axis=(1, 2, 3)).numpy()
print(" ", str(perturbation_sizes).replace("\n", "\n" + " "))
if acc2 == 0:
break


if __name__ == "__main__":
main()
67 changes: 67 additions & 0 deletions examples/single_attack_tensorflow_resnet50.py
@@ -0,0 +1,67 @@
#!/usr/bin/env python3
import tensorflow as tf
import eagerpy as ep
from foolbox import TensorFlowModel, accuracy, samples, Model
from foolbox.attacks import LinfPGD


def main() -> None:
# instantiate a model (could also be a PyTorch or JAX model)
model = tf.keras.applications.ResNet50(weights="imagenet")
pre = dict(flip_axis=-1, mean=[104.0, 116.0, 123.0]) # RGB to BGR
fmodel: Model = TensorFlowModel(model, bounds=(0, 255), preprocessing=pre)
fmodel = fmodel.transform_bounds((0, 1))

# get data and test the model
# wrapping the tensors with ep.astensors is optional, but it allows
# us to work with EagerPy tensors in the following
images, labels = ep.astensors(*samples(fmodel, dataset="imagenet", batchsize=16))
clean_acc = accuracy(fmodel, images, labels)
print(f"clean accuracy: {clean_acc * 100:.1f} %")

# apply the attack
attack = LinfPGD()
epsilons = [
0.0,
0.0002,
0.0005,
0.0008,
0.001,
0.0015,
0.002,
0.003,
0.01,
0.1,
0.3,
0.5,
1.0,
]
raw_advs, clipped_advs, success = attack(fmodel, images, labels, epsilons=epsilons)

# calculate and report the robust accuracy (the accuracy of the model when
# it is attacked)
robust_accuracy = 1 - success.float32().mean(axis=-1)
print("robust accuracy for perturbations with")
for eps, acc in zip(epsilons, robust_accuracy):
print(f" Linf norm ≤ {eps:<6}: {acc.item() * 100:4.1f} %")

# we can also manually check this
# we will use the clipped advs instead of the raw advs, otherwise
# we would need to check if the perturbation sizes are actually
# within the specified epsilon bound
print()
print("we can also manually check this:")
print()
print("robust accuracy for perturbations with")
for eps, advs_ in zip(epsilons, clipped_advs):
acc2 = accuracy(fmodel, advs_, labels)
print(f" Linf norm ≤ {eps:<6}: {acc2 * 100:4.1f} %")
print(" perturbation sizes:")
perturbation_sizes = (advs_ - images).norms.linf(axis=(1, 2, 3)).numpy()
print(" ", str(perturbation_sizes).replace("\n", "\n" + " "))
if acc2 == 0:
break


if __name__ == "__main__":
main()
36 changes: 25 additions & 11 deletions examples/spatial_attack.py → examples/spatial_attack_pytorch_resnet18.py
100644 → 100755
@@ -1,13 +1,17 @@
#!/usr/bin/env python3
"""
The spatial attack is a very special attack because it tries to find adversarial
perturbations using a set of translations and rotations rather than in an Lp ball.
It therefore has a slightly different interface.
"""
import torchvision.models as models
import eagerpy as ep
from foolbox import PyTorchModel, accuracy, samples
import foolbox.attacks as fa
import numpy as np


if __name__ == "__main__":
# instantiate a model
def main() -> None:
# instantiate a model (could also be a TensorFlow or JAX model)
model = models.resnet18(pretrained=True).eval()
preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
fmodel = PyTorchModel(model, bounds=(0, 1), preprocessing=preprocessing)
Expand All @@ -16,23 +20,33 @@
# wrapping the tensors with ep.astensors is optional, but it allows
# us to work with EagerPy tensors in the following
images, labels = ep.astensors(*samples(fmodel, dataset="imagenet", batchsize=16))
print("accuracy")
print(accuracy(fmodel, images, labels))
print("")
clean_acc = accuracy(fmodel, images, labels) * 100
print(f"clean accuracy: {clean_acc:.1f} %")

# attacktrys a combination of specified rotations and translations to an image
# the attack tries combinations of the specified rotations and translations on each image
# and stops early once adversarial rotations and translations are found for all images
attack = fa.spatial_attack.SpatialAttack(
attack = fa.SpatialAttack(
max_translation=6, # 6px so x in [x-6, x+6] and y in [y-6, y+6]
num_translations=6, # number of translations in x, y.
max_rotation=20, # +- rotation in degrees
num_rotations=5, # number of rotations
# max total iterations = num_rotations * num_translations**2
)

# report the success rate of the attack (percentage of samples that could
# be adversarially perturbed) and the robust accuracy (the remaining accuracy
# of the model when it is attacked)
xp_, _, success = attack(fmodel, images, labels)
suc = success.float32().mean().item() * 100
print(
"attack success in specified rotation in translation bounds",
success.numpy().astype(np.float32).mean() * 100,
" %",
)
print(
f"attack success: {suc:.1f} %"
" (for the specified rotation and translation bounds)"
)
print(
f"robust accuracy: {100 - suc:.1f} %"
" (for the specified rotation and translation bounds)"
)


if __name__ == "__main__":
main()
54 changes: 0 additions & 54 deletions examples/substituion_model.py

This file was deleted.
