Merge pull request #383 from jonasrauber/new_example
Version 2.0.0 with new example
jonasrauber committed Oct 23, 2019
2 parents 4d6d7c1 + 5a04c89 commit 9d73dea
Showing 7 changed files with 157 additions and 30 deletions.
82 changes: 59 additions & 23 deletions README.rst
@@ -26,19 +26,16 @@ Installation

.. code-block:: bash
# Foolbox 1.8
# Foolbox 2.0
pip install foolbox
# Foolbox 2.0 release candidate
pip install foolbox --pre
Foolbox requires Python 3.5 or newer (since Foolbox 2.0).
Foolbox 2.0 requires Python 3.5 or newer.

Documentation
-------------

Documentation for the `latest stable version <https://foolbox.readthedocs.io/>`_ as well as
`pre-release versions <https://foolbox.readthedocs.io/en/latest/>`_ is available on ReadTheDocs.

Our paper describing Foolbox is on arXiv: https://arxiv.org/abs/1707.04131

@@ -48,24 +45,55 @@ Example
.. code-block:: python
import foolbox
import keras
import numpy as np
from keras.applications.resnet50 import ResNet50
import torchvision.models as models
# instantiate model
keras.backend.set_learning_phase(0)
kmodel = ResNet50(weights='imagenet')
preprocessing = (np.array([104, 116, 123]), 1)
fmodel = foolbox.models.KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing)
# instantiate model (supports PyTorch, Keras, TensorFlow (Graph and Eager), MXNet and many more)
model = models.resnet18(pretrained=True).eval()
preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
fmodel = foolbox.models.PyTorchModel(model, bounds=(0, 1), num_classes=1000, preprocessing=preprocessing)
# get source image and label
image, label = foolbox.utils.imagenet_example()
# get a batch of images and labels and print the accuracy
images, labels = foolbox.utils.samples(dataset='imagenet', batchsize=16, data_format='channels_first', bounds=(0, 1))
print(np.mean(fmodel.forward(images).argmax(axis=-1) == labels))
# -> 0.9375
# apply attack on source image
# ::-1 reverses the color channels, because Keras ResNet50 expects BGR instead of RGB
# apply the attack
attack = foolbox.attacks.FGSM(fmodel)
adversarial = attack(image[:, :, ::-1], label)
# if the attack fails, adversarial will be None and a warning will be printed
adversarials = attack(images, labels)
# if the i'th image is misclassified without any perturbation, then adversarials[i] will be the same as images[i]
# if the attack fails to find an adversarial for the i'th image, then all entries of adversarials[i] will be np.nan
# Foolbox guarantees that all returned adversarials are in fact adversarial, i.e. misclassified by the model
print(np.mean(fmodel.forward(adversarials).argmax(axis=-1) == labels))
# -> 0.0
.. code-block:: python
# In rare cases, it can happen that attacks return adversarials that are so close to the decision boundary
# that they actually might end up on the other (correct) side if you pass them through the model again like
# above to get the adversarial class. This is because models are not numerically deterministic (on GPU, some
# operations such as `sum` are non-deterministic by default) and not independent between samples (an input might
# be classified differently depending on the other inputs in the same batch).
# You can always get the actual adversarial class that was observed for that sample by Foolbox by
# passing `unpack=False` to get the actual `Adversarial` objects:
attack = foolbox.attacks.FGSM(fmodel, distance=foolbox.distances.Linf)
adversarials = attack(images, labels, unpack=False)
adversarial_classes = np.asarray([a.adversarial_class for a in adversarials])
print(labels)
print(adversarial_classes)
print(np.mean(adversarial_classes == labels)) # will always be 0.0
# The `Adversarial` objects also provide a `distance` attribute. Note that the distances
# can be 0 (misclassified without perturbation) and inf (attack failed).
distances = np.asarray([a.distance.value for a in adversarials])
print("{:.1e}, {:.1e}, {:.1e}".format(distances.min(), np.median(distances), distances.max()))
print("{} of {} attacks failed".format(sum(adv.distance.value == np.inf for adv in adversarials), len(adversarials)))
print("{} of {} inputs misclassified without perturbation".format(sum(adv.distance.value == 0 for adv in adversarials), len(adversarials)))
For more examples, have a look at the `documentation <https://foolbox.readthedocs.io/en/latest/user/examples.html>`__.

@@ -78,26 +106,34 @@ Finally, the result can be plotted like this:
import matplotlib.pyplot as plt
image = images[0]
adversarial = attack(images[:1], labels[:1])[0]
# CHW to HWC
image = image.transpose(1, 2, 0)
adversarial = adversarial.transpose(1, 2, 0)
plt.figure()
plt.subplot(1, 3, 1)
plt.title('Original')
plt.imshow(image / 255) # division by 255 to convert [0, 255] to [0, 1]
plt.imshow(image)
plt.axis('off')
plt.subplot(1, 3, 2)
plt.title('Adversarial')
plt.imshow(adversarial[:, :, ::-1] / 255) # ::-1 to convert BGR to RGB
plt.imshow(adversarial)
plt.axis('off')
plt.subplot(1, 3, 3)
plt.title('Difference')
difference = adversarial[:, :, ::-1] - image
difference = adversarial - image
plt.imshow(difference / abs(difference).max() * 0.2 + 0.5)
plt.axis('off')
plt.show()
.. image:: https://github.com/bethgelab/foolbox/raw/master/example.png


@@ -111,7 +147,7 @@ PyTorch, Theano, Lasagne and MXNet are available, e.g.
model = foolbox.models.PyTorchModel(torchmodel, bounds=(0, 255), num_classes=1000)
# etc.
Different adversarial criteria such as Top-k, specific target classes or target probability
values for the original class or the target class can be passed to the attack, e.g.

.. code-block:: python
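# a minimal sketch of passing a criterion to an attack; the target class (22) and the
# probability threshold below are illustrative assumptions, not values from this repository
criterion = foolbox.criteria.TargetClassProbability(22, p=0.99)
attack = foolbox.attacks.LBFGSAttack(fmodel, criterion)
adversarials = attack(images, labels)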
1 change: 1 addition & 0 deletions docs/modules/adversarial.rst
@@ -5,3 +5,4 @@

.. autoclass:: Adversarial
:members:
:inherited-members:
7 changes: 7 additions & 0 deletions docs/user/adversarial.rst
@@ -4,6 +4,13 @@ Advanced

The :class:`Adversarial` class provides an advanced way to specify the adversarial example that an attack should find, gives detailed information about the created adversarial, and makes it possible to improve a previously found adversarial example by re-running an attack.

.. code-block:: python3
from foolbox.v1 import Adversarial
from foolbox.v1.attacks import LBFGSAttack
from foolbox.models import TensorFlowModel
from foolbox.criteria import TargetClassProbability
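As a rough illustration of this explicit workflow, the following sketch assumes an already instantiated Foolbox model ``fmodel``, a single input ``image`` with original class ``label``, and an arbitrarily chosen target class 22:

.. code-block:: python3
criterion = TargetClassProbability(22, p=0.99)
adversarial = Adversarial(fmodel, criterion, image, label)
attack = LBFGSAttack()
# attacks accept an Adversarial instance directly; the object records the best
# adversarial found so far, so re-running an attack can only improve it
attack(adversarial)
print(adversarial.distance)
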
Implicit
========
2 changes: 2 additions & 0 deletions docs/user/development.rst
@@ -27,3 +27,5 @@ New Adversarial Attacks
Foolbox makes it easy to develop new adversarial attacks that can be applied to arbitrary models.

To implement an attack, simply subclass the :class:`Attack` class, implement the :meth:`__call__` method and decorate it with the :decorator:`call_decorator`. The :decorator:`call_decorator` ensures that your :meth:`__call__` implementation is called with an instance of the :class:`Adversarial` class. You can use this instance to ask for model predictions and gradients, to get the original image and its label, and more. In addition, the :class:`Adversarial` instance automatically keeps track of the best adversarial amongst all the inputs tested by the attack. That way, the implementation of the attack can focus on the attack logic.

To implement an attack that can make use of the batch support introduced in Foolbox 2.0, implement the :meth:`as_generator` method and decorate it with the :decorator:`generator_decorator`. All model calls using the :class:`Adversarial` object should use ``yield``.
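For orientation, here is a minimal sketch of such a generator-based attack; the attribute and method names mirror the built-in attacks (``unperturbed``, ``bounds``, ``forward_one``), but treat the exact signatures as an assumption rather than a reference:

.. code-block:: python3
import numpy as np
from foolbox.attacks.base import Attack, generator_decorator

class AdditiveNoiseSketchAttack(Attack):
    """Sketch: add increasingly strong uniform noise until the input is misclassified."""

    @generator_decorator
    def as_generator(self, a, epsilons=10):
        x = a.unperturbed
        min_, max_ = a.bounds()
        for epsilon in np.linspace(0, 1, num=epsilons + 1)[1:]:
            noise = np.random.uniform(-epsilon, epsilon, size=x.shape).astype(x.dtype)
            perturbed = np.clip(x + noise * (max_ - min_), min_, max_)
            # every model call goes through the Adversarial object and is yielded
            _, is_adversarial = yield from a.forward_one(perturbed)
            if is_adversarial:
                return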
88 changes: 86 additions & 2 deletions docs/user/examples.rst
@@ -4,6 +4,90 @@ Examples

Here you can find a collection of examples showing how Foolbox models can be created using different deep learning frameworks, as well as some full-blown attack examples at the end.

Running an attack
=================

Running a batch attack against a PyTorch model
----------------------------------------------

.. code-block:: python3
import foolbox
import numpy as np
import torchvision.models as models
# instantiate model (supports PyTorch, Keras, TensorFlow (Graph and Eager), MXNet and many more)
model = models.resnet18(pretrained=True).eval()
preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
fmodel = foolbox.models.PyTorchModel(model, bounds=(0, 1), num_classes=1000, preprocessing=preprocessing)
# get a batch of images and labels and print the accuracy
images, labels = foolbox.utils.samples(dataset='imagenet', batchsize=16, data_format='channels_first', bounds=(0, 1))
print(np.mean(fmodel.forward(images).argmax(axis=-1) == labels))
# -> 0.9375
# apply the attack
attack = foolbox.attacks.FGSM(fmodel)
adversarials = attack(images, labels)
# if the i'th image is misclassified without any perturbation, then adversarials[i] will be the same as images[i]
# if the attack fails to find an adversarial for the i'th image, then all entries of adversarials[i] will be np.nan
# Foolbox guarantees that all returned adversarials are in fact adversarial, i.e. misclassified by the model
print(np.mean(fmodel.forward(adversarials).argmax(axis=-1) == labels))
# -> 0.0
# ---
# In rare cases, it can happen that attacks return adversarials that are so close to the decision boundary
# that they actually might end up on the other (correct) side if you pass them through the model again like
# above to get the adversarial class. This is because models are not numerically deterministic (on GPU, some
# operations such as `sum` are non-deterministic by default) and not independent between samples (an input might
# be classified differently depending on the other inputs in the same batch).
# You can always get the actual adversarial class that was observed for that sample by Foolbox by
# passing `unpack=False` to get the actual `Adversarial` objects:
attack = foolbox.attacks.FGSM(fmodel, distance=foolbox.distances.Linf)
adversarials = attack(images, labels, unpack=False)
adversarial_classes = np.asarray([a.adversarial_class for a in adversarials])
print(labels)
print(adversarial_classes)
print(np.mean(adversarial_classes == labels)) # will always be 0.0
# The `Adversarial` objects also provide a `distance` attribute. Note that the distances
# can be 0 (misclassified without perturbation) and inf (attack failed).
distances = np.asarray([a.distance.value for a in adversarials])
print("{:.1e}, {:.1e}, {:.1e}".format(distances.min(), np.median(distances), distances.max()))
print("{} of {} attacks failed".format(sum(adv.distance.value == np.inf for adv in adversarials), len(adversarials)))
print("{} of {} inputs misclassified without perturbation".format(sum(adv.distance.value == 0 for adv in adversarials), len(adversarials)))

Running an attack on a single sample against a Keras model
----------------------------------------------------------

.. code-block:: python3
import foolbox
import keras
import numpy as np
from keras.applications.resnet50 import ResNet50
# instantiate model
keras.backend.set_learning_phase(0)
kmodel = ResNet50(weights='imagenet')
preprocessing = (np.array([104, 116, 123]), 1)
fmodel = foolbox.models.KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing)
# get source image and label
image, label = foolbox.utils.imagenet_example()
# apply attack on source image
# ::-1 reverses the color channels, because Keras ResNet50 expects BGR instead of RGB
attack = foolbox.v1.attacks.FGSM(fmodel)
adversarial = attack(image[:, :, ::-1], label)
# if the attack fails, adversarial will be None and a warning will be printed
Creating a model
================

@@ -136,7 +220,7 @@ FGSM (GradientSignAttack)
image, label = foolbox.utils.imagenet_example()
# apply attack on source image
attack = foolbox.attacks.FGSM(fmodel)
attack = foolbox.v1.attacks.FGSM(fmodel)
adversarial = attack(image[:,:,::-1], label)
@@ -167,7 +251,7 @@ Creating an untargeted adversarial for a PyTorch model
print('predicted class', np.argmax(fmodel.forward_one(image)))
# apply attack on source image
attack = foolbox.attacks.FGSM(fmodel)
attack = foolbox.v1.attacks.FGSM(fmodel)
adversarial = attack(image, label)
print('adversarial class', np.argmax(fmodel.forward_one(adversarial)))
5 changes: 1 addition & 4 deletions docs/user/tutorial.rst
@@ -49,10 +49,7 @@ Finally, we can create and apply the attack:
from foolbox.attacks import LBFGSAttack
attack = LBFGSAttack(model, criterion)
image = np.asarray(Image.open('example.jpg'))
label = np.argmax(model.forward_one(image))
images, labels = foolbox.utils.samples(dataset='imagenet', batchsize=16, data_format='channels_last', bounds=(0, 255))
adversarial = attack(image, label=label)
2 changes: 1 addition & 1 deletion foolbox/VERSION
@@ -1 +1 @@
2.0.0rc0
2.0.0
