diff --git a/README.rst b/README.rst
index badb755a..aef389b5 100644
--- a/README.rst
+++ b/README.rst
@@ -26,19 +26,16 @@ Installation
.. code-block:: bash
- # Foolbox 1.8
+ # Foolbox 2.0
pip install foolbox
-
- # Foolbox 2.0 release candidate
- pip install foolbox --pre
-Foolbox requires Python 3.5 or newer (since Foolbox 2.0).
+Foolbox 2.0 requires Python 3.5 or newer.
Documentation
-------------
Documentation for the `latest stable version <https://foolbox.readthedocs.io/en/stable/>`_ as well as
-`pre-release versions <https://foolbox.readthedocs.io/en/latest/>`_ is available on ReadTheDocs.
+`pre-release versions <https://foolbox.readthedocs.io/en/latest/>`_ is available on ReadTheDocs.
Our paper describing Foolbox is on arXiv: https://arxiv.org/abs/1707.04131
@@ -48,24 +45,55 @@ Example
.. code-block:: python
import foolbox
- import keras
import numpy as np
- from keras.applications.resnet50 import ResNet50
+ import torchvision.models as models
- # instantiate model
- keras.backend.set_learning_phase(0)
- kmodel = ResNet50(weights='imagenet')
- preprocessing = (np.array([104, 116, 123]), 1)
- fmodel = foolbox.models.KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing)
+ # instantiate model (supports PyTorch, Keras, TensorFlow (Graph and Eager), MXNet and many more)
+ model = models.resnet18(pretrained=True).eval()
+ preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
+ fmodel = foolbox.models.PyTorchModel(model, bounds=(0, 1), num_classes=1000, preprocessing=preprocessing)
- # get source image and label
- image, label = foolbox.utils.imagenet_example()
+ # get a batch of images and labels and print the accuracy
+ images, labels = foolbox.utils.samples(dataset='imagenet', batchsize=16, data_format='channels_first', bounds=(0, 1))
+ print(np.mean(fmodel.forward(images).argmax(axis=-1) == labels))
+ # -> 0.9375
- # apply attack on source image
- # ::-1 reverses the color channels, because Keras ResNet50 expects BGR instead of RGB
+ # apply the attack
attack = foolbox.attacks.FGSM(fmodel)
- adversarial = attack(image[:, :, ::-1], label)
- # if the attack fails, adversarial will be None and a warning will be printed
+ adversarials = attack(images, labels)
+ # if the i'th image is misclassified without a perturbation, then adversarials[i] will be the same as images[i]
+ # if the attack fails to find an adversarial for the i'th image, then all entries of adversarials[i] will be np.nan
+
+ # Foolbox guarantees that all returned adversarials are in fact adversarial
+ print(np.mean(fmodel.forward(adversarials).argmax(axis=-1) == labels))
+ # -> 0.0
+
+
+.. code-block:: python
+
+ # In rare cases, attacks can return adversarials that are so close to the decision boundary
+ # that they might end up on the other (correct) side if you pass them through the model again
+ # as above to get the adversarial class. This is because models are neither numerically
+ # deterministic (on GPU, some operations such as `sum` are non-deterministic by default) nor
+ # independent between samples (an input might be classified differently depending on the
+ # other inputs in the same batch).
+
+ # You can always get the adversarial class that Foolbox actually observed for each sample
+ # by passing `unpack=False` to get the underlying `Adversarial` objects:
+ attack = foolbox.attacks.FGSM(fmodel, distance=foolbox.distances.Linf)
+ adversarials = attack(images, labels, unpack=False)
+
+ adversarial_classes = np.asarray([a.adversarial_class for a in adversarials])
+ print(labels)
+ print(adversarial_classes)
+ print(np.mean(adversarial_classes == labels)) # will always be 0.0
+
+ # The `Adversarial` objects also provide a `distance` attribute. Note that a distance
+ # can be 0 (misclassified without perturbation) or inf (attack failed).
+ distances = np.asarray([a.distance.value for a in adversarials])
+ print("{:.1e}, {:.1e}, {:.1e}".format(distances.min(), np.median(distances), distances.max()))
+ print("{} of {} attacks failed".format(sum(adv.distance.value == np.inf for adv in adversarials), len(adversarials)))
+ print("{} of {} inputs misclassified without perturbation".format(sum(adv.distance.value == 0 for adv in adversarials), len(adversarials)))
+
For more examples, have a look at the `documentation <https://foolbox.readthedocs.io/en/latest/>`__.
@@ -78,26 +106,34 @@ Finally, the result can be plotted like this:
import matplotlib.pyplot as plt
+ image = images[0]
+ adversarial = attack(images[:1], labels[:1])[0]
+
+ # CHW to HWC
+ image = image.transpose(1, 2, 0)
+ adversarial = adversarial.transpose(1, 2, 0)
+
plt.figure()
plt.subplot(1, 3, 1)
plt.title('Original')
- plt.imshow(image / 255) # division by 255 to convert [0, 255] to [0, 1]
+ plt.imshow(image)
plt.axis('off')
plt.subplot(1, 3, 2)
plt.title('Adversarial')
- plt.imshow(adversarial[:, :, ::-1] / 255) # ::-1 to convert BGR to RGB
+ plt.imshow(adversarial)
plt.axis('off')
plt.subplot(1, 3, 3)
plt.title('Difference')
- difference = adversarial[:, :, ::-1] - image
+ difference = adversarial - image
plt.imshow(difference / abs(difference).max() * 0.2 + 0.5)
plt.axis('off')
plt.show()
+
.. image:: https://github.com/bethgelab/foolbox/raw/master/example.png
@@ -111,7 +147,7 @@ PyTorch, Theano, Lasagne and MXNet are available, e.g.
model = foolbox.models.PyTorchModel(torchmodel, bounds=(0, 255), num_classes=1000)
# etc.
-Different adversarial criteria such as Top-k, specific target classes or target probability
+Different adversarial criteria such as Top-k, specific target classes or target probability
values for the original class or the target class can be passed to the attack, e.g.
.. code-block:: python
diff --git a/docs/modules/adversarial.rst b/docs/modules/adversarial.rst
index e81ce7d6..90356213 100644
--- a/docs/modules/adversarial.rst
+++ b/docs/modules/adversarial.rst
@@ -5,3 +5,4 @@
.. autoclass:: Adversarial
:members:
+ :inherited-members:
diff --git a/docs/user/adversarial.rst b/docs/user/adversarial.rst
index 8a36e5eb..f31c56ec 100644
--- a/docs/user/adversarial.rst
+++ b/docs/user/adversarial.rst
@@ -4,6 +4,13 @@ Advanced
The :class:`Adversarial` class provides an advanced way to specify the adversarial example that should be found by an attack and provides detailed information about the created adversarial. In addition, it provides a way to improve a previously found adversarial example by re-running an attack.
+.. code-block:: python3
+
+ from foolbox.v1 import Adversarial
+ from foolbox.v1.attacks import LBFGSAttack
+ from foolbox.models import TensorFlowModel
+ from foolbox.criteria import TargetClassProbability
+
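+ # a minimal sketch of the explicit usage (assuming `model` is a Foolbox model such as a
+ # TensorFlowModel, and `image` and `label` are a sample and its original class):
+ criterion = TargetClassProbability(22, p=0.99)
+ adversarial = Adversarial(model, criterion, image, label)
+ attack = LBFGSAttack()
+ attack(adversarial)
+ # the perturbed input (if one was found) is available as adversarial.perturbed
+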
Implicit
========
diff --git a/docs/user/development.rst b/docs/user/development.rst
index 5fa3d5ff..d527b715 100644
--- a/docs/user/development.rst
+++ b/docs/user/development.rst
@@ -27,3 +27,5 @@ New Adversarial Attacks
Foolbox makes it easy to develop new adversarial attacks that can be applied to arbitrary models.
To implement an attack, simply subclass the :class:`Attack` class, implement the :meth:`__call__` method and decorate it with the :decorator:`call_decorator`. The :decorator:`call_decorator` will make sure that your :meth:`__call__` implementation will be called with an instance of the :class:`Adversarial` class. You can use this instance to ask for model predictions and gradients, get the original image and its label and more. In addition, the :class:`Adversarial` instance automatically keeps track of the best adversarial amongst all the inputs tested by the attack. That way, the implementation of the attack can focus on the attack logic.
+
+To implement an attack that can make use of the batch support introduced in Foolbox 2.0, implement the :meth:`as_generator` method and decorate it with the :decorator:`generator_decorator`. All model calls using the :class:`Adversarial` object should use ``yield``.
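+
+A minimal sketch of such a generator-based attack (the attack body here is purely illustrative; only the ``Attack`` base class, the ``generator_decorator`` and the ``yield from`` pattern are part of the actual API):
+
+.. code-block:: python3
+
+ from foolbox.attacks.base import Attack, generator_decorator
+
+ class IdentityAttack(Attack):
+     """Illustrative attack that only checks the unperturbed input."""
+
+     @generator_decorator
+     def as_generator(self, a):
+         # every model call on the Adversarial object uses ``yield``
+         logits, is_adversarial = yield from a.forward_one(a.unperturbed)
+         # a real attack would now search for a perturbation and test
+         # candidates in the same way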
diff --git a/docs/user/examples.rst b/docs/user/examples.rst
index c6e704ea..61c60039 100644
--- a/docs/user/examples.rst
+++ b/docs/user/examples.rst
@@ -4,6 +4,90 @@ Examples
Here you can find a collection of examples of how Foolbox models can be created using different deep learning frameworks, as well as some full-blown attack examples at the end.
+Running an attack
+=================
+
+Running a batch attack against a PyTorch model
+----------------------------------------------
+
+.. code-block:: python3
+
+ import foolbox
+ import numpy as np
+ import torchvision.models as models
+
+ # instantiate model (supports PyTorch, Keras, TensorFlow (Graph and Eager), MXNet and many more)
+ model = models.resnet18(pretrained=True).eval()
+ preprocessing = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], axis=-3)
+ fmodel = foolbox.models.PyTorchModel(model, bounds=(0, 1), num_classes=1000, preprocessing=preprocessing)
+
+ # get a batch of images and labels and print the accuracy
+ images, labels = foolbox.utils.samples(dataset='imagenet', batchsize=16, data_format='channels_first', bounds=(0, 1))
+ print(np.mean(fmodel.forward(images).argmax(axis=-1) == labels))
+ # -> 0.9375
+
+ # apply the attack
+ attack = foolbox.attacks.FGSM(fmodel)
+ adversarials = attack(images, labels)
+ # if the i'th image is misclassified without a perturbation, then adversarials[i] will be the same as images[i]
+ # if the attack fails to find an adversarial for the i'th image, then all entries of adversarials[i] will be np.nan
+
+ # Foolbox guarantees that all returned adversarials are in fact adversarial
+ print(np.mean(fmodel.forward(adversarials).argmax(axis=-1) == labels))
+ # -> 0.0
+
+ # ---
+
+ # In rare cases, attacks can return adversarials that are so close to the decision boundary
+ # that they might end up on the other (correct) side if you pass them through the model again
+ # as above to get the adversarial class. This is because models are neither numerically
+ # deterministic (on GPU, some operations such as `sum` are non-deterministic by default) nor
+ # independent between samples (an input might be classified differently depending on the
+ # other inputs in the same batch).
+
+ # You can always get the adversarial class that Foolbox actually observed for each sample
+ # by passing `unpack=False` to get the underlying `Adversarial` objects:
+ attack = foolbox.attacks.FGSM(fmodel, distance=foolbox.distances.Linf)
+ adversarials = attack(images, labels, unpack=False)
+
+ adversarial_classes = np.asarray([a.adversarial_class for a in adversarials])
+ print(labels)
+ print(adversarial_classes)
+ print(np.mean(adversarial_classes == labels)) # will always be 0.0
+
+ # The `Adversarial` objects also provide a `distance` attribute. Note that a distance
+ # can be 0 (misclassified without perturbation) or inf (attack failed).
+ distances = np.asarray([a.distance.value for a in adversarials])
+ print("{:.1e}, {:.1e}, {:.1e}".format(distances.min(), np.median(distances), distances.max()))
+ print("{} of {} attacks failed".format(sum(adv.distance.value == np.inf for adv in adversarials), len(adversarials)))
+ print("{} of {} inputs misclassified without perturbation".format(sum(adv.distance.value == 0 for adv in adversarials), len(adversarials)))
+
+
+Running an attack on a single sample against a Keras model
+----------------------------------------------------------
+
+.. code-block:: python3
+
+ import foolbox
+ import keras
+ import numpy as np
+ from keras.applications.resnet50 import ResNet50
+
+ # instantiate model
+ keras.backend.set_learning_phase(0)
+ kmodel = ResNet50(weights='imagenet')
+ preprocessing = (np.array([104, 116, 123]), 1)
+ fmodel = foolbox.models.KerasModel(kmodel, bounds=(0, 255), preprocessing=preprocessing)
+
+ # get source image and label
+ image, label = foolbox.utils.imagenet_example()
+
+ # apply attack on source image
+ # ::-1 reverses the color channels, because Keras ResNet50 expects BGR instead of RGB
+ attack = foolbox.v1.attacks.FGSM(fmodel)
+ adversarial = attack(image[:, :, ::-1], label)
+ # if the attack fails, adversarial will be None and a warning will be printed
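+
+ # as an illustrative check (assuming the attack succeeded), the adversarial should now
+ # be classified differently than the original image:
+ print(np.argmax(fmodel.forward_one(adversarial)))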
+
+
Creating a model
================
@@ -136,7 +220,7 @@ FGSM (GradientSignAttack)
image, label = foolbox.utils.imagenet_example()
# apply attack on source image
- attack = foolbox.attacks.FGSM(fmodel)
+ attack = foolbox.v1.attacks.FGSM(fmodel)
adversarial = attack(image[:,:,::-1], label)
@@ -167,7 +251,7 @@ Creating an untargeted adversarial for a PyTorch model
print('predicted class', np.argmax(fmodel.forward_one(image)))
# apply attack on source image
- attack = foolbox.attacks.FGSM(fmodel)
+ attack = foolbox.v1.attacks.FGSM(fmodel)
adversarial = attack(image, label)
print('adversarial class', np.argmax(fmodel.forward_one(adversarial)))
diff --git a/docs/user/tutorial.rst b/docs/user/tutorial.rst
index b7fec492..78efb3fc 100644
--- a/docs/user/tutorial.rst
+++ b/docs/user/tutorial.rst
@@ -49,10 +49,7 @@ Finally, we can create and apply the attack:
from foolbox.attacks import LBFGSAttack
attack = LBFGSAttack(model, criterion)
-
- image = np.asarray(Image.open('example.jpg'))
- label = np.argmax(model.forward_one(image))
-
+ images, labels = foolbox.utils.samples(dataset='imagenet', batchsize=16, data_format='channels_last', bounds=(0, 255))
-adversarial = attack(image, label=label)
+adversarials = attack(images, labels)
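+
+ # as a short illustrative check (assuming numpy is imported as np), compare the
+ # predicted classes of the adversarials with the original labels:
+ print(np.argmax(model.forward(adversarials), axis=-1) == labels)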
diff --git a/foolbox/VERSION b/foolbox/VERSION
index 737f8e1e..227cea21 100644
--- a/foolbox/VERSION
+++ b/foolbox/VERSION
@@ -1 +1 @@
-2.0.0rc0
+2.0.0