Extend ART to feature vectors #69

Merged: 24 commits, May 16, 2019
3 changes: 1 addition & 2 deletions .travis.yml
@@ -37,7 +37,6 @@ install:
- conda install libgcc
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/travis/miniconda/envs/test-environment/lib
- export PYTHONPATH=".":$PYTHONPATH
- pip freeze

script:
- python -m unittest discover
- ./run_tests.sh
7 changes: 6 additions & 1 deletion art/attacks/adversarial_patch.py
@@ -85,14 +85,19 @@ def generate(self, x, y=None):
"""
Generate adversarial samples and return them in an array.

:param x: An array with the original inputs.
:param x: An array with the original inputs. `x` is expected to have spatial dimensions.
:type x: `np.ndarray`
:param y: An array with the original labels to be predicted.
:type y: `np.ndarray`
:return: An array holding the adversarial patch.
:rtype: `np.ndarray`
"""
logger.info('Creating adversarial patch.')

if len(x.shape) == 2:
raise ValueError('Feature vectors detected. The adversarial patch can only be applied to data with spatial '
'dimensions.')

self.patch = (np.random.standard_normal(size=self.patch_shape)) * 20.0

for i_step in range(self.max_iter):
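The new guard in `generate` only inspects `x.shape`, so its behaviour is easy to demonstrate in isolation. Below is a minimal standalone sketch of that check; the helper name `check_spatial_input` is hypothetical, not ART code.

```python
import numpy as np

def check_spatial_input(x):
    """Reject flat (batch, features) arrays, as the patched generate() now does."""
    if len(x.shape) == 2:
        raise ValueError('Feature vectors detected. The adversarial patch can only be applied '
                         'to data with spatial dimensions.')

check_spatial_input(np.zeros((8, 28, 28, 1)))   # image batch: passes silently
try:
    check_spatial_input(np.zeros((8, 20)))      # tabular batch: rejected
except ValueError as err:
    print(err)
```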
110 changes: 44 additions & 66 deletions art/attacks/boundary.py
@@ -21,15 +21,16 @@

import numpy as np

from art import NUMPY_DTYPE
from art.attacks.attack import Attack

logger = logging.getLogger(__name__)


class BoundaryAttack(Attack):
"""
Implementation of the boundary attack from Wieland Brendel et al. (2018).
Paper link: https://arxiv.org/abs/1712.04248
Implementation of the boundary attack from Wieland Brendel et al. (2018). This is a powerful black-box attack that
only requires final class prediction. Paper link: https://arxiv.org/abs/1712.04248
"""
attack_params = Attack.attack_params + ['targeted', 'delta', 'epsilon', 'step_adapt', 'max_iter', 'sample_size',
'init_size']
@@ -77,42 +78,38 @@ def generate(self, x, y=None):
:return: An array holding the adversarial examples.
:rtype: `np.ndarray`
"""
# Prediction from the original images
preds = np.argmax(self.classifier.predict(x), axis=1)

# Assert that, if attack is targeted, y is provided
if self.targeted and y is None:
raise ValueError('Target labels `y` need to be provided for a targeted attack.')

# Some initial setups
x_adv = x.copy()
x_adv = x.astype(NUMPY_DTYPE)
if y is not None:
y = np.argmax(y, axis=1)
preds = np.argmax(self.classifier.predict(x), axis=1)

# Generate the adversarial samples
for ind, val in enumerate(x_adv):
if self.targeted:
x_ = self._perturb(x=val, y=y[ind], y_p=preds[ind])
x_adv[ind] = self._perturb(x=val, y_p=preds[ind], y=y[ind])
else:
x_ = self._perturb(x=val, y=None, y_p=preds[ind])

x_adv[ind] = x_
x_adv[ind] = self._perturb(x=val, y_p=preds[ind])

logger.info('Success rate of Boundary attack: %.2f%%',
(np.sum(preds != np.argmax(self.classifier.predict(x_adv), axis=1)) / x.shape[0]))

return x_adv

def _perturb(self, x, y, y_p):
def _perturb(self, x, y_p, y=None):
"""
Internal attack function for one example.

:param x: An array with one original input to be attacked.
:type x: `np.ndarray`
:param y: If `self.targeted` is true, then `y` represents the target label.
:type y: `int`
:param y_p: The predicted label of x.
:type y_p: `int`
:param y: If `self.targeted` is true, then `y` represents the target label.
:type y: `int`
:return: an adversarial example.
"""
# First, create an initial adversarial sample
@@ -148,11 +145,13 @@ def _attack(self, initial_sample, original_sample, target, initial_delta, initia
:return: an adversarial example.
:rtype: `np.ndarray`
"""
def compare(object1, object2):
return object1 == object2 if self.targeted else object1 != object2

# Get initialization for some variables
x_adv = initial_sample
delta = initial_delta
epsilon = initial_epsilon
clip_min, clip_max = self.classifier.clip_values

# Main loop to wander around the boundary
for _ in range(self.max_iter):
@@ -161,22 +160,15 @@ def _attack(self, initial_sample, original_sample, target, initial_delta, initia
potential_advs = []
for _ in range(self.sample_size):
potential_adv = x_adv + self._orthogonal_perturb(delta, x_adv, original_sample)
potential_adv = np.clip(potential_adv, clip_min, clip_max)
if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
np.clip(potential_adv, self.classifier.clip_values[0], self.classifier.clip_values[1],
out=potential_adv)
potential_advs.append(potential_adv)

preds = np.argmax(self.classifier.predict(np.array(potential_advs)), axis=1)

if self.targeted:
satisfied = (preds == target)
else:
satisfied = (preds != target)

satisfied = compare(preds, target)
delta_ratio = np.mean(satisfied)

if delta_ratio < 0.5:
delta *= self.step_adapt
else:
delta /= self.step_adapt
delta = delta * self.step_adapt if delta_ratio < .5 else delta / self.step_adapt

if delta_ratio > 0:
x_adv = potential_advs[np.where(satisfied)[0][0]]
@@ -191,15 +183,13 @@ def _attack(self, initial_sample, original_sample, target, initial_delta, initia
perturb = original_sample - x_adv
perturb *= epsilon
potential_adv = x_adv + perturb
potential_adv = np.clip(potential_adv, clip_min, clip_max)
pred = np.argmax(self.classifier.predict(np.array([potential_adv])), axis=1)[0]
if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
np.clip(potential_adv, self.classifier.clip_values[0], self.classifier.clip_values[1],
out=potential_adv)

if self.targeted:
satisfied = (pred == target)
else:
satisfied = (pred != target)
pred = np.argmax(self.classifier.predict(np.array([potential_adv])), axis=1)[0]

if satisfied:
if compare(pred, target):
x_adv = potential_adv
epsilon /= self.step_adapt
break
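The refactor above folds the targeted and untargeted branches into a single `compare` helper and adjusts the step sizes with one rule: shrink when too few candidate points stay adversarial, grow otherwise. A minimal sketch of that rule, with an assumed helper name and an assumed default for `step_adapt`:

```python
def adapt_step(step, success_ratio, step_adapt=0.667):
    """Shrink the step when fewer than half of the candidates stay adversarial, grow it otherwise."""
    return step * step_adapt if success_ratio < 0.5 else step / step_adapt

print(adapt_step(0.1, success_ratio=0.2))   # mostly failing: smaller step (0.0667)
print(adapt_step(0.1, success_ratio=0.8))   # mostly succeeding: larger step (about 0.15)
```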
@@ -225,7 +215,7 @@ def _orthogonal_perturb(self, delta, current_sample, original_sample):
:return: a possible perturbation.
"""
# Generate perturbation randomly
perturb = np.random.randn(current_sample.shape[0], current_sample.shape[1], current_sample.shape[2])
perturb = np.random.randn(*self.classifier.input_shape)

# Rescale the perturbation
perturb /= np.linalg.norm(perturb)
@@ -254,52 +244,40 @@ def _orthogonal_perturb(self, delta, current_sample, original_sample):

return perturb

def _init_sample(self, x, y, y_p):
def _init_sample(self, x, y, y_pred):
"""
Find initial adversarial example for the attack.

:param x: An array with 1 original input to be attacked.
:type x: `np.ndarray`
:param y: If `self.targeted` is true, then `y` represents the target label.
:type y: `int`
:param y_p: The predicted label of x.
:type y_p: `int`
:param y_pred: The predicted label of x.
:type y_pred: `int`
:return: an adversarial example.
"""
clip_min, clip_max = self.classifier.clip_values
nprd = np.random.RandomState()
initial_sample = None

if self.targeted:
# Attack satisfied
if y == y_p:
return None

# Attack unsatisfied yet
for _ in range(self.init_size):
random_img = nprd.uniform(clip_min, clip_max, size=x.shape).astype(x.dtype)
random_class = np.argmax(self.classifier.predict(np.array([random_img])), axis=1)[0]

if random_class == y:
initial_sample = random_img
# Attack satisfied
if self.targeted and y == y_pred:
return None

logging.info('Found initial adversarial image for targeted attack.')
break
else:
logging.warning('Failed to draw a random image that is adversarial, attack failed.')

else:
for _ in range(self.init_size):
random_img = nprd.uniform(clip_min, clip_max, size=x.shape).astype(x.dtype)
random_class = np.argmax(self.classifier.predict(np.array([random_img])), axis=1)[0]

if random_class != y_p:
initial_sample = random_img

logging.info('Found initial adversarial image for untargeted attack.')
break
# Attack unsatisfied yet
for _ in range(self.init_size):
if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
random_sample = nprd.uniform(self.classifier.clip_values[0], self.classifier.clip_values[1],
size=x.shape).astype(x.dtype)
else:
logging.warning('Failed to draw a random image that is adversarial, attack failed.')
# TODO Adjust following feature-wise and for entire sample provided by user?
mean_, std_ = np.mean(x), np.std(x)
random_sample = nprd.normal(loc=mean_, scale=2 * std_, size=x.shape).astype(x.dtype)
random_class = np.argmax(self.classifier.predict(np.array([random_sample])), axis=1)[0]

if (self.targeted and random_class == y) or (not self.targeted and random_class != y_pred):
initial_sample = random_sample
logging.info('Found initial adversarial image for attack.')
break

return initial_sample

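With clip values no longer guaranteed, `_init_sample` now has two ways to draw a random starting point: a uniform draw inside the clip range when it exists, or a normal draw centred on the input otherwise, which suits unbounded feature vectors. A standalone sketch of that branching, with an assumed function name:

```python
import numpy as np

def draw_random_start(x, clip_values=None, rng=None):
    """Random restart for the boundary attack; mirrors the new _init_sample branches."""
    if rng is None:
        rng = np.random.RandomState()
    if clip_values is not None:
        low, high = clip_values
        return rng.uniform(low, high, size=x.shape).astype(x.dtype)
    mean_, std_ = np.mean(x), np.std(x)
    return rng.normal(loc=mean_, scale=2 * std_, size=x.shape).astype(x.dtype)

x_tabular = np.random.randn(20).astype(np.float32)                       # one unbounded feature vector
print(draw_random_start(x_tabular).dtype)                                # float32, normal draw
print(draw_random_start(x_tabular, clip_values=(0., 1.)).min() >= 0.0)   # True, uniform draw
```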
29 changes: 18 additions & 11 deletions art/attacks/carlini.py
@@ -127,7 +127,7 @@ def _loss(self, x, x_adv, target, c):

return z, l2dist, c*loss + l2dist

def _gradient_of_loss(self, z, target, x, x_adv, x_adv_tanh, c, clip_min, clip_max):
def _loss_gradient(self, z, target, x, x_adv, x_adv_tanh, c, clip_min, clip_max):
"""
Compute the gradient of the loss function.

@@ -185,7 +185,10 @@ def generate(self, x, y=None):
:rtype: `np.ndarray`
"""
x_adv = x.astype(NUMPY_DTYPE)
(clip_min, clip_max) = self.classifier.clip_values
if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
clip_min, clip_max = self.classifier.clip_values
else:
clip_min, clip_max = np.amin(x), np.amax(x)

# Assert that, if attack is targeted, y_val is provided:
if self.targeted and y is None:
@@ -204,8 +207,7 @@ def generate(self, x, y=None):
x_batch = x_adv[batch_index_1:batch_index_2]
y_batch = y[batch_index_1:batch_index_2]

# The optimization is performed in tanh space to keep the
# adversarial images bounded from clip_min and clip_max.
# The optimization is performed in tanh space to keep the adversarial images bounded in correct range
x_batch_tanh = original_to_tanh(x_batch, clip_min, clip_max, self._tanh_smoother)

# Initialize binary search:
@@ -256,9 +258,9 @@ def generate(self, x, y=None):

# compute gradient:
logger.debug('Compute loss gradient')
perturbation_tanh = -self._gradient_of_loss(z[active], y_batch[active], x_batch[active],
x_adv_batch[active], x_adv_batch_tanh[active],
c[active], clip_min, clip_max)
perturbation_tanh = -self._loss_gradient(z[active], y_batch[active], x_batch[active],
x_adv_batch[active], x_adv_batch_tanh[active],
c[active], clip_min, clip_max)

# perform line search to optimize perturbation
# first, halve the learning rate until perturbation actually decreases the loss:
@@ -496,7 +498,7 @@ def _loss(self, x_adv, target):

return z, loss

def _gradient_of_loss(self, z, target, x_adv, x_adv_tanh, clip_min, clip_max):
def _loss_gradient(self, z, target, x_adv, x_adv_tanh, clip_min, clip_max):
"""
Compute the gradient of the loss function.

@@ -545,6 +547,11 @@ def generate(self, x, y=None):
"""
x_adv = x.astype(NUMPY_DTYPE)

if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
clip_min_per_pixel, clip_max_per_pixel = self.classifier.clip_values
else:
clip_min_per_pixel, clip_max_per_pixel = np.amin(x), np.amax(x)

# Assert that, if attack is targeted, y_val is provided:
if self.targeted and y is None:
raise ValueError('Target labels `y` need to be provided for a targeted attack.')
@@ -562,7 +569,7 @@ def generate(self, x, y=None):
x_batch = x_adv[batch_index_1:batch_index_2]
y_batch = y[batch_index_1:batch_index_2]

(clip_min_per_pixel, clip_max_per_pixel) = self.classifier.clip_values
# Determine values for later clipping
clip_min = np.clip(x_batch - self.eps, clip_min_per_pixel, clip_max_per_pixel)
clip_max = np.clip(x_batch + self.eps, clip_min_per_pixel, clip_max_per_pixel)

@@ -592,8 +599,8 @@ def generate(self, x, y=None):

# compute gradient:
logger.debug('Compute loss gradient')
perturbation_tanh = -self._gradient_of_loss(z[active], y_batch[active], x_adv_batch[active],
x_adv_batch_tanh[active], clip_min[active], clip_max[active])
perturbation_tanh = -self._loss_gradient(z[active], y_batch[active], x_adv_batch[active],
x_adv_batch_tanh[active], clip_min[active], clip_max[active])

# perform line search to optimize perturbation
# first, halve the learning rate until perturbation actually decreases the loss:
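Both Carlini-Wagner variants now resolve a working clip range instead of assuming one: the classifier's `clip_values` if present, otherwise the minimum and maximum of the batch itself. The same fallback, as a small hedged helper (the function name and dummy class are mine, not ART's):

```python
import numpy as np

def resolve_clip_range(classifier, x):
    """Prefer the classifier's clip_values; fall back to the data range otherwise."""
    if hasattr(classifier, 'clip_values') and classifier.clip_values is not None:
        return classifier.clip_values
    return np.amin(x), np.amax(x)

class _NoClip:               # stand-in for a classifier wrapping unbounded features
    clip_values = None

x = np.array([[-3.2, 0.5], [1.7, 9.0]])
print(resolve_clip_range(_NoClip(), x))   # data-derived range: (-3.2, 9.0)
```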
22 changes: 15 additions & 7 deletions art/attacks/deepfool.py
@@ -21,6 +21,7 @@

import numpy as np

from art import NUMPY_DTYPE
from art.attacks.attack import Attack

logger = logging.getLogger(__name__)
@@ -64,8 +65,7 @@ def generate(self, x, y=None):
:return: An array holding the adversarial examples.
:rtype: `np.ndarray`
"""
clip_min, clip_max = self.classifier.clip_values
x_adv = x.copy()
x_adv = x.astype(NUMPY_DTYPE)
preds = self.classifier.predict(x, logits=True)

# Determine the class labels for which to compute the gradients
@@ -112,11 +112,16 @@ def generate(self, x, y=None):
value[np.arange(len(value)), labels_indices] = np.inf
l = np.argmin(value, axis=1)
r = (abs(f_diff[np.arange(len(f_diff)), l]) / (pow(np.linalg.norm(grad_diff[np.arange(len(
grad_diff)), l].reshape(len(grad_diff), -1), axis=1), 2) + tol))[:, None, None, None] * \
grad_diff[np.arange(len(grad_diff)), l]
grad_diff)), l].reshape(len(grad_diff), -1), axis=1), 2) + tol))
r = r.reshape((-1,) + (1,) * (len(x.shape) - 1))
r = r * grad_diff[np.arange(len(grad_diff)), l]

# Add perturbation and clip result
batch[active_indices] = np.clip(batch[active_indices] + r[active_indices], clip_min, clip_max)
if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
batch[active_indices] = np.clip(batch[active_indices] + r[active_indices],
self.classifier.clip_values[0], self.classifier.clip_values[1])
else:
batch[active_indices] += r[active_indices]

# Recompute prediction for new x
f = self.classifier.predict(batch, logits=True)
@@ -137,8 +142,11 @@ def generate(self, x, y=None):
current_step += 1

# Apply overshoot parameter
x_adv[batch_index_1:batch_index_2] = np.clip(x_adv[batch_index_1:batch_index_2] + (
1 + self.epsilon) * (batch - x_adv[batch_index_1:batch_index_2]), clip_min, clip_max)
x_adv[batch_index_1:batch_index_2] = x_adv[batch_index_1:batch_index_2] + \
(1 + self.epsilon) * (batch - x_adv[batch_index_1:batch_index_2])
if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
np.clip(x_adv[batch_index_1:batch_index_2], self.classifier.clip_values[0],
self.classifier.clip_values[1], out=x_adv[batch_index_1:batch_index_2])

logger.info('Success rate of DeepFool attack: %.2f%%',
(np.sum(np.argmax(preds, axis=1) != np.argmax(self.classifier.predict(x_adv), axis=1)) /
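The DeepFool change replaces the hard-coded `[:, None, None, None]`, which only broadcasts over 4-D image batches, with a reshape that works for any input rank. A quick sketch of just that trick (the helper name is hypothetical):

```python
import numpy as np

def per_sample_broadcast(values, x):
    """Reshape a length-N vector so it broadcasts against an (N, ...) batch of any rank."""
    return values.reshape((-1,) + (1,) * (x.ndim - 1))

scale = np.array([0.1, 0.2])
images = np.zeros((2, 28, 28, 3))
vectors = np.zeros((2, 20))
print(per_sample_broadcast(scale, images).shape)   # (2, 1, 1, 1)
print(per_sample_broadcast(scale, vectors).shape)  # (2, 1)
```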
4 changes: 2 additions & 2 deletions art/attacks/elastic_net.py
@@ -167,7 +167,6 @@ def generate(self, x, y=None):
:rtype: `np.ndarray`
"""
x_adv = x.astype(NUMPY_DTYPE)
(clip_min, clip_max) = self.classifier.clip_values

# Assert that, if attack is targeted, y is provided:
if self.targeted and y is None:
@@ -188,7 +187,8 @@ def generate(self, x, y=None):
x_adv[batch_index_1:batch_index_2] = self._generate_batch(x_batch, y_batch)

# Apply clip
x_adv = np.clip(x_adv, clip_min, clip_max)
if hasattr(self.classifier, 'clip_values') and self.classifier.clip_values is not None:
x_adv = np.clip(x_adv, self.classifier.clip_values[0], self.classifier.clip_values[1])

# Compute success rate of the EAD attack
logger.info('Success rate of EAD attack: %.2f%%',
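Taken together, the attacks in this PR stop assuming image-shaped input and mandatory clip values, which is what makes them usable on tabular data. A hypothetical end-to-end run on feature vectors might look like the sketch below; the wrapper class, import paths and constructor arguments are best-effort assumptions for this ART version and may differ from the actual API, and the toy model is untrained, so its predictions are arbitrary.

```python
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

from art.attacks import BoundaryAttack        # assumed import path for this version
from art.classifiers import KerasClassifier   # assumed wrapper; arguments may differ

# Toy classifier over 20-dimensional feature vectors (no spatial dimensions).
model = Sequential([Dense(32, activation='relu', input_shape=(20,)),
                    Dense(3, activation='softmax')])
model.compile(loss='categorical_crossentropy', optimizer='adam')

# Deliberately no clip_values: the updated attacks fall back to data-driven ranges.
classifier = KerasClassifier(model=model)

x = np.random.randn(16, 20).astype(np.float32)
attack = BoundaryAttack(classifier, targeted=False, max_iter=50)
x_adv = attack.generate(x)
print(x_adv.shape)   # (16, 20)
```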