From 434d58281eefc29241e8c0a1dc4d8223c9fb13e1 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Thu, 7 Oct 2021 23:09:03 +0100 Subject: [PATCH 01/12] Initial version of adversarial texture attack Signed-off-by: Beat Buesser --- .../evasion/adversarial_texture/__init__.py | 0 .../adversarial_texture_pytorch.py | 753 ++++++++++++++++++ 2 files changed, 753 insertions(+) create mode 100644 art/attacks/evasion/adversarial_texture/__init__.py create mode 100644 art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py diff --git a/art/attacks/evasion/adversarial_texture/__init__.py b/art/attacks/evasion/adversarial_texture/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py new file mode 100644 index 0000000000..fadf432edc --- /dev/null +++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py @@ -0,0 +1,753 @@ +# MIT License +# +# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2021 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +""" +This module implements the adversarial patch attack `AdversarialPatch`. This attack generates an adversarial patch that +can be printed into the physical world with a common printer. The patch can be used to fool image and video classifiers. + +| Paper link: https://arxiv.org/abs/1712.09665 +""" +from __future__ import absolute_import, division, print_function, unicode_literals + +import logging +from typing import Optional, Tuple, Union, TYPE_CHECKING + +import numpy as np +from tqdm.auto import trange + +from art.attacks.attack import EvasionAttack +from art.attacks.evasion.adversarial_patch.utils import insert_transformed_patch +from art.estimators.estimator import BaseEstimator, NeuralNetworkMixin +from art.estimators.classification.classifier import ClassifierMixin +from art.utils import check_and_transform_label_format, is_probability, to_categorical + +if TYPE_CHECKING: + # pylint: disable=C0412 + import torch + + from art.utils import CLASSIFIER_NEURALNETWORK_TYPE + +logger = logging.getLogger(__name__) + + +class AdversarialTexturePyTorch(EvasionAttack): + """ + Implementation of the adversarial patch attack for square and rectangular images and videos in PyTorch. 
+ + | Paper link: https://arxiv.org/abs/1712.09665 + """ + + attack_params = EvasionAttack.attack_params + [ + "rotation_max", + "scale_min", + "scale_max", + "distortion_scale_max", + "step_size", + "max_iter", + "batch_size", + "patch_shape", + "tensor_board", + "verbose", + ] + + _estimator_requirements = (BaseEstimator, ) + + def __init__( + self, + estimator, + rotation_max: float = 22.5, + scale_min: float = 0.1, + scale_max: float = 1.0, + distortion_scale_max: float = 0.0, + step_size: float = 1.0 / 255.0, + max_iter: int = 500, + batch_size: int = 16, + patch_shape: Optional[Tuple[int, int, int]] = None, + patch_type: str = "circle", + tensor_board: Union[str, bool] = False, + verbose: bool = True, + patch_height=0, + patch_width=0, + xmin=0, + ymin=0, + ): + """ + Create an instance of the :class:`.AdversarialTexturePyTorch`. + + :param estimator: A trained estimator. + :param rotation_max: The maximum rotation applied to random patches. The value is expected to be in the + range `[0, 180]`. + :param scale_min: The minimum scaling applied to random patches. The value should be in the range `[0, 1]`, + but less than `scale_max`. + :param scale_max: The maximum scaling applied to random patches. The value should be in the range `[0, 1]`, but + larger than `scale_min`. + :param distortion_scale_max: The maximum distortion scale for perspective transformation in range `[0, 1]`. If + distortion_scale_max=0.0 the perspective transformation sampling will be disabled. + :param step_size: The step size. + :param max_iter: The number of optimization steps. + :param batch_size: The size of the training batch. + :param patch_shape: The shape of the adversarial patch as a tuple of shape HWC (width, height, nb_channels). + :param patch_type: The patch type, either circle or square. + :param verbose: Show progress bars. 
+ """ + import torch # lgtm [py/repeated-import] + import torchvision + + # torch_version = list(map(int, torch.__version__.lower().split("+")[0].split("."))) + # torchvision_version = list(map(int, torchvision.__version__.lower().split("+")[0].split("."))) + # assert torch_version[0] >= 1 and torch_version[1] >= 7, "AdversarialPatchPyTorch requires torch>=1.7.0" + # assert ( + # torchvision_version[0] >= 0 and torchvision_version[1] >= 8 + # ), "AdversarialPatchPyTorch requires torchvision>=0.8.0" + + super().__init__(estimator=estimator, tensor_board=tensor_board) + self.rotation_max = rotation_max + self.scale_min = scale_min + self.scale_max = scale_max + self.distortion_scale_max = distortion_scale_max + self.step_size = step_size + self.max_iter = max_iter + self.batch_size = batch_size + + self.patch_height = patch_height + self.patch_width = patch_width + # self.patch_height = int(patch_height / 600 * 224) + # self.patch_width = int(patch_width / 800 * 224) + self.xmin = xmin + self.ymin = ymin + # self.xmin = int(xmin / 600 * 224) + # self.ymin = int(ymin / 800 * 224) + + self.image_shape = estimator.input_shape + self.input_shape = self.estimator.input_shape + + self.nb_dims = len(self.image_shape) + + # if patch_shape is None: + # if self.nb_dims == 3: + # self.patch_shape = self.estimator.input_shape + # elif self.nb_dims == 4: + # self.patch_shape = (self.estimator.input_shape[1], self.estimator.input_shape[2], self.estimator.input_shape[3]) + self.patch_shape = (self.patch_height, self.patch_width, 3) + # else: + # self.patch_shape = patch_shape + self.patch_type = patch_type + + self.verbose = verbose + self._check_params() + + # if not self.estimator.channels_first: + if self.estimator.channels_first: + raise ValueError("Input shape has to be either NHWC or NFHWC.") + + # self.i_h_patch = 1 + # self.i_w_patch = 2 + self.i_h_patch = 0 + self.i_w_patch = 1 + + if self.nb_dims == 3: + # self.i_h = 1 + # self.i_w = 2 + self.i_h = 0 + self.i_w = 1 + elif self.nb_dims == 4: + # self.i_h = 2 + # self.i_w = 3 + self.i_h = 1 + self.i_w = 2 + + # if self.patch_shape[1] != self.patch_shape[2]: + # print(self.patch_shape) + # if self.patch_shape[0] != self.patch_shape[1]: + # raise ValueError("Patch height and width need to be the same.") + + if not (self.estimator.postprocessing_defences is None or self.estimator.postprocessing_defences == []): + raise ValueError( + "Framework-specific implementation of Adversarial Patch attack does not yet support " + + "postprocessing defences." 
+ ) + + mean_value = (self.estimator.clip_values[1] - self.estimator.clip_values[0]) / 2.0 + self.estimator.clip_values[ + 0 + ] + # print(self.patch_shape) + self._initial_value = np.ones(self.patch_shape) * mean_value + # self._patch = torch.tensor(self._initial_value, requires_grad=True, device=self.estimator.device) + self._patch = torch.from_numpy(self._initial_value) + self._patch.requires_grad = True + # self._patch.to(self.estimator.device) + + # self._optimizer = torch.optim.SGD([self._patch], lr=1.0) + + def _train_step( + self, images: "torch.Tensor", target: "torch.Tensor", mask: Optional["torch.Tensor"], y_init, foreground + ) -> "torch.Tensor": + import torch # lgtm [py/repeated-import] + + # self.estimator.model.zero_grad() + loss = self._loss(images, target, mask, y_init, foreground) + print('loss', loss) + loss.backward(retain_graph=True) + # self._optimizer.step() + + # with torch.no_grad(): + # print(self._patch) + # print(self._patch.grad) + gradients = self._patch.grad.sign() * self.step_size + + with torch.no_grad(): + self._patch[:] = torch.clamp( + self._patch + gradients, min=self.estimator.clip_values[0], max=self.estimator.clip_values[1] + ) + # print(np.max(self._patch.detach().numpy())) + + return loss + + def _predictions(self, images: "torch.Tensor", mask: Optional["torch.Tensor"], y_init, foreground) -> "torch.Tensor": + import torch # lgtm [py/repeated-import] + + patched_input = self._random_overlay(images, self._patch, mask=mask, foreground=foreground) + patched_input = torch.clamp( + patched_input, + min=self.estimator.clip_values[0], + max=self.estimator.clip_values[1], + ) + + # predictions = self.estimator._predict_framework(patched_input) # pylint: disable=W0212 + predictions = self.estimator.predict(patched_input, y_init=y_init) # pylint: disable=W0212 + # predictions = self.estimator.predict(images, y_init=y_init) # pylint: disable=W0212 + + return predictions + + def _loss(self, images: "torch.Tensor", target: "torch.Tensor", mask: Optional["torch.Tensor"], y_init, foreground) -> "torch.Tensor": + import torch # lgtm [py/repeated-import] + + y_pred = self._predictions(images, mask, y_init, foreground) + + # print(y_pred) + # print(target) + # asdf + + # if self.use_logits: + # loss = torch.nn.functional.cross_entropy( + # input=predictions, target=torch.argmax(target, dim=1), reduction="mean" + # ) + # else: + # loss = torch.nn.functional.nll_loss(input=predictions, target=torch.argmax(target, dim=1), reduction="mean") + + # print(type(y_pred[0]["boxes"])) + # print(type(target["boxes"])) + + loss = torch.nn.L1Loss(size_average=False)(y_pred[0]["boxes"].float(), target["boxes"][0].float()) + + return loss + + def _get_circular_patch_mask(self, nb_samples: int, sharpness: int = 40) -> "torch.Tensor": + """ + Return a circular patch mask. 
+ """ + import torch # lgtm [py/repeated-import] + + # print('self.patch_shape', self.patch_shape) + # print('self.i_h_patch', self.i_h_patch) + # print('self.i_w_patch', self.i_w_patch) + + # diameter = np.minimum(self.patch_shape[self.i_h_patch], self.patch_shape[self.i_w_patch]) + # + # if self.patch_type == "circle": + # x = np.linspace(-1, 1, diameter) + # y = np.linspace(-1, 1, diameter) + # x_grid, y_grid = np.meshgrid(x, y, sparse=True) + # z_grid = (x_grid ** 2 + y_grid ** 2) ** sharpness + # image_mask = 1 - np.clip(z_grid, -1, 1) + # elif self.patch_type == "square": + # # image_mask = np.ones((diameter, diameter)) + image_mask = np.ones((self.patch_height, self.patch_width)) + + # image_mask = np.expand_dims(image_mask, axis=0) + image_mask = np.expand_dims(image_mask, axis=2) + # print(image_mask.shape, self.patch_shape) + image_mask = np.broadcast_to(image_mask, self.patch_shape) + image_mask = torch.Tensor(np.array(image_mask)) + image_mask = torch.stack([image_mask] * nb_samples, dim=0) + return image_mask + + def _random_overlay( + self, + images: "torch.Tensor", + patch: "torch.Tensor", + scale: Optional[float] = None, + mask: Optional["torch.Tensor"] = None, + foreground = None + ) -> "torch.Tensor": + import torch # lgtm [py/repeated-import] + import torchvision + + nb_samples = images.shape[0] + + image_mask = self._get_circular_patch_mask(nb_samples=nb_samples) + image_mask = image_mask.float() + + # print('foreground.shape', foreground.shape) + + # smallest_image_edge = np.minimum(self.image_shape[self.i_h], self.image_shape[self.i_w]) + + # image_mask = image_mask.permute(0, 3, 1, 2) + # print(image_mask.shape) + + # image_mask = torchvision.transforms.functional.resize( + # img=image_mask, + # size=(smallest_image_edge, smallest_image_edge), + # interpolation=2, + # ) + + # image_mask = image_mask.permute(0, 2, 3, 1) + + # print(image_mask.shape) + # asdf + + # pad_h_before = int((self.image_shape[self.i_h] - image_mask.shape[self.i_h_patch + 1]) / 2) + # pad_h_after = int(self.image_shape[self.i_h] - pad_h_before - image_mask.shape[self.i_h_patch + 1]) + + pad_h_before = self.xmin + # print(self.image_shape[self.i_h], self.xmin) + # asdf + # print('image_mask.shape[self.i_h_patch + 1]', image_mask.shape[self.i_h_patch + 1]) + # print( image_mask.shape) + # pad_h_after = int(self.image_shape[self.i_h] - pad_h_before - image_mask.shape[self.i_h_patch + 1]) + pad_h_after = int(images.shape[self.i_h+1] - pad_h_before - image_mask.shape[self.i_h_patch + 1]) + # print(pad_h_after) + # asdf + + # print('images.shape', images.shape) + + # pad_w_before = int((self.image_shape[self.i_w] - image_mask.shape[self.i_w_patch + 1]) / 2) + # pad_w_after = int(self.image_shape[self.i_w] - pad_w_before - image_mask.shape[self.i_w_patch + 1]) + + pad_w_before = self.ymin + # pad_w_after = int(self.image_shape[self.i_w] - pad_w_before - image_mask.shape[self.i_w_patch + 1]) + pad_w_after = int(images.shape[self.i_w+1] - pad_w_before - image_mask.shape[self.i_w_patch + 1]) + + # print(image_mask.shape) + + image_mask = image_mask.permute(0, 3, 1, 2) + + # print(pad_w_before, pad_w_after, pad_h_before, pad_h_after) + + image_mask = torchvision.transforms.functional.pad( + img=image_mask, + padding=[pad_w_before, pad_h_before, pad_w_after, pad_h_after], + fill=0, + padding_mode="constant", + ) + + image_mask = image_mask.permute(0, 2, 3, 1) + + # print(image_mask.shape) + + if self.nb_dims == 4: + image_mask = torch.unsqueeze(image_mask, dim=1) + # image_mask = 
torch.repeat_interleave(image_mask, dim=1, repeats=self.input_shape[0]) + image_mask = torch.repeat_interleave(image_mask, dim=1, repeats=images.shape[1]) + + # print(image_mask.shape) + # asdf + + image_mask = image_mask.float() + + patch = patch.float() + padded_patch = torch.stack([patch] * nb_samples) + + padded_patch = padded_patch.permute(0, 3, 1, 2) + + # print('padded_patch', padded_patch.shape) + + # padded_patch = torchvision.transforms.functional.resize( + # img=padded_patch, + # size=(smallest_image_edge, smallest_image_edge), + # interpolation=2, + # ) + + padded_patch = torchvision.transforms.functional.pad( + img=padded_patch, + # padding=[pad_h_before, pad_w_before, pad_h_after, pad_w_after], + padding=[pad_w_before, pad_h_before, pad_w_after, pad_h_after], + fill=0, + padding_mode="constant", + ) + + padded_patch = padded_patch.permute(0, 2, 3, 1) + + if self.nb_dims == 4: + padded_patch = torch.unsqueeze(padded_patch, dim=1) + # padded_patch = torch.repeat_interleave(padded_patch, dim=1, repeats=self.input_shape[0]) + padded_patch = torch.repeat_interleave(padded_patch, dim=1, repeats=images.shape[1]) + + # print('padded_patch', padded_patch.shape) + # sdfg + + padded_patch = padded_patch.float() + + # image_mask_list = list() + # padded_patch_list = list() + # + # for i_sample in range(nb_samples): + # if scale is None: + # im_scale = np.random.uniform(low=self.scale_min, high=self.scale_max) + # else: + # im_scale = scale + # + # if mask is None: + # padding_after_scaling_h = ( + # self.image_shape[self.i_h] - im_scale * padded_patch.shape[self.i_h + 1] + # ) / 2.0 + # padding_after_scaling_w = ( + # self.image_shape[self.i_w] - im_scale * padded_patch.shape[self.i_w + 1] + # ) / 2.0 + # x_shift = np.random.uniform(-padding_after_scaling_w, padding_after_scaling_w) + # y_shift = np.random.uniform(-padding_after_scaling_h, padding_after_scaling_h) + # else: + # mask_2d = mask[i_sample, :, :] + # + # edge_x_0 = int(im_scale * padded_patch.shape[self.i_w + 1]) // 2 + # edge_x_1 = int(im_scale * padded_patch.shape[self.i_w + 1]) - edge_x_0 + # edge_y_0 = int(im_scale * padded_patch.shape[self.i_h + 1]) // 2 + # edge_y_1 = int(im_scale * padded_patch.shape[self.i_h + 1]) - edge_y_0 + # + # mask_2d[0:edge_x_0, :] = False + # if edge_x_1 > 0: + # mask_2d[-edge_x_1:, :] = False + # mask_2d[:, 0:edge_y_0] = False + # if edge_y_1 > 0: + # mask_2d[:, -edge_y_1:] = False + # + # num_pos = np.argwhere(mask_2d).shape[0] + # pos_id = np.random.choice(num_pos, size=1) + # pos = np.argwhere(mask_2d)[pos_id[0]] + # x_shift = pos[1] - self.image_shape[self.i_w] // 2 + # y_shift = pos[0] - self.image_shape[self.i_h] // 2 + # + # phi_rotate = float(np.random.uniform(-self.rotation_max, self.rotation_max)) + # + # image_mask_i = image_mask[i_sample] + # + # height = padded_patch.shape[self.i_h + 1] + # width = padded_patch.shape[self.i_w + 1] + # + # half_height = height // 2 + # half_width = width // 2 + # topleft = [ + # int(torch.randint(0, int(self.distortion_scale_max * half_width) + 1, size=(1,)).item()), + # int(torch.randint(0, int(self.distortion_scale_max * half_height) + 1, size=(1,)).item()), + # ] + # topright = [ + # int(torch.randint(width - int(self.distortion_scale_max * half_width) - 1, width, size=(1,)).item()), + # int(torch.randint(0, int(self.distortion_scale_max * half_height) + 1, size=(1,)).item()), + # ] + # botright = [ + # int(torch.randint(width - int(self.distortion_scale_max * half_width) - 1, width, size=(1,)).item()), + # int(torch.randint(height - 
int(self.distortion_scale_max * half_height) - 1, height, size=(1,)).item()), + # ] + # botleft = [ + # int(torch.randint(0, int(self.distortion_scale_max * half_width) + 1, size=(1,)).item()), + # int(torch.randint(height - int(self.distortion_scale_max * half_height) - 1, height, size=(1,)).item()), + # ] + # startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]] + # endpoints = [topleft, topright, botright, botleft] + # + # image_mask_i = torchvision.transforms.functional.perspective( + # img=image_mask_i, startpoints=startpoints, endpoints=endpoints, interpolation=2, fill=None + # ) + # + # image_mask_i = torchvision.transforms.functional.affine( + # img=image_mask_i, + # angle=phi_rotate, + # translate=[x_shift, y_shift], + # scale=im_scale, + # shear=[0, 0], + # resample=0, + # fillcolor=None, + # ) + # + # image_mask_list.append(image_mask_i) + # + # padded_patch_i = padded_patch[i_sample] + # + # padded_patch_i = torchvision.transforms.functional.perspective( + # img=padded_patch_i, startpoints=startpoints, endpoints=endpoints, interpolation=2, fill=None + # ) + # + # padded_patch_i = torchvision.transforms.functional.affine( + # img=padded_patch_i, + # angle=phi_rotate, + # translate=[x_shift, y_shift], + # scale=im_scale, + # shear=[0, 0], + # resample=0, + # fillcolor=None, + # ) + # + # padded_patch_list.append(padded_patch_i) + # + # image_mask = torch.stack(image_mask_list, dim=0) + # padded_patch = torch.stack(padded_patch_list, dim=0) + inverted_mask = torch.from_numpy(np.ones(shape=image_mask.shape, dtype=np.float32)) - image_mask + + # print("images.shape") + # print(images.shape) + # print("inverted_mask.shape") + # print(inverted_mask.shape) + # print("padded_patch.shape") + # print(padded_patch.shape) + # print("image_mask.shape") + # print(image_mask.shape) + # print("foreground.shape") + # print(foreground.shape) + + # from matplotlib import pyplot as plt + # + # fig, axs = plt.subplots(2, 3) + # + # idx = 16 + + combined = images * inverted_mask \ + + padded_patch * image_mask \ + - padded_patch * ~foreground.bool() \ + + images * ~foreground.bool() * image_mask + + # combined = padded_patch + # print('combined', combined) + + # grad = images * inverted_mask + images * ~foreground.bool() * image_mask + # grad = images * ~foreground.bool() * image_mask + + # axs[0, 0].imshow(images.detach().numpy()[0, idx, :, :, :]) + # axs[0, 1].imshow(inverted_mask.detach().numpy()[0, idx, :, :, :]) + # axs[0, 2].imshow(padded_patch.detach().numpy()[0, idx, :, :, :]) + # axs[1, 0].imshow(image_mask.detach().numpy()[0, idx, :, :, :]) + # axs[1, 1].imshow(foreground.detach().numpy()[0, idx, :, :, :]) + # axs[1, 2].imshow(grad.detach().numpy()[0, idx, :, :, :]) + # plt.show() + # + # lkj + + return combined + + def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> Tuple[np.ndarray, np.ndarray]: + """ + Generate an adversarial patch and return the patch and its mask in arrays. + + :param x: An array with the original input images of shape NHWC or input videos of shape NFHWC. + :param y: An array with the original true labels. + :param mask: An boolean array of shape equal to the shape of a single samples (1, H, W) or the shape of `x` + (N, H, W) without their channel dimensions. Any features for which the mask is True can be the + center location of the patch during sampling. + :type mask: `np.ndarray` + :return: An array with adversarial patch and an array of the patch mask. 
+ """ + import torch # lgtm [py/repeated-import] + + shuffle = kwargs.get("shuffle", True) + # mask = kwargs.get("mask") + y_init = kwargs.get("y_init") + foreground = kwargs.get("foreground") + + # if mask is not None: + # mask = mask.copy() + # mask = self._check_mask(mask=mask, x=x) + + # if y is None: + # logger.info("Setting labels to estimator predictions and running untargeted attack because `y=None`.") + # y = to_categorical(np.argmax(self.estimator.predict(x=x), axis=1), nb_classes=self.estimator.nb_classes) + # self.targeted = False + # else: + # self.targeted = True + + # y = check_and_transform_label_format(labels=y, nb_classes=self.estimator.nb_classes) + + # # check if logits or probabilities + # y_pred = self.estimator.predict(x=x[[0]]) + # + # if is_probability(y_pred): + # self.use_logits = False + # else: + # self.use_logits = True + + # x_tensor = torch.Tensor(x) + # y_tensor = torch.Tensor(y) + + class TrackingDataset(torch.utils.data.Dataset): + def __init__(self, x, y, y_init, foreground): + # self.x = np.transpose(x, axes=(0, 1, 4, 2, 3)) + # self.x = np.transpose(x, axes=(0, 1, 4, 2, 3)) + self.x = x + self.y = y + self.y_init = y_init + # self.mask = mask + self.foreground = foreground + + def __len__(self): + return self.x.shape[0] + + def __getitem__(self, idx): + img = self.x[idx] + + target = {} + target["boxes"] = torch.from_numpy(y[idx]["boxes"]) + target["labels"] = y[idx]["labels"] + + y_init_i = self.y_init[idx] + foreground_i = self.foreground[idx] + + return img, target, y_init_i, foreground_i + + dataset = TrackingDataset(x, y, y_init, foreground) + data_loader = torch.utils.data.DataLoader( + dataset=dataset, + batch_size=self.batch_size, + shuffle=shuffle, + drop_last=False, + ) + + from torch.nn.functional import interpolate + + for i_iter in trange(self.max_iter, desc="Adversarial Texture PyTorch", disable=not self.verbose): + for images_i, target_i, y_init_i, foreground_i in data_loader: + + # images_i = images_i.permute(0, 1, 4, 2, 3) + # images_i = interpolate(images_i, size=(3, 224, 224)) + # images_i = images_i.permute(0, 1, 3, 4, 2) + + # images_i = images_i.numpy() + # from matplotlib import pyplot as plt + # plt.imshow(images_i[0, 8, :, :, :]) + # plt.show() + + # foreground_i = foreground_i.permute(0, 1, 4, 2, 3) + # foreground_i = interpolate(foreground_i, size=(3, 224, 224)) + # foreground_i = foreground_i.permute(0, 1, 3, 4, 2) + + _ = self._train_step(images=images_i, target=target_i, mask=None, y_init=y_init_i, foreground=foreground_i) + + # return ( + # self._patch.detach().cpu().numpy(), + # self._get_circular_patch_mask(nb_samples=1).numpy()[0], + # ) + return self.apply_patch(x=x, scale=1, foreground=foreground) + + def _check_mask(self, mask: np.ndarray, x: np.ndarray) -> np.ndarray: + if mask is not None and ( + (mask.dtype != np.bool) + or not (mask.shape[0] == 1 or mask.shape[0] == x.shape[0]) + or not (mask.shape[1] == x.shape[self.i_h + 1] and mask.shape[2] == x.shape[self.i_w + 1]) + ): + raise ValueError( + "The shape of `mask` has to be equal to the shape of a single samples (1, H, W) or the" + "shape of `x` (N, H, W) without their channel dimensions." 
+ ) + + if mask is not None and mask.shape[0] == 1: + mask = np.repeat(mask, repeats=x.shape[0], axis=0) + + return mask + + def apply_patch( + self, + x: np.ndarray, + scale: float, + patch_external: Optional[np.ndarray] = None, + mask: Optional[np.ndarray] = None, + foreground=None, + ) -> np.ndarray: + """ + A function to apply the learned adversarial patch to images or videos. + + :param x: Instances to apply randomly transformed patch. + :param scale: Scale of the applied patch in relation to the classifier input shape. + :param patch_external: External patch to apply to images `x`. + :param mask: An boolean array of shape equal to the shape of a single samples (1, H, W) or the shape of `x` + (N, H, W) without their channel dimensions. Any features for which the mask is True can be the + center location of the patch during sampling. + :return: The patched samples. + """ + import torch # lgtm [py/repeated-import] + + # if mask is not None: + # mask = mask.copy() + # mask = self._check_mask(mask=mask, x=x) + patch = patch_external if patch_external is not None else self._patch + x = torch.Tensor(x) + + from torch.nn.functional import interpolate + + x_i = x + # x_i = x_i.permute(0, 1, 4, 2, 3) + # x_i = interpolate(x_i, size=(3, 224, 224)) + # x_i = x_i.permute(0, 1, 3, 4, 2) + + foreground = torch.Tensor(foreground) + + from torch.nn.functional import interpolate + + foreground_i = foreground + # foreground_i = foreground_i.permute(0, 1, 4, 2, 3) + # foreground_i = interpolate(foreground_i, size=(3, 224, 224)) + # foreground_i = foreground_i.permute(0, 1, 3, 4, 2) + + + return self._random_overlay(images=x_i, patch=patch, scale=scale, mask=mask, foreground=foreground_i).detach().cpu().numpy() + + def reset_patch(self, initial_patch_value: Optional[Union[float, np.ndarray]] = None) -> None: + """ + Reset the adversarial patch. + + :param initial_patch_value: Patch value to use for resetting the patch. + """ + import torch # lgtm [py/repeated-import] + + if initial_patch_value is None: + self._patch.data = torch.Tensor(self._initial_value).double() + elif isinstance(initial_patch_value, float): + initial_value = np.ones(self.patch_shape) * initial_patch_value + self._patch.data = torch.Tensor(initial_value).double() + elif self._patch.shape == initial_patch_value.shape: + self._patch.data = torch.Tensor(initial_patch_value).double() + else: + raise ValueError("Unexpected value for initial_patch_value.") + + @staticmethod + def insert_transformed_patch(x: np.ndarray, patch: np.ndarray, image_coords: np.ndarray): + """ + Insert patch to image based on given or selected coordinates. + + :param x: The image to insert the patch. + :param patch: The patch to be transformed and inserted. + :param image_coords: The coordinates of the 4 corners of the transformed, inserted patch of shape + [[x1, y1], [x2, y2], [x3, y3], [x4, y4]] in pixel units going in clockwise direction, starting with upper + left corner. + :return: The input `x` with the patch inserted. 
+ """ + return insert_transformed_patch(x, patch, image_coords) + + def _check_params(self) -> None: + super()._check_params() + + if not isinstance(self.distortion_scale_max, (float, int)) or 1.0 <= self.distortion_scale_max < 0.0: + raise ValueError("The maximum distortion scale has to be greater than or equal 0.0 or smaller than 1.0.") + + if self.patch_type not in ["circle", "square"]: + raise ValueError("The patch type has to be either `circle` or `square`.") From d1ec8eed443c8e223681aebdafe6c780fbba1ae8 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Fri, 8 Oct 2021 12:25:18 +0100 Subject: [PATCH 02/12] Remove commented code Signed-off-by: Beat Buesser --- .../adversarial_texture_pytorch.py | 508 ++---------------- 1 file changed, 57 insertions(+), 451 deletions(-) diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py index fadf432edc..124badc0d6 100644 --- a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py +++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py @@ -16,31 +16,24 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ -This module implements the adversarial patch attack `AdversarialPatch`. This attack generates an adversarial patch that -can be printed into the physical world with a common printer. The patch can be used to fool image and video classifiers. +Implementation of the adversarial patch attack for square and rectangular images and videos in PyTorch. | Paper link: https://arxiv.org/abs/1712.09665 """ -from __future__ import absolute_import, division, print_function, unicode_literals - import logging -from typing import Optional, Tuple, Union, TYPE_CHECKING +from typing import Optional, Union, TYPE_CHECKING import numpy as np from tqdm.auto import trange from art.attacks.attack import EvasionAttack from art.attacks.evasion.adversarial_patch.utils import insert_transformed_patch -from art.estimators.estimator import BaseEstimator, NeuralNetworkMixin -from art.estimators.classification.classifier import ClassifierMixin -from art.utils import check_and_transform_label_format, is_probability, to_categorical +from art.estimators.estimator import BaseEstimator if TYPE_CHECKING: # pylint: disable=C0412 import torch - from art.utils import CLASSIFIER_NEURALNETWORK_TYPE - logger = logging.getLogger(__name__) @@ -52,128 +45,71 @@ class AdversarialTexturePyTorch(EvasionAttack): """ attack_params = EvasionAttack.attack_params + [ - "rotation_max", - "scale_min", - "scale_max", - "distortion_scale_max", + "patch_height", + "patch_width", + "xmin", + "ymin", "step_size", "max_iter", "batch_size", - "patch_shape", - "tensor_board", "verbose", ] - _estimator_requirements = (BaseEstimator, ) + _estimator_requirements = (BaseEstimator,) def __init__( self, estimator, - rotation_max: float = 22.5, - scale_min: float = 0.1, - scale_max: float = 1.0, - distortion_scale_max: float = 0.0, + patch_height: int = 0, + patch_width: int = 0, + x_min: int = 0, + y_min: int = 0, step_size: float = 1.0 / 255.0, max_iter: int = 500, batch_size: int = 16, - patch_shape: Optional[Tuple[int, int, int]] = None, - patch_type: str = "circle", - tensor_board: Union[str, bool] = False, verbose: bool = True, - patch_height=0, - patch_width=0, - xmin=0, - ymin=0, ): """ Create an instance of the :class:`.AdversarialTexturePyTorch`. :param estimator: A trained estimator. 
-        :param rotation_max: The maximum rotation applied to random patches. The value is expected to be in the
-               range `[0, 180]`.
-        :param scale_min: The minimum scaling applied to random patches. The value should be in the range `[0, 1]`,
-               but less than `scale_max`.
-        :param scale_max: The maximum scaling applied to random patches. The value should be in the range `[0, 1]`, but
-               larger than `scale_min`.
-        :param distortion_scale_max: The maximum distortion scale for perspective transformation in range `[0, 1]`. If
-               distortion_scale_max=0.0 the perspective transformation sampling will be disabled.
+        :param patch_height: Height of patch.
+        :param patch_width: Width of patch.
+        :param x_min: Vertical position of the top edge of the patch within the frames.
+        :param y_min: Horizontal position of the left edge of the patch within the frames.
         :param step_size: The step size.
         :param max_iter: The number of optimization steps.
         :param batch_size: The size of the training batch.
-        :param patch_shape: The shape of the adversarial patch as a tuple of shape HWC (width, height, nb_channels).
-        :param patch_type: The patch type, either circle or square.
         :param verbose: Show progress bars.
         """
         import torch  # lgtm [py/repeated-import]
-        import torchvision

-        # torch_version = list(map(int, torch.__version__.lower().split("+")[0].split(".")))
-        # torchvision_version = list(map(int, torchvision.__version__.lower().split("+")[0].split(".")))
-        # assert torch_version[0] >= 1 and torch_version[1] >= 7, "AdversarialPatchPyTorch requires torch>=1.7.0"
-        # assert (
-        #     torchvision_version[0] >= 0 and torchvision_version[1] >= 8
-        # ), "AdversarialPatchPyTorch requires torchvision>=0.8.0"
-
-        super().__init__(estimator=estimator, tensor_board=tensor_board)
-        self.rotation_max = rotation_max
-        self.scale_min = scale_min
-        self.scale_max = scale_max
-        self.distortion_scale_max = distortion_scale_max
+        super().__init__(estimator=estimator)
         self.step_size = step_size
         self.max_iter = max_iter
         self.batch_size = batch_size

         self.patch_height = patch_height
         self.patch_width = patch_width
-        # self.patch_height = int(patch_height / 600 * 224)
-        # self.patch_width = int(patch_width / 800 * 224)
-        self.xmin = xmin
-        self.ymin = ymin
-        # self.xmin = int(xmin / 600 * 224)
-        # self.ymin = int(ymin / 800 * 224)
+        self.x_min = x_min
+        self.y_min = y_min

         self.image_shape = estimator.input_shape
         self.input_shape = self.estimator.input_shape

-        self.nb_dims = len(self.image_shape)
-
-        # if patch_shape is None:
-        #     if self.nb_dims == 3:
-        #         self.patch_shape = self.estimator.input_shape
-        #     elif self.nb_dims == 4:
-        #         self.patch_shape = (self.estimator.input_shape[1], self.estimator.input_shape[2], self.estimator.input_shape[3])
         self.patch_shape = (self.patch_height, self.patch_width, 3)
-        # else:
-        #     self.patch_shape = patch_shape
-        self.patch_type = patch_type

         self.verbose = verbose
         self._check_params()

-        # if not self.estimator.channels_first:
         if self.estimator.channels_first:
             raise ValueError("Input shape has to be either NHWC or NFHWC.")

-        # self.i_h_patch = 1
-        # self.i_w_patch = 2
         self.i_h_patch = 0
         self.i_w_patch = 1

-        if self.nb_dims == 3:
-            # self.i_h = 1
-            # self.i_w = 2
-            self.i_h = 0
-            self.i_w = 1
-        elif self.nb_dims == 4:
-            # self.i_h = 2
-            # self.i_w = 3
-            self.i_h = 1
-            self.i_w = 2
-
-        # if self.patch_shape[1] != self.patch_shape[2]:
-        # print(self.patch_shape)
-        # if self.patch_shape[0] != self.patch_shape[1]:
-        #     raise ValueError("Patch height and width need to be the same.")
+        self.i_h = 1
+        self.i_w = 2

         if not (self.estimator.postprocessing_defences is None or self.estimator.postprocessing_defences == []):
             raise
ValueError( @@ -184,116 +120,69 @@ def __init__( mean_value = (self.estimator.clip_values[1] - self.estimator.clip_values[0]) / 2.0 + self.estimator.clip_values[ 0 ] - # print(self.patch_shape) self._initial_value = np.ones(self.patch_shape) * mean_value - # self._patch = torch.tensor(self._initial_value, requires_grad=True, device=self.estimator.device) - self._patch = torch.from_numpy(self._initial_value) - self._patch.requires_grad = True + self._patch = torch.tensor(self._initial_value, requires_grad=True, device=self.estimator.device) + # self._patch = torch.from_numpy(self._initial_value) + # self._patch.requires_grad = True # self._patch.to(self.estimator.device) - # self._optimizer = torch.optim.SGD([self._patch], lr=1.0) - def _train_step( - self, images: "torch.Tensor", target: "torch.Tensor", mask: Optional["torch.Tensor"], y_init, foreground + self, images: "torch.Tensor", target: "torch.Tensor", y_init, foreground: Optional["torch.Tensor"] ) -> "torch.Tensor": import torch # lgtm [py/repeated-import] - # self.estimator.model.zero_grad() - loss = self._loss(images, target, mask, y_init, foreground) - print('loss', loss) + self.estimator.model.zero_grad() + loss = self._loss(images, target, y_init, foreground) loss.backward(retain_graph=True) - # self._optimizer.step() - # with torch.no_grad(): - # print(self._patch) - # print(self._patch.grad) gradients = self._patch.grad.sign() * self.step_size with torch.no_grad(): self._patch[:] = torch.clamp( self._patch + gradients, min=self.estimator.clip_values[0], max=self.estimator.clip_values[1] ) - # print(np.max(self._patch.detach().numpy())) return loss - def _predictions(self, images: "torch.Tensor", mask: Optional["torch.Tensor"], y_init, foreground) -> "torch.Tensor": + def _predictions(self, images: "torch.Tensor", y_init, foreground) -> "torch.Tensor": import torch # lgtm [py/repeated-import] - patched_input = self._random_overlay(images, self._patch, mask=mask, foreground=foreground) + patched_input = self._random_overlay(images, self._patch, foreground=foreground) patched_input = torch.clamp( patched_input, min=self.estimator.clip_values[0], max=self.estimator.clip_values[1], ) - # predictions = self.estimator._predict_framework(patched_input) # pylint: disable=W0212 predictions = self.estimator.predict(patched_input, y_init=y_init) # pylint: disable=W0212 - # predictions = self.estimator.predict(images, y_init=y_init) # pylint: disable=W0212 return predictions - def _loss(self, images: "torch.Tensor", target: "torch.Tensor", mask: Optional["torch.Tensor"], y_init, foreground) -> "torch.Tensor": + def _loss( + self, images: "torch.Tensor", target: "torch.Tensor", y_init, foreground: Optional["torch.Tensor"] + ) -> "torch.Tensor": import torch # lgtm [py/repeated-import] - y_pred = self._predictions(images, mask, y_init, foreground) - - # print(y_pred) - # print(target) - # asdf - - # if self.use_logits: - # loss = torch.nn.functional.cross_entropy( - # input=predictions, target=torch.argmax(target, dim=1), reduction="mean" - # ) - # else: - # loss = torch.nn.functional.nll_loss(input=predictions, target=torch.argmax(target, dim=1), reduction="mean") - - # print(type(y_pred[0]["boxes"])) - # print(type(target["boxes"])) - + y_pred = self._predictions(images, y_init, foreground) loss = torch.nn.L1Loss(size_average=False)(y_pred[0]["boxes"].float(), target["boxes"][0].float()) return loss - def _get_circular_patch_mask(self, nb_samples: int, sharpness: int = 40) -> "torch.Tensor": + def _get_circular_patch_mask(self, 
nb_samples: int) -> "torch.Tensor": """ Return a circular patch mask. """ import torch # lgtm [py/repeated-import] - # print('self.patch_shape', self.patch_shape) - # print('self.i_h_patch', self.i_h_patch) - # print('self.i_w_patch', self.i_w_patch) - - # diameter = np.minimum(self.patch_shape[self.i_h_patch], self.patch_shape[self.i_w_patch]) - # - # if self.patch_type == "circle": - # x = np.linspace(-1, 1, diameter) - # y = np.linspace(-1, 1, diameter) - # x_grid, y_grid = np.meshgrid(x, y, sparse=True) - # z_grid = (x_grid ** 2 + y_grid ** 2) ** sharpness - # image_mask = 1 - np.clip(z_grid, -1, 1) - # elif self.patch_type == "square": - # # image_mask = np.ones((diameter, diameter)) image_mask = np.ones((self.patch_height, self.patch_width)) - # image_mask = np.expand_dims(image_mask, axis=0) image_mask = np.expand_dims(image_mask, axis=2) - # print(image_mask.shape, self.patch_shape) image_mask = np.broadcast_to(image_mask, self.patch_shape) image_mask = torch.Tensor(np.array(image_mask)) image_mask = torch.stack([image_mask] * nb_samples, dim=0) return image_mask - def _random_overlay( - self, - images: "torch.Tensor", - patch: "torch.Tensor", - scale: Optional[float] = None, - mask: Optional["torch.Tensor"] = None, - foreground = None - ) -> "torch.Tensor": + def _random_overlay(self, images: "torch.Tensor", patch: "torch.Tensor", foreground=None) -> "torch.Tensor": import torch # lgtm [py/repeated-import] import torchvision @@ -302,52 +191,14 @@ def _random_overlay( image_mask = self._get_circular_patch_mask(nb_samples=nb_samples) image_mask = image_mask.float() - # print('foreground.shape', foreground.shape) + pad_h_before = self.x_min + pad_h_after = int(images.shape[self.i_h + 1] - pad_h_before - image_mask.shape[self.i_h_patch + 1]) - # smallest_image_edge = np.minimum(self.image_shape[self.i_h], self.image_shape[self.i_w]) - - # image_mask = image_mask.permute(0, 3, 1, 2) - # print(image_mask.shape) - - # image_mask = torchvision.transforms.functional.resize( - # img=image_mask, - # size=(smallest_image_edge, smallest_image_edge), - # interpolation=2, - # ) - - # image_mask = image_mask.permute(0, 2, 3, 1) - - # print(image_mask.shape) - # asdf - - # pad_h_before = int((self.image_shape[self.i_h] - image_mask.shape[self.i_h_patch + 1]) / 2) - # pad_h_after = int(self.image_shape[self.i_h] - pad_h_before - image_mask.shape[self.i_h_patch + 1]) - - pad_h_before = self.xmin - # print(self.image_shape[self.i_h], self.xmin) - # asdf - # print('image_mask.shape[self.i_h_patch + 1]', image_mask.shape[self.i_h_patch + 1]) - # print( image_mask.shape) - # pad_h_after = int(self.image_shape[self.i_h] - pad_h_before - image_mask.shape[self.i_h_patch + 1]) - pad_h_after = int(images.shape[self.i_h+1] - pad_h_before - image_mask.shape[self.i_h_patch + 1]) - # print(pad_h_after) - # asdf - - # print('images.shape', images.shape) - - # pad_w_before = int((self.image_shape[self.i_w] - image_mask.shape[self.i_w_patch + 1]) / 2) - # pad_w_after = int(self.image_shape[self.i_w] - pad_w_before - image_mask.shape[self.i_w_patch + 1]) - - pad_w_before = self.ymin - # pad_w_after = int(self.image_shape[self.i_w] - pad_w_before - image_mask.shape[self.i_w_patch + 1]) - pad_w_after = int(images.shape[self.i_w+1] - pad_w_before - image_mask.shape[self.i_w_patch + 1]) - - # print(image_mask.shape) + pad_w_before = self.y_min + pad_w_after = int(images.shape[self.i_w + 1] - pad_w_before - image_mask.shape[self.i_w_patch + 1]) image_mask = image_mask.permute(0, 3, 1, 2) - # print(pad_w_before, 
pad_w_after, pad_h_before, pad_h_after) - image_mask = torchvision.transforms.functional.pad( img=image_mask, padding=[pad_w_before, pad_h_before, pad_w_after, pad_h_after], @@ -357,15 +208,8 @@ def _random_overlay( image_mask = image_mask.permute(0, 2, 3, 1) - # print(image_mask.shape) - - if self.nb_dims == 4: - image_mask = torch.unsqueeze(image_mask, dim=1) - # image_mask = torch.repeat_interleave(image_mask, dim=1, repeats=self.input_shape[0]) - image_mask = torch.repeat_interleave(image_mask, dim=1, repeats=images.shape[1]) - - # print(image_mask.shape) - # asdf + image_mask = torch.unsqueeze(image_mask, dim=1) + image_mask = torch.repeat_interleave(image_mask, dim=1, repeats=images.shape[1]) image_mask = image_mask.float() @@ -374,17 +218,8 @@ def _random_overlay( padded_patch = padded_patch.permute(0, 3, 1, 2) - # print('padded_patch', padded_patch.shape) - - # padded_patch = torchvision.transforms.functional.resize( - # img=padded_patch, - # size=(smallest_image_edge, smallest_image_edge), - # interpolation=2, - # ) - padded_patch = torchvision.transforms.functional.pad( img=padded_patch, - # padding=[pad_h_before, pad_w_before, pad_h_after, pad_w_after], padding=[pad_w_before, pad_h_before, pad_w_after, pad_h_after], fill=0, padding_mode="constant", @@ -392,162 +227,23 @@ def _random_overlay( padded_patch = padded_patch.permute(0, 2, 3, 1) - if self.nb_dims == 4: - padded_patch = torch.unsqueeze(padded_patch, dim=1) - # padded_patch = torch.repeat_interleave(padded_patch, dim=1, repeats=self.input_shape[0]) - padded_patch = torch.repeat_interleave(padded_patch, dim=1, repeats=images.shape[1]) - - # print('padded_patch', padded_patch.shape) - # sdfg + padded_patch = torch.unsqueeze(padded_patch, dim=1) + padded_patch = torch.repeat_interleave(padded_patch, dim=1, repeats=images.shape[1]) padded_patch = padded_patch.float() - # image_mask_list = list() - # padded_patch_list = list() - # - # for i_sample in range(nb_samples): - # if scale is None: - # im_scale = np.random.uniform(low=self.scale_min, high=self.scale_max) - # else: - # im_scale = scale - # - # if mask is None: - # padding_after_scaling_h = ( - # self.image_shape[self.i_h] - im_scale * padded_patch.shape[self.i_h + 1] - # ) / 2.0 - # padding_after_scaling_w = ( - # self.image_shape[self.i_w] - im_scale * padded_patch.shape[self.i_w + 1] - # ) / 2.0 - # x_shift = np.random.uniform(-padding_after_scaling_w, padding_after_scaling_w) - # y_shift = np.random.uniform(-padding_after_scaling_h, padding_after_scaling_h) - # else: - # mask_2d = mask[i_sample, :, :] - # - # edge_x_0 = int(im_scale * padded_patch.shape[self.i_w + 1]) // 2 - # edge_x_1 = int(im_scale * padded_patch.shape[self.i_w + 1]) - edge_x_0 - # edge_y_0 = int(im_scale * padded_patch.shape[self.i_h + 1]) // 2 - # edge_y_1 = int(im_scale * padded_patch.shape[self.i_h + 1]) - edge_y_0 - # - # mask_2d[0:edge_x_0, :] = False - # if edge_x_1 > 0: - # mask_2d[-edge_x_1:, :] = False - # mask_2d[:, 0:edge_y_0] = False - # if edge_y_1 > 0: - # mask_2d[:, -edge_y_1:] = False - # - # num_pos = np.argwhere(mask_2d).shape[0] - # pos_id = np.random.choice(num_pos, size=1) - # pos = np.argwhere(mask_2d)[pos_id[0]] - # x_shift = pos[1] - self.image_shape[self.i_w] // 2 - # y_shift = pos[0] - self.image_shape[self.i_h] // 2 - # - # phi_rotate = float(np.random.uniform(-self.rotation_max, self.rotation_max)) - # - # image_mask_i = image_mask[i_sample] - # - # height = padded_patch.shape[self.i_h + 1] - # width = padded_patch.shape[self.i_w + 1] - # - # half_height = height // 2 
- # half_width = width // 2 - # topleft = [ - # int(torch.randint(0, int(self.distortion_scale_max * half_width) + 1, size=(1,)).item()), - # int(torch.randint(0, int(self.distortion_scale_max * half_height) + 1, size=(1,)).item()), - # ] - # topright = [ - # int(torch.randint(width - int(self.distortion_scale_max * half_width) - 1, width, size=(1,)).item()), - # int(torch.randint(0, int(self.distortion_scale_max * half_height) + 1, size=(1,)).item()), - # ] - # botright = [ - # int(torch.randint(width - int(self.distortion_scale_max * half_width) - 1, width, size=(1,)).item()), - # int(torch.randint(height - int(self.distortion_scale_max * half_height) - 1, height, size=(1,)).item()), - # ] - # botleft = [ - # int(torch.randint(0, int(self.distortion_scale_max * half_width) + 1, size=(1,)).item()), - # int(torch.randint(height - int(self.distortion_scale_max * half_height) - 1, height, size=(1,)).item()), - # ] - # startpoints = [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]] - # endpoints = [topleft, topright, botright, botleft] - # - # image_mask_i = torchvision.transforms.functional.perspective( - # img=image_mask_i, startpoints=startpoints, endpoints=endpoints, interpolation=2, fill=None - # ) - # - # image_mask_i = torchvision.transforms.functional.affine( - # img=image_mask_i, - # angle=phi_rotate, - # translate=[x_shift, y_shift], - # scale=im_scale, - # shear=[0, 0], - # resample=0, - # fillcolor=None, - # ) - # - # image_mask_list.append(image_mask_i) - # - # padded_patch_i = padded_patch[i_sample] - # - # padded_patch_i = torchvision.transforms.functional.perspective( - # img=padded_patch_i, startpoints=startpoints, endpoints=endpoints, interpolation=2, fill=None - # ) - # - # padded_patch_i = torchvision.transforms.functional.affine( - # img=padded_patch_i, - # angle=phi_rotate, - # translate=[x_shift, y_shift], - # scale=im_scale, - # shear=[0, 0], - # resample=0, - # fillcolor=None, - # ) - # - # padded_patch_list.append(padded_patch_i) - # - # image_mask = torch.stack(image_mask_list, dim=0) - # padded_patch = torch.stack(padded_patch_list, dim=0) inverted_mask = torch.from_numpy(np.ones(shape=image_mask.shape, dtype=np.float32)) - image_mask - # print("images.shape") - # print(images.shape) - # print("inverted_mask.shape") - # print(inverted_mask.shape) - # print("padded_patch.shape") - # print(padded_patch.shape) - # print("image_mask.shape") - # print(image_mask.shape) - # print("foreground.shape") - # print(foreground.shape) - - # from matplotlib import pyplot as plt - # - # fig, axs = plt.subplots(2, 3) - # - # idx = 16 - - combined = images * inverted_mask \ - + padded_patch * image_mask \ - - padded_patch * ~foreground.bool() \ - + images * ~foreground.bool() * image_mask - - # combined = padded_patch - # print('combined', combined) - - # grad = images * inverted_mask + images * ~foreground.bool() * image_mask - # grad = images * ~foreground.bool() * image_mask - - # axs[0, 0].imshow(images.detach().numpy()[0, idx, :, :, :]) - # axs[0, 1].imshow(inverted_mask.detach().numpy()[0, idx, :, :, :]) - # axs[0, 2].imshow(padded_patch.detach().numpy()[0, idx, :, :, :]) - # axs[1, 0].imshow(image_mask.detach().numpy()[0, idx, :, :, :]) - # axs[1, 1].imshow(foreground.detach().numpy()[0, idx, :, :, :]) - # axs[1, 2].imshow(grad.detach().numpy()[0, idx, :, :, :]) - # plt.show() - # - # lkj + combined = ( + images * inverted_mask + + padded_patch * image_mask + - padded_patch * ~foreground.bool() + + images * ~foreground.bool() * image_mask + ) return combined 
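

The `combined` tensor assembled above composites four terms: the original frames outside the texture region, the padded texture inside it, and two correction terms that first erase the texture and then restore the original pixels wherever the foreground mask marks the tracked object. A minimal NumPy sketch of the same masking logic follows; shapes, values, and names are illustrative only, and the padded patch is zero outside the texture region, mirroring the `fill=0` padding above.

import numpy as np

# One toy video in NFHWC layout: 1 sample, 2 frames, 4x4 pixels, 3 channels.
video = np.random.rand(1, 2, 4, 4, 3).astype(np.float32)
mask = np.zeros_like(video)
mask[:, :, 1:3, 1:3, :] = 1.0        # texture region (the padded patch mask)
patch_pad = np.zeros_like(video)
patch_pad[:, :, 1:3, 1:3, :] = 0.5   # padded patch: zero outside the region (fill=0)
foreground = np.ones_like(video)
foreground[:, :, 2, 2, :] = 0.0      # 0.0 marks a pixel of the tracked object

inverted_mask = 1.0 - mask
not_foreground = 1.0 - foreground    # counterpart of ~foreground.bool()

combined = (
    video * inverted_mask            # keep the frames outside the texture region
    + patch_pad * mask               # paste the texture inside the region
    - patch_pad * not_foreground     # erase the texture where the object occludes it
    + video * not_foreground * mask  # and restore the object's original pixels
)

assert np.allclose(combined[0, 0, 2, 2], video[0, 0, 2, 2])      # occluded pixel kept
assert np.allclose(combined[0, 0, 1, 1], patch_pad[0, 0, 1, 1])  # background shows texture
assert np.allclose(combined[0, 0, 0, 0], video[0, 0, 0, 0])      # outside region unchanged
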
- def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> Tuple[np.ndarray, np.ndarray]: + def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray: """ Generate an adversarial patch and return the patch and its mask in arrays. @@ -562,42 +258,14 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> T import torch # lgtm [py/repeated-import] shuffle = kwargs.get("shuffle", True) - # mask = kwargs.get("mask") y_init = kwargs.get("y_init") foreground = kwargs.get("foreground") - # if mask is not None: - # mask = mask.copy() - # mask = self._check_mask(mask=mask, x=x) - - # if y is None: - # logger.info("Setting labels to estimator predictions and running untargeted attack because `y=None`.") - # y = to_categorical(np.argmax(self.estimator.predict(x=x), axis=1), nb_classes=self.estimator.nb_classes) - # self.targeted = False - # else: - # self.targeted = True - - # y = check_and_transform_label_format(labels=y, nb_classes=self.estimator.nb_classes) - - # # check if logits or probabilities - # y_pred = self.estimator.predict(x=x[[0]]) - # - # if is_probability(y_pred): - # self.use_logits = False - # else: - # self.use_logits = True - - # x_tensor = torch.Tensor(x) - # y_tensor = torch.Tensor(y) - class TrackingDataset(torch.utils.data.Dataset): def __init__(self, x, y, y_init, foreground): - # self.x = np.transpose(x, axes=(0, 1, 4, 2, 3)) - # self.x = np.transpose(x, axes=(0, 1, 4, 2, 3)) self.x = x self.y = y self.y_init = y_init - # self.mask = mask self.foreground = foreground def __len__(self): @@ -623,61 +291,24 @@ def __getitem__(self, idx): drop_last=False, ) - from torch.nn.functional import interpolate - - for i_iter in trange(self.max_iter, desc="Adversarial Texture PyTorch", disable=not self.verbose): + for _ in trange(self.max_iter, desc="Adversarial Texture PyTorch", disable=not self.verbose): for images_i, target_i, y_init_i, foreground_i in data_loader: + _ = self._train_step( + images=images_i, target=target_i, y_init=y_init_i, foreground=foreground_i + ) - # images_i = images_i.permute(0, 1, 4, 2, 3) - # images_i = interpolate(images_i, size=(3, 224, 224)) - # images_i = images_i.permute(0, 1, 3, 4, 2) - - # images_i = images_i.numpy() - # from matplotlib import pyplot as plt - # plt.imshow(images_i[0, 8, :, :, :]) - # plt.show() - - # foreground_i = foreground_i.permute(0, 1, 4, 2, 3) - # foreground_i = interpolate(foreground_i, size=(3, 224, 224)) - # foreground_i = foreground_i.permute(0, 1, 3, 4, 2) - - _ = self._train_step(images=images_i, target=target_i, mask=None, y_init=y_init_i, foreground=foreground_i) - - # return ( - # self._patch.detach().cpu().numpy(), - # self._get_circular_patch_mask(nb_samples=1).numpy()[0], - # ) - return self.apply_patch(x=x, scale=1, foreground=foreground) - - def _check_mask(self, mask: np.ndarray, x: np.ndarray) -> np.ndarray: - if mask is not None and ( - (mask.dtype != np.bool) - or not (mask.shape[0] == 1 or mask.shape[0] == x.shape[0]) - or not (mask.shape[1] == x.shape[self.i_h + 1] and mask.shape[2] == x.shape[self.i_w + 1]) - ): - raise ValueError( - "The shape of `mask` has to be equal to the shape of a single samples (1, H, W) or the" - "shape of `x` (N, H, W) without their channel dimensions." 
- ) - - if mask is not None and mask.shape[0] == 1: - mask = np.repeat(mask, repeats=x.shape[0], axis=0) - - return mask + return self.apply_patch(x=x, foreground=foreground) def apply_patch( self, x: np.ndarray, - scale: float, patch_external: Optional[np.ndarray] = None, - mask: Optional[np.ndarray] = None, - foreground=None, + foreground: Optional[np.ndarray] = None, ) -> np.ndarray: """ A function to apply the learned adversarial patch to images or videos. :param x: Instances to apply randomly transformed patch. - :param scale: Scale of the applied patch in relation to the classifier input shape. :param patch_external: External patch to apply to images `x`. :param mask: An boolean array of shape equal to the shape of a single samples (1, H, W) or the shape of `x` (N, H, W) without their channel dimensions. Any features for which the mask is True can be the @@ -686,30 +317,11 @@ def apply_patch( """ import torch # lgtm [py/repeated-import] - # if mask is not None: - # mask = mask.copy() - # mask = self._check_mask(mask=mask, x=x) patch = patch_external if patch_external is not None else self._patch x = torch.Tensor(x) - - from torch.nn.functional import interpolate - - x_i = x - # x_i = x_i.permute(0, 1, 4, 2, 3) - # x_i = interpolate(x_i, size=(3, 224, 224)) - # x_i = x_i.permute(0, 1, 3, 4, 2) - foreground = torch.Tensor(foreground) - from torch.nn.functional import interpolate - - foreground_i = foreground - # foreground_i = foreground_i.permute(0, 1, 4, 2, 3) - # foreground_i = interpolate(foreground_i, size=(3, 224, 224)) - # foreground_i = foreground_i.permute(0, 1, 3, 4, 2) - - - return self._random_overlay(images=x_i, patch=patch, scale=scale, mask=mask, foreground=foreground_i).detach().cpu().numpy() + return self._random_overlay(images=x, patch=patch, foreground=foreground).detach().cpu().numpy() def reset_patch(self, initial_patch_value: Optional[Union[float, np.ndarray]] = None) -> None: """ @@ -745,9 +357,3 @@ def insert_transformed_patch(x: np.ndarray, patch: np.ndarray, image_coords: np. def _check_params(self) -> None: super()._check_params() - - if not isinstance(self.distortion_scale_max, (float, int)) or 1.0 <= self.distortion_scale_max < 0.0: - raise ValueError("The maximum distortion scale has to be greater than or equal 0.0 or smaller than 1.0.") - - if self.patch_type not in ["circle", "square"]: - raise ValueError("The patch type has to be either `circle` or `square`.") From b1716d25bd850377eeb4792f150bc68ec2596927 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Fri, 8 Oct 2021 19:38:56 +0100 Subject: [PATCH 03/12] Add support for mini-batches Signed-off-by: Beat Buesser --- .../adversarial_texture_pytorch.py | 150 ++++++++++-------- 1 file changed, 80 insertions(+), 70 deletions(-) diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py index 124badc0d6..ef831f54d8 100644 --- a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py +++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py @@ -16,12 +16,12 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """ -Implementation of the adversarial patch attack for square and rectangular images and videos in PyTorch. +Implementation of the adversarial texture attack on object trackers in PyTorch. 
-| Paper link: https://arxiv.org/abs/1712.09665 +| Paper link: https://arxiv.org/abs/1904.11042 """ import logging -from typing import Optional, Union, TYPE_CHECKING +from typing import Dict, List, Optional, Union, TYPE_CHECKING import numpy as np from tqdm.auto import trange @@ -39,9 +39,9 @@ class AdversarialTexturePyTorch(EvasionAttack): """ - Implementation of the adversarial patch attack for square and rectangular images and videos in PyTorch. + Implementation of the adversarial texture attack on object trackers in PyTorch. - | Paper link: https://arxiv.org/abs/1712.09665 + | Paper link: https://arxiv.org/abs/1904.11042 """ attack_params = EvasionAttack.attack_params + [ @@ -85,23 +85,18 @@ def __init__( import torch # lgtm [py/repeated-import] super().__init__(estimator=estimator) - self.step_size = step_size - self.max_iter = max_iter - self.batch_size = batch_size - self.patch_height = patch_height self.patch_width = patch_width self.x_min = x_min self.y_min = y_min - - self.image_shape = estimator.input_shape - self.input_shape = self.estimator.input_shape - - self.patch_shape = (self.patch_height, self.patch_width, 3) - + self.step_size = step_size + self.max_iter = max_iter + self.batch_size = batch_size self.verbose = verbose self._check_params() + self.patch_shape = (self.patch_height, self.patch_width, 3) + if self.estimator.channels_first: raise ValueError("Input shape has to be either NHWC or NFHWC.") @@ -122,17 +117,18 @@ def __init__( ] self._initial_value = np.ones(self.patch_shape) * mean_value self._patch = torch.tensor(self._initial_value, requires_grad=True, device=self.estimator.device) - # self._patch = torch.from_numpy(self._initial_value) - # self._patch.requires_grad = True - # self._patch.to(self.estimator.device) def _train_step( - self, images: "torch.Tensor", target: "torch.Tensor", y_init, foreground: Optional["torch.Tensor"] + self, + videos: "torch.Tensor", + target: "torch.Tensor", + y_init: "torch.Tensor", + foreground: Optional["torch.Tensor"], ) -> "torch.Tensor": import torch # lgtm [py/repeated-import] self.estimator.model.zero_grad() - loss = self._loss(images, target, y_init, foreground) + loss = self._loss(videos, target, y_init, foreground) loss.backward(retain_graph=True) gradients = self._patch.grad.sign() * self.step_size @@ -144,10 +140,12 @@ def _train_step( return loss - def _predictions(self, images: "torch.Tensor", y_init, foreground) -> "torch.Tensor": + def _predictions( + self, videos: "torch.Tensor", y_init: "torch.Tensor", foreground: Optional["torch.Tensor"] + ) -> "torch.Tensor": import torch # lgtm [py/repeated-import] - patched_input = self._random_overlay(images, self._patch, foreground=foreground) + patched_input = self._apply_texture(videos, self._patch, foreground=foreground) patched_input = torch.clamp( patched_input, min=self.estimator.clip_values[0], @@ -159,18 +157,27 @@ def _predictions(self, images: "torch.Tensor", y_init, foreground) -> "torch.Ten return predictions def _loss( - self, images: "torch.Tensor", target: "torch.Tensor", y_init, foreground: Optional["torch.Tensor"] + self, + images: "torch.Tensor", + target: "torch.Tensor", + y_init: "torch.Tensor", + foreground: Optional["torch.Tensor"], ) -> "torch.Tensor": import torch # lgtm [py/repeated-import] y_pred = self._predictions(images, y_init, foreground) loss = torch.nn.L1Loss(size_average=False)(y_pred[0]["boxes"].float(), target["boxes"][0].float()) + for i in range(1, len(y_pred)): + loss = loss + 
torch.nn.L1Loss(size_average=False)(y_pred[i]["boxes"].float(), target["boxes"][i].float()) return loss - def _get_circular_patch_mask(self, nb_samples: int) -> "torch.Tensor": + def _get_patch_mask(self, nb_samples: int) -> "torch.Tensor": """ - Return a circular patch mask. + Create patch mask. + + :param nb_samples: Number of samples. + :return: Patch mask. """ import torch # lgtm [py/repeated-import] @@ -182,20 +189,30 @@ def _get_circular_patch_mask(self, nb_samples: int) -> "torch.Tensor": image_mask = torch.stack([image_mask] * nb_samples, dim=0) return image_mask - def _random_overlay(self, images: "torch.Tensor", patch: "torch.Tensor", foreground=None) -> "torch.Tensor": + def _apply_texture( + self, videos: "torch.Tensor", patch: "torch.Tensor", foreground: Optional["torch.Tensor"] + ) -> "torch.Tensor": + """ + Apply texture over background and overlay foreground. + + :param videos: + :param patch: + :param foreground: + :return: Patched videos. + """ import torch # lgtm [py/repeated-import] import torchvision - nb_samples = images.shape[0] + nb_samples = videos.shape[0] - image_mask = self._get_circular_patch_mask(nb_samples=nb_samples) + image_mask = self._get_patch_mask(nb_samples=nb_samples) image_mask = image_mask.float() pad_h_before = self.x_min - pad_h_after = int(images.shape[self.i_h + 1] - pad_h_before - image_mask.shape[self.i_h_patch + 1]) + pad_h_after = int(videos.shape[self.i_h + 1] - pad_h_before - image_mask.shape[self.i_h_patch + 1]) pad_w_before = self.y_min - pad_w_after = int(images.shape[self.i_w + 1] - pad_w_before - image_mask.shape[self.i_w_patch + 1]) + pad_w_after = int(videos.shape[self.i_w + 1] - pad_w_before - image_mask.shape[self.i_w_patch + 1]) image_mask = image_mask.permute(0, 3, 1, 2) @@ -209,7 +226,7 @@ def _random_overlay(self, images: "torch.Tensor", patch: "torch.Tensor", foregro image_mask = image_mask.permute(0, 2, 3, 1) image_mask = torch.unsqueeze(image_mask, dim=1) - image_mask = torch.repeat_interleave(image_mask, dim=1, repeats=images.shape[1]) + image_mask = torch.repeat_interleave(image_mask, dim=1, repeats=videos.shape[1]) image_mask = image_mask.float() @@ -228,31 +245,41 @@ def _random_overlay(self, images: "torch.Tensor", patch: "torch.Tensor", foregro padded_patch = padded_patch.permute(0, 2, 3, 1) padded_patch = torch.unsqueeze(padded_patch, dim=1) - padded_patch = torch.repeat_interleave(padded_patch, dim=1, repeats=images.shape[1]) + padded_patch = torch.repeat_interleave(padded_patch, dim=1, repeats=videos.shape[1]) padded_patch = padded_patch.float() inverted_mask = torch.from_numpy(np.ones(shape=image_mask.shape, dtype=np.float32)) - image_mask combined = ( - images * inverted_mask + videos * inverted_mask + padded_patch * image_mask - padded_patch * ~foreground.bool() - + images * ~foreground.bool() * image_mask + + videos * ~foreground.bool() * image_mask ) return combined - def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray: + def generate(self, x: np.ndarray, y: List[Dict[str, np.ndarray]], **kwargs) -> np.ndarray: """ Generate an adversarial patch and return the patch and its mask in arrays. - :param x: An array with the original input images of shape NHWC or input videos of shape NFHWC. - :param y: An array with the original true labels. - :param mask: An boolean array of shape equal to the shape of a single samples (1, H, W) or the shape of `x` - (N, H, W) without their channel dimensions. 
Any features for which the mask is True can be the - center location of the patch during sampling. - :type mask: `np.ndarray` + :param x: Input videos of shape NFHWC. + :param y: True labels of format `List[Dict[str, np.ndarray]]`, one dictionary for each input image. The keys of + the dictionary are: + - boxes [N_FRAMES, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and + 0 <= y1 < y2 <= H. + + :Keyword Arguments: + * *shuffle* (``np.ndarray``) -- + Shuffle order of samples, labels, initial boxes, and foregrounds for texture generation. + * *y_init* (``np.ndarray``) -- + Initial boxes around object to be tracked of shape (nb_samples, 4) with second dimension representing + [x1, y1, x2, y2] with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H. + * *foreground* (``np.ndarray``) -- + Foreground masks of shape NFHWC of boolean values with False/0.0 representing foreground, preventing + updates to the texture, and True/1.0 for background, allowing updates to the texture. + :return: An array with adversarial patch and an array of the patch mask. """ import torch # lgtm [py/repeated-import] @@ -276,7 +303,6 @@ def __getitem__(self, idx): target = {} target["boxes"] = torch.from_numpy(y[idx]["boxes"]) - target["labels"] = y[idx]["labels"] y_init_i = self.y_init[idx] foreground_i = self.foreground[idx] @@ -292,10 +318,8 @@ def __getitem__(self, idx): ) for _ in trange(self.max_iter, desc="Adversarial Texture PyTorch", disable=not self.verbose): - for images_i, target_i, y_init_i, foreground_i in data_loader: - _ = self._train_step( - images=images_i, target=target_i, y_init=y_init_i, foreground=foreground_i - ) + for videos_i, target_i, y_init_i, foreground_i in data_loader: + _ = self._train_step(videos=videos_i, target=target_i, y_init=y_init_i, foreground=foreground_i) return self.apply_patch(x=x, foreground=foreground) @@ -306,14 +330,14 @@ def apply_patch( foreground: Optional[np.ndarray] = None, ) -> np.ndarray: """ - A function to apply the learned adversarial patch to images or videos. - - :param x: Instances to apply randomly transformed patch. - :param patch_external: External patch to apply to images `x`. - :param mask: An boolean array of shape equal to the shape of a single samples (1, H, W) or the shape of `x` - (N, H, W) without their channel dimensions. Any features for which the mask is True can be the - center location of the patch during sampling. - :return: The patched samples. + A function to apply the learned adversarial texture to videos. + + :param x: Videos of shape NFHWC to apply adversarial texture. + :param patch_external: External patch to apply to videos `x`. + :param foreground: Foreground masks of shape NFHWC of boolean values with False/0.0 representing foreground, + preventing updates to the texture, and True/1.0 for background, allowing updates to the + texture. + :return: The videos with adversarial textures. """ import torch # lgtm [py/repeated-import] @@ -321,11 +345,11 @@ def apply_patch( x = torch.Tensor(x) foreground = torch.Tensor(foreground) - return self._random_overlay(images=x, patch=patch, foreground=foreground).detach().cpu().numpy() + return self._apply_texture(videos=x, patch=patch, foreground=foreground).detach().cpu().numpy() def reset_patch(self, initial_patch_value: Optional[Union[float, np.ndarray]] = None) -> None: """ - Reset the adversarial patch. + Reset the adversarial texture. :param initial_patch_value: Patch value to use for resetting the patch. 
""" @@ -341,19 +365,5 @@ def reset_patch(self, initial_patch_value: Optional[Union[float, np.ndarray]] = else: raise ValueError("Unexpected value for initial_patch_value.") - @staticmethod - def insert_transformed_patch(x: np.ndarray, patch: np.ndarray, image_coords: np.ndarray): - """ - Insert patch to image based on given or selected coordinates. - - :param x: The image to insert the patch. - :param patch: The patch to be transformed and inserted. - :param image_coords: The coordinates of the 4 corners of the transformed, inserted patch of shape - [[x1, y1], [x2, y2], [x3, y3], [x4, y4]] in pixel units going in clockwise direction, starting with upper - left corner. - :return: The input `x` with the patch inserted. - """ - return insert_transformed_patch(x, patch, image_coords) - def _check_params(self) -> None: super()._check_params() From eeb175e679f27c16648773f0bc486c96ebc15fe6 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Fri, 8 Oct 2021 22:06:48 +0100 Subject: [PATCH 04/12] Add support for cuda Signed-off-by: Beat Buesser --- .../adversarial_texture_pytorch.py | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py index ef831f54d8..07b2076ecc 100644 --- a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py +++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py @@ -166,9 +166,9 @@ def _loss( import torch # lgtm [py/repeated-import] y_pred = self._predictions(images, y_init, foreground) - loss = torch.nn.L1Loss(size_average=False)(y_pred[0]["boxes"].float(), target["boxes"][0].float()) + loss = torch.nn.L1Loss(size_average=False)(y_pred[0]["boxes"].float(), target[0]["boxes"].float()) for i in range(1, len(y_pred)): - loss = loss + torch.nn.L1Loss(size_average=False)(y_pred[i]["boxes"].float(), target["boxes"][i].float()) + loss = loss + torch.nn.L1Loss(size_average=False)(y_pred[i]["boxes"].float(), target[i]["boxes"].float()) return loss @@ -185,7 +185,7 @@ def _get_patch_mask(self, nb_samples: int) -> "torch.Tensor": image_mask = np.expand_dims(image_mask, axis=2) image_mask = np.broadcast_to(image_mask, self.patch_shape) - image_mask = torch.Tensor(np.array(image_mask)) + image_mask = torch.Tensor(np.array(image_mask)).to(self.estimator.device) image_mask = torch.stack([image_mask] * nb_samples, dim=0) return image_mask @@ -249,7 +249,9 @@ def _apply_texture( padded_patch = padded_patch.float() - inverted_mask = torch.from_numpy(np.ones(shape=image_mask.shape, dtype=np.float32)) - image_mask + inverted_mask = ( + torch.from_numpy(np.ones(shape=image_mask.shape, dtype=np.float32)).to(self.estimator.device) - image_mask + ) combined = ( videos * inverted_mask @@ -319,7 +321,14 @@ def __getitem__(self, idx): for _ in trange(self.max_iter, desc="Adversarial Texture PyTorch", disable=not self.verbose): for videos_i, target_i, y_init_i, foreground_i in data_loader: - _ = self._train_step(videos=videos_i, target=target_i, y_init=y_init_i, foreground=foreground_i) + videos_i = videos_i.to(self.estimator.device) + y_init_i = y_init_i.to(self.estimator.device) + foreground_i = foreground_i.to(self.estimator.device) + target_i_list = [] + for i_t in range(videos_i.shape[0]): + target_i_list.append({"boxes": target_i["boxes"][i_t].to(self.estimator.device)}) + + _ = self._train_step(videos=videos_i, target=target_i_list, y_init=y_init_i, foreground=foreground_i) return 
self.apply_patch(x=x, foreground=foreground)
 
@@ -342,8 +351,8 @@ def apply_patch(
         import torch  # lgtm [py/repeated-import]
 
         patch = patch_external if patch_external is not None else self._patch
-        x = torch.Tensor(x)
-        foreground = torch.Tensor(foreground)
+        x = torch.Tensor(x).to(self.estimator.device)
+        foreground = torch.Tensor(foreground).to(self.estimator.device)
 
         return self._apply_texture(videos=x, patch=patch, foreground=foreground).detach().cpu().numpy()
 

From 67555d3e92d6ffdb5b39efdef5db6132eacf118f Mon Sep 17 00:00:00 2001
From: Beat Buesser
Date: Fri, 8 Oct 2021 22:43:15 +0100
Subject: [PATCH 05/12] Update import

Signed-off-by: Beat Buesser
---
 art/attacks/evasion/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/art/attacks/evasion/__init__.py b/art/attacks/evasion/__init__.py
index 65e92570a9..3b7c33912f 100644
--- a/art/attacks/evasion/__init__.py
+++ b/art/attacks/evasion/__init__.py
@@ -5,6 +5,7 @@
 from art.attacks.evasion.adversarial_patch.adversarial_patch_numpy import AdversarialPatchNumpy
 from art.attacks.evasion.adversarial_patch.adversarial_patch_tensorflow import AdversarialPatchTensorFlowV2
 from art.attacks.evasion.adversarial_patch.adversarial_patch_pytorch import AdversarialPatchPyTorch
+from art.attacks.evasion.adversarial_texture.adversarial_texture_pytorch import AdversarialTexturePyTorch
 from art.attacks.evasion.adversarial_asr import CarliniWagnerASR
 from art.attacks.evasion.auto_attack import AutoAttack
 from art.attacks.evasion.auto_projected_gradient_descent import AutoProjectedGradientDescent

From a2c170d58692fb661c9275d56bedfcd8caa931f8 Mon Sep 17 00:00:00 2001
From: Beat Buesser
Date: Tue, 12 Oct 2021 14:48:23 +0100
Subject: [PATCH 06/12] Update typing

Signed-off-by: Beat Buesser
---
 .../adversarial_texture_pytorch.py | 52 ++++++++++++-------
 1 file changed, 33 insertions(+), 19 deletions(-)

diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py
index 07b2076ecc..9d54b55792 100644
--- a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py
+++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py
@@ -27,7 +27,6 @@
 from tqdm.auto import trange
 
 from art.attacks.attack import EvasionAttack
-from art.attacks.evasion.adversarial_patch.utils import insert_transformed_patch
 from art.estimators.estimator import BaseEstimator
 
 if TYPE_CHECKING:
@@ -121,7 +120,7 @@ def _train_step(
         self,
         videos: "torch.Tensor",
-        target: "torch.Tensor",
+        target: List[Dict[str, "torch.Tensor"]],
         y_init: "torch.Tensor",
         foreground: Optional["torch.Tensor"],
     ) -> "torch.Tensor":
@@ -142,7 +141,7 @@ def _train_step(
 
     def _predictions(
         self, videos: "torch.Tensor", y_init: "torch.Tensor", foreground: Optional["torch.Tensor"]
-    ) -> "torch.Tensor":
+    ) -> List[Dict[str, "torch.Tensor"]]:
         import torch  # lgtm [py/repeated-import]
 
         patched_input = self._apply_texture(videos, self._patch, foreground=foreground)
@@ -159,7 +158,7 @@ def _predictions(
     def _loss(
         self,
         images: "torch.Tensor",
-        target: "torch.Tensor",
+        target: List[Dict[str, "torch.Tensor"]],
         y_init: "torch.Tensor",
         foreground: Optional["torch.Tensor"],
     ) -> "torch.Tensor":
@@ -181,11 +180,11 @@ def _get_patch_mask(self, nb_samples: int) -> "torch.Tensor":
         """
         import torch  # lgtm [py/repeated-import]
 
-        image_mask = np.ones((self.patch_height, self.patch_width))
+        image_mask_np = np.ones((self.patch_height, self.patch_width))
 
-        image_mask = np.expand_dims(image_mask,
axis=2) - image_mask = np.broadcast_to(image_mask, self.patch_shape) - image_mask = torch.Tensor(np.array(image_mask)).to(self.estimator.device) + image_mask_np = np.expand_dims(image_mask_np, axis=2) + image_mask_np = np.broadcast_to(image_mask_np, self.patch_shape) + image_mask = torch.Tensor(np.array(image_mask_np)).to(self.estimator.device) image_mask = torch.stack([image_mask] * nb_samples, dim=0) return image_mask @@ -253,16 +252,21 @@ def _apply_texture( torch.from_numpy(np.ones(shape=image_mask.shape, dtype=np.float32)).to(self.estimator.device) - image_mask ) - combined = ( - videos * inverted_mask - + padded_patch * image_mask - - padded_patch * ~foreground.bool() - + videos * ~foreground.bool() * image_mask - ) + if foreground is not None: + combined = ( + videos * inverted_mask + + padded_patch * image_mask + - padded_patch * ~foreground.bool() + + videos * ~foreground.bool() * image_mask + ) + else: + combined = videos * inverted_mask + padded_patch * image_mask return combined - def generate(self, x: np.ndarray, y: List[Dict[str, np.ndarray]], **kwargs) -> np.ndarray: + def generate( # type: ignore # pylint: disable=W0222 + self, x: np.ndarray, y: List[Dict[str, np.ndarray]], **kwargs + ) -> np.ndarray: """ Generate an adversarial patch and return the patch and its mask in arrays. @@ -291,6 +295,10 @@ def generate(self, x: np.ndarray, y: List[Dict[str, np.ndarray]], **kwargs) -> n foreground = kwargs.get("foreground") class TrackingDataset(torch.utils.data.Dataset): + """ + Object tracking dataset in PyTorch. + """ + def __init__(self, x, y, y_init, foreground): self.x = x self.y = y @@ -351,10 +359,16 @@ def apply_patch( import torch # lgtm [py/repeated-import] patch = patch_external if patch_external is not None else self._patch - x = torch.Tensor(x).to(self.estimator.device) - foreground = torch.Tensor(foreground).to(self.estimator.device) - - return self._apply_texture(videos=x, patch=patch, foreground=foreground).detach().cpu().numpy() + patch_tensor = torch.Tensor(patch).to(self.estimator.device) + x_tensor = torch.Tensor(x).to(self.estimator.device) + foreground_tensor = torch.Tensor(foreground).to(self.estimator.device) + + return ( + self._apply_texture(videos=x_tensor, patch=patch_tensor, foreground=foreground_tensor) + .detach() + .cpu() + .numpy() + ) def reset_patch(self, initial_patch_value: Optional[Union[float, np.ndarray]] = None) -> None: """ From fd59439a887f5c9bdf3974d698c5e991d19f5264 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Tue, 12 Oct 2021 21:35:43 +0100 Subject: [PATCH 07/12] Update tensor type Signed-off-by: Beat Buesser --- .../evasion/adversarial_texture/adversarial_texture_pytorch.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py index 9d54b55792..df48c42d35 100644 --- a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py +++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py @@ -358,8 +358,7 @@ def apply_patch( """ import torch # lgtm [py/repeated-import] - patch = patch_external if patch_external is not None else self._patch - patch_tensor = torch.Tensor(patch).to(self.estimator.device) + patch_tensor = torch.Tensor(patch_external).to(self.estimator.device) if patch_external is not None else self._patch x_tensor = torch.Tensor(x).to(self.estimator.device) foreground_tensor = torch.Tensor(foreground).to(self.estimator.device) From 
567dbe05883329cc149b7352da7973f54a701c34 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Tue, 12 Oct 2021 23:55:57 +0100 Subject: [PATCH 08/12] Fix style checks Signed-off-by: Beat Buesser --- .github/workflows/ci-style-checks.yml | 1 + .../adversarial_texture/adversarial_texture_pytorch.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-style-checks.yml b/.github/workflows/ci-style-checks.yml index a79679187b..8bb5258ca3 100644 --- a/.github/workflows/ci-style-checks.yml +++ b/.github/workflows/ci-style-checks.yml @@ -40,6 +40,7 @@ jobs: python -m pip install --upgrade pip setuptools wheel pip install -q pylint==2.7.4 mypy==0.812 pycodestyle==2.7.0 black==20.8b1 pip install -q -r requirements.txt + pip install pluggy==0.13.1 pip list - name: pycodestyle run: pycodestyle --ignore=C0330,C0415,E203,E231,W503 --max-line-length=120 art diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py index df48c42d35..a112f7dd6f 100644 --- a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py +++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py @@ -358,7 +358,9 @@ def apply_patch( """ import torch # lgtm [py/repeated-import] - patch_tensor = torch.Tensor(patch_external).to(self.estimator.device) if patch_external is not None else self._patch + patch_tensor = ( + torch.Tensor(patch_external).to(self.estimator.device) if patch_external is not None else self._patch + ) x_tensor = torch.Tensor(x).to(self.estimator.device) foreground_tensor = torch.Tensor(foreground).to(self.estimator.device) From cad1794bedb2ff88fbb9db6c26bf365c53275a44 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Wed, 1 Dec 2021 22:34:00 +0000 Subject: [PATCH 09/12] Add tests for adversarial texture attack Signed-off-by: Beat Buesser --- .github/actions/goturn/run.sh | 2 + .../adversarial_texture_pytorch.py | 81 +++++-- conftest.py | 49 +++++ .../test_adversarial_texture_pytorch.py | 199 ++++++++++++++++++ 4 files changed, 318 insertions(+), 13 deletions(-) create mode 100644 tests/attacks/evasion/test_adversarial_texture_pytorch.py diff --git a/.github/actions/goturn/run.sh b/.github/actions/goturn/run.sh index 272d79841b..f46baf114c 100755 --- a/.github/actions/goturn/run.sh +++ b/.github/actions/goturn/run.sh @@ -4,5 +4,7 @@ exit_code=0 pytest --cov-report=xml --cov=art --cov-append -q -vv tests/estimators/object_tracking/test_pytorch_goturn.py --framework=pytorch --durations=0 if [[ $? -ne 0 ]]; then exit_code=1; echo "Failed estimators/object_tracking/test_pytorch_goturn tests"; fi +pytest --cov-report=xml --cov=art --cov-append -q -vv tests/attacks/evasion/test_adversarial_texture_pytorch.py --framework=pytorch --durations=0 +if [[ $? 
-ne 0 ]]; then exit_code=1; echo "Failed attacks/evasion/test_adversarial_texture_pytorch tests"; fi exit ${exit_code} diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py index a112f7dd6f..e4ba4baa86 100644 --- a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py +++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py @@ -27,7 +27,8 @@ from tqdm.auto import trange from art.attacks.attack import EvasionAttack -from art.estimators.estimator import BaseEstimator +from art.estimators.estimator import BaseEstimator, LossGradientsMixin +from art.estimators.object_tracking.object_tracker import ObjectTrackerMixin if TYPE_CHECKING: # pylint: disable=C0412 @@ -54,13 +55,13 @@ class AdversarialTexturePyTorch(EvasionAttack): "verbose", ] - _estimator_requirements = (BaseEstimator,) + _estimator_requirements = (ObjectTrackerMixin, LossGradientsMixin, BaseEstimator) def __init__( self, estimator, - patch_height: int = 0, - patch_width: int = 0, + patch_height: int, + patch_width: int, x_min: int = 0, y_min: int = 0, step_size: float = 1.0 / 255.0, @@ -74,8 +75,8 @@ def __init__( :param estimator: A trained estimator. :param patch_height: Height of patch. :param patch_width: Width of patch. - :param x_min: Height of patch. - :param y_min: Width of patch. + :param x_min: Vertical position of patch, top-left corner. + :param y_min: Horizontal position of patch, top-left corner. :param step_size: The step size. :param max_iter: The number of optimization steps. :param batch_size: The size of the training batch. @@ -124,6 +125,15 @@ def _train_step( y_init: "torch.Tensor", foreground: Optional["torch.Tensor"], ) -> "torch.Tensor": + """ + Apply a training step to the batch based on a mini-batch. + + :param videos: Video samples. + :param target: Target labels/boxes. + :param y_init: Initial labels/boxes. + :param foreground: Foreground mask. + :return: Loss. + """ import torch # lgtm [py/repeated-import] self.estimator.model.zero_grad() @@ -142,6 +152,14 @@ def _train_step( def _predictions( self, videos: "torch.Tensor", y_init: "torch.Tensor", foreground: Optional["torch.Tensor"] ) -> List[Dict[str, "torch.Tensor"]]: + """ + Predict object tracking estimator on patched videos. + + :param videos: Video samples. + :param y_init: Initial labels/boxes. + :param foreground: Foreground mask. + :return: Predicted labels/boxes. + """ import torch # lgtm [py/repeated-import] patched_input = self._apply_texture(videos, self._patch, foreground=foreground) @@ -157,14 +175,23 @@ def _predictions( def _loss( self, - images: "torch.Tensor", + videos: "torch.Tensor", target: List[Dict[str, "torch.Tensor"]], y_init: "torch.Tensor", foreground: Optional["torch.Tensor"], ) -> "torch.Tensor": + """ + Calculate L1-loss. + + :param videos: Video samples. + :param target: Target labels/boxes. + :param y_init: Initial labels/boxes. + :param foreground: Foreground mask. + :return: Loss. + """ import torch # lgtm [py/repeated-import] - y_pred = self._predictions(images, y_init, foreground) + y_pred = self._predictions(videos, y_init, foreground) loss = torch.nn.L1Loss(size_average=False)(y_pred[0]["boxes"].float(), target[0]["boxes"].float()) for i in range(1, len(y_pred)): loss = loss + torch.nn.L1Loss(size_average=False)(y_pred[i]["boxes"].float(), target[i]["boxes"].float()) @@ -194,9 +221,9 @@ def _apply_texture( """ Apply texture over background and overlay foreground. 
-        :param videos:
-        :param patch:
-        :param foreground:
+        :param videos: Video samples.
+        :param patch: Patch to apply.
+        :param foreground: Foreground mask.
         :return: Patched videos.
         """
         import torch  # lgtm [py/repeated-import]
@@ -293,6 +320,8 @@ def generate(  # type: ignore  # pylint: disable=W0222
         shuffle = kwargs.get("shuffle", True)
         y_init = kwargs.get("y_init")
         foreground = kwargs.get("foreground")
+        if foreground is None:
+            foreground = np.ones_like(x)
 
         class TrackingDataset(torch.utils.data.Dataset):
             """
@@ -362,7 +391,10 @@ def apply_patch(
             torch.Tensor(patch_external).to(self.estimator.device) if patch_external is not None else self._patch
         )
         x_tensor = torch.Tensor(x).to(self.estimator.device)
-        foreground_tensor = torch.Tensor(foreground).to(self.estimator.device)
+        if foreground is None:
+            foreground_tensor = None
+        else:
+            foreground_tensor = torch.Tensor(foreground).to(self.estimator.device)
 
         return (
             self._apply_texture(videos=x_tensor, patch=patch_tensor, foreground=foreground_tensor)
             .detach()
             .cpu()
             .numpy()
         )
@@ -390,4 +422,27 @@ def reset_patch(self, initial_patch_value: Optional[Union[float, np.ndarray]] =
             raise ValueError("Unexpected value for initial_patch_value.")
 
     def _check_params(self) -> None:
-        super()._check_params()
+
+        if not isinstance(self.patch_height, int) or self.patch_height <= 0:
+            raise ValueError("The patch height `patch_height` has to be of type int and larger than zero.")
+
+        if not isinstance(self.patch_width, int) or self.patch_width <= 0:
+            raise ValueError("The patch width `patch_width` has to be of type int and larger than zero.")
+
+        if not isinstance(self.x_min, int) or self.x_min < 0:
+            raise ValueError("The vertical position `x_min` has to be of type int and greater than or equal to zero.")
+
+        if not isinstance(self.y_min, int) or self.y_min < 0:
+            raise ValueError("The horizontal position `y_min` has to be of type int and greater than or equal to zero.")
+
+        if not isinstance(self.step_size, float) or self.step_size <= 0:
+            raise ValueError("The step size `step_size` has to be of type float and larger than zero.")
+
+        if not isinstance(self.max_iter, int) or self.max_iter <= 0:
+            raise ValueError("The number of iterations `max_iter` has to be of type int and larger than zero.")
+
+        if not isinstance(self.batch_size, int) or self.batch_size <= 0:
+            raise ValueError("The batch size `batch_size` has to be of type int and larger than zero.")
+
+        if not isinstance(self.verbose, bool):
+            raise ValueError("The argument `verbose` has to be of type bool.")
diff --git a/conftest.py b/conftest.py
index a635532a4d..247d1ebd70 100644
--- a/conftest.py
+++ b/conftest.py
@@ -901,3 +901,52 @@
 
     frcnn = DummyObjectDetector()
     return frcnn
+
+
+@pytest.fixture()
+def fix_get_goturn():
+
+    from art.estimators.estimator import BaseEstimator, LossGradientsMixin
+    from art.estimators.object_tracking.object_tracker import ObjectTrackerMixin
+
+    class DummyObjectTracker(ObjectTrackerMixin, LossGradientsMixin, BaseEstimator):
+        def __init__(self):
+            super().__init__(
+                model=None,
+                clip_values=(0, 1),
+                preprocessing_defences=None,
+                postprocessing_defences=None,
+                preprocessing=(0, 1),
+            )
+
+            import torch
+
+            self.channels_first = False
+            self._input_shape = None
+            self.postprocessing_defences = None
+            self.device = torch.device("cpu")
+
+        def loss_gradient(self, x: np.ndarray, y: None, **kwargs):
+            return np.ones_like(x)
+
+        def fit(self, x: np.ndarray, y, batch_size: int = 128, nb_epochs: int = 20, **kwargs):
+            raise NotImplementedError
+
+        def predict(self, x: np.ndarray, batch_size: int = 128,
**kwargs): + boxes_list = list() + for i in range(x.shape[1]): + boxes_list.append([0.1, 0.2, 0.3, 0.4]) + + dict_i = {"boxes": np.array(boxes_list), "labels": np.array([[2]]), "scores": np.array([[0.8]])} + return [dict_i] * x.shape[0] + + @property + def native_label_is_pytorch_format(self): + return True + + @property + def input_shape(self): + return self._input_shape + + goturn = DummyObjectTracker() + return goturn diff --git a/tests/attacks/evasion/test_adversarial_texture_pytorch.py b/tests/attacks/evasion/test_adversarial_texture_pytorch.py new file mode 100644 index 0000000000..34757ef7fd --- /dev/null +++ b/tests/attacks/evasion/test_adversarial_texture_pytorch.py @@ -0,0 +1,199 @@ +# MIT License +# +# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2021 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit +# persons to whom the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
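+
+# Tests for the AdversarialTexturePyTorch attack. The GOTURN-based test below
+# assumes a local goturn-pytorch checkout with a trained checkpoint under
+# /tmp/goturn-pytorch; the remaining tests run without GOTURN weights, mostly
+# against the lightweight DummyObjectTracker provided by the fix_get_goturn
+# fixture in conftest.py.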
+import os +import logging + +import numpy as np +import pytest + +from art.attacks.evasion import AdversarialTexturePyTorch +from art.estimators.estimator import BaseEstimator, LossGradientsMixin +from art.estimators.object_tracking.object_tracker import ObjectTrackerMixin +from art.estimators.object_tracking import PyTorchGoturn + +from tests.attacks.utils import backend_test_classifier_type_check_fail +from tests.utils import ARTTestException + +logger = logging.getLogger(__name__) + + +@pytest.fixture() +def fix_get_mnist_subset(get_mnist_dataset): + (x_train_mnist, y_train_mnist), (x_test_mnist, y_test_mnist) = get_mnist_dataset + n_train = 10 + n_test = 10 + yield x_train_mnist[:n_train], y_train_mnist[:n_train], x_test_mnist[:n_test], y_test_mnist[:n_test] + + +@pytest.mark.skip_module("scripts") +@pytest.mark.skip_framework("tensorflow", "keras", "kerastf", "mxnet", "non_dl_frameworks") +def test_generate(art_warning, fix_get_mnist_subset, fix_get_goturn, framework): + try: + import torch + from scripts.train import GoturnTrain + from pathlib import Path + + _device = "cpu" + + goturn_path = os.path.join(os.sep, "tmp", "goturn-pytorch") + + model_dir = Path(os.path.join(goturn_path, "src", "goturn", "models")) + ckpt_dir = model_dir.joinpath("checkpoints") + ckpt_path = next(ckpt_dir.glob("*.ckpt")) + + ckpt_mod = torch.load( + os.path.join(goturn_path, "src", "goturn", "models", "checkpoints", "_ckpt_epoch_3.ckpt"), + map_location=_device, + ) + ckpt_mod["hparams"]["pretrained_model"] = os.path.join( + goturn_path, "src", "goturn", "models", "pretrained", "caffenet_weights.npy" + ) + torch.save(ckpt_mod, os.path.join(goturn_path, "src", "goturn", "models", "checkpoints", "_ckpt_epoch_3.ckpt")) + + model = GoturnTrain.load_from_checkpoint(ckpt_path) + + pgt = PyTorchGoturn( + model=model, + input_shape=(3, 227, 227), + clip_values=(0, 255), + preprocessing=(np.array([104.0, 117.0, 123.0]), np.array([1.0, 1.0, 1.0])), + device_type=_device, + ) + + y_init = np.array([[48, 79, 80, 110], [48, 79, 80, 110]]) + x_list = list() + for i in range(2): + x_list.append(np.random.random_integers(0, 255, size=(4 + i, 277, 277, 3)).astype(float) / 255.0) + + x = np.asarray(x_list, dtype=object) + + y_pred = pgt.predict(x=x, y_init=y_init) + + attack = AdversarialTexturePyTorch( + pgt, + patch_height=4, + patch_width=4, + x_min=2, + y_min=2, + step_size=1.0 / 255.0, + max_iter=500, + batch_size=16, + verbose=True, + ) + + patch = attack.generate(x=x, y=y_pred, y_init=y_init) + assert patch.shape == (4, 4, 3) + + except ARTTestException as e: + art_warning(e) + + +@pytest.mark.skip_framework("tensorflow", "keras", "kerastf", "mxnet", "non_dl_frameworks") +def test_apply_patch(art_warning, fix_get_goturn): + try: + goturn = fix_get_goturn + attack = AdversarialTexturePyTorch( + goturn, + patch_height=4, + patch_width=4, + x_min=2, + y_min=2, + step_size=1.0 / 255.0, + max_iter=500, + batch_size=16, + verbose=True, + ) + + patch = np.ones(shape=(4, 4, 3)) + foreground = np.ones(shape=(1, 15, 10, 10, 3)) + foreground[:, :, 5, 5, :] = 0 + x = np.zeros(shape=(1, 15, 10, 10, 3)) + + patched_images = attack.apply_patch(x=x, patch_external=patch, foreground=foreground) + + patch_sum_expected = 15 * 3 * (4 * 4 - 1) + complement_sum_expected = 0.0 + + patch_sum = np.sum(patched_images[0, :, 2:6, 2:6, :]) + complement_sum = np.sum(patched_images[0]) - patch_sum + + assert patch_sum == patch_sum_expected + assert complement_sum == complement_sum_expected + + except ARTTestException as e: + art_warning(e) + + 
+@pytest.mark.skip_framework("tensorflow", "keras", "kerastf", "mxnet", "non_dl_frameworks") +def test_check_params(art_warning, fix_get_goturn): + try: + goturn = fix_get_goturn + + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=-2, patch_width=2) + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2.0, patch_width=2) + + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=-2) + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2.0) + + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, x_min=-2) + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, x_min=2.0) + + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, y_min=-2) + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, y_min=2.0) + + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, step_size=-2.0) + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, step_size=2) + + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, max_iter=-2) + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, max_iter=2.0) + + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, batch_size=-2) + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, batch_size=2.0) + + with pytest.raises(ValueError): + _ = AdversarialTexturePyTorch(goturn, patch_height=2, patch_width=2, verbose="true") + + except ARTTestException as e: + art_warning(e) + + +# @pytest.mark.framework_agnostic +def test_classifier_type_check_fail(art_warning): + try: + backend_test_classifier_type_check_fail( + AdversarialTexturePyTorch, + [BaseEstimator, LossGradientsMixin, ObjectTrackerMixin], + patch_height=2, + patch_width=2, + ) + except ARTTestException as e: + art_warning(e) From 3bc817cc4e1fa415ec3d40e99af2bbd734d5b788 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Fri, 3 Dec 2021 17:57:17 +0000 Subject: [PATCH 10/12] Update tests Signed-off-by: Beat Buesser --- .../adversarial_texture/adversarial_texture_pytorch.py | 2 +- tests/attacks/evasion/test_adversarial_texture_pytorch.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py index e4ba4baa86..5ac96ee454 100644 --- a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py +++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py @@ -313,7 +313,7 @@ def generate( # type: ignore # pylint: disable=W0222 Foreground masks of shape NFHWC of boolean values with False/0.0 representing foreground, preventing updates to the texture, and True/1.0 for background, allowing updates to the texture. - :return: An array with adversarial patch and an array of the patch mask. + :return: An array with images patched with adversarial texture. 
""" import torch # lgtm [py/repeated-import] diff --git a/tests/attacks/evasion/test_adversarial_texture_pytorch.py b/tests/attacks/evasion/test_adversarial_texture_pytorch.py index 34757ef7fd..60792f762d 100644 --- a/tests/attacks/evasion/test_adversarial_texture_pytorch.py +++ b/tests/attacks/evasion/test_adversarial_texture_pytorch.py @@ -78,9 +78,9 @@ def test_generate(art_warning, fix_get_mnist_subset, fix_get_goturn, framework): y_init = np.array([[48, 79, 80, 110], [48, 79, 80, 110]]) x_list = list() for i in range(2): - x_list.append(np.random.random_integers(0, 255, size=(4 + i, 277, 277, 3)).astype(float) / 255.0) + x_list.append(np.random.random_integers(0, 255, size=(4, 277, 277, 3)).astype(float) / 255.0) - x = np.asarray(x_list, dtype=object) + x = np.asarray(x_list, dtype=float) y_pred = pgt.predict(x=x, y_init=y_init) @@ -91,13 +91,13 @@ def test_generate(art_warning, fix_get_mnist_subset, fix_get_goturn, framework): x_min=2, y_min=2, step_size=1.0 / 255.0, - max_iter=500, + max_iter=5, batch_size=16, verbose=True, ) patch = attack.generate(x=x, y=y_pred, y_init=y_init) - assert patch.shape == (4, 4, 3) + assert patch.shape == (2, 4, 277, 277, 3) except ARTTestException as e: art_warning(e) From 4b0675c28cd41ee5fe702e868659c5c18e60bb03 Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Fri, 10 Dec 2021 16:57:15 +0000 Subject: [PATCH 11/12] Replace deprecated argument size_average Signed-off-by: Beat Buesser --- .../adversarial_texture/adversarial_texture_pytorch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py index 5ac96ee454..c43fe9e7f4 100644 --- a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py +++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py @@ -192,9 +192,9 @@ def _loss( import torch # lgtm [py/repeated-import] y_pred = self._predictions(videos, y_init, foreground) - loss = torch.nn.L1Loss(size_average=False)(y_pred[0]["boxes"].float(), target[0]["boxes"].float()) + loss = torch.nn.L1Loss(reduction='sum')(y_pred[0]["boxes"].float(), target[0]["boxes"].float()) for i in range(1, len(y_pred)): - loss = loss + torch.nn.L1Loss(size_average=False)(y_pred[i]["boxes"].float(), target[i]["boxes"].float()) + loss = loss + torch.nn.L1Loss(reduction='sum')(y_pred[i]["boxes"].float(), target[i]["boxes"].float()) return loss From 0f23952cec377686f94b5e7215552c20fafc27ed Mon Sep 17 00:00:00 2001 From: Beat Buesser Date: Fri, 10 Dec 2021 17:19:34 +0000 Subject: [PATCH 12/12] Fix style check Signed-off-by: Beat Buesser --- .../adversarial_texture/adversarial_texture_pytorch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py index c43fe9e7f4..72b8677b55 100644 --- a/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py +++ b/art/attacks/evasion/adversarial_texture/adversarial_texture_pytorch.py @@ -192,9 +192,9 @@ def _loss( import torch # lgtm [py/repeated-import] y_pred = self._predictions(videos, y_init, foreground) - loss = torch.nn.L1Loss(reduction='sum')(y_pred[0]["boxes"].float(), target[0]["boxes"].float()) + loss = torch.nn.L1Loss(reduction="sum")(y_pred[0]["boxes"].float(), target[0]["boxes"].float()) for i in range(1, len(y_pred)): - loss = loss + 
torch.nn.L1Loss(reduction='sum')(y_pred[i]["boxes"].float(), target[i]["boxes"].float()) + loss = loss + torch.nn.L1Loss(reduction="sum")(y_pred[i]["boxes"].float(), target[i]["boxes"].float()) return loss
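
Usage sketch (illustrative, mirroring test_generate above): assuming the patch
series is applied and a PyTorchGoturn tracker `tracker` has been created as in
the test, the attack is driven as follows. Inputs follow the NFHWC convention
documented in `generate`; all names below are examples only.

    import numpy as np
    from art.attacks.evasion import AdversarialTexturePyTorch

    # Two videos of 4 frames each, 277x277 RGB, pixel values in [0, 1].
    x = np.random.rand(2, 4, 277, 277, 3).astype(np.float32)

    # One initial box [x1, y1, x2, y2] per video for the tracker to lock onto.
    y_init = np.array([[48, 79, 80, 110], [48, 79, 80, 110]])

    # Clean predictions serve as the target boxes; the texture is optimized by
    # gradient ascent on the summed L1 loss to push predictions away from them.
    y_pred = tracker.predict(x=x, y_init=y_init)

    attack = AdversarialTexturePyTorch(
        tracker,
        patch_height=4,
        patch_width=4,
        x_min=2,
        y_min=2,
        step_size=1.0 / 255.0,
        max_iter=5,
        batch_size=16,
    )

    # Without an explicit foreground mask the whole frame is treated as
    # background, so the texture may be updated everywhere it is placed.
    x_adv = attack.generate(x=x, y=y_pred, y_init=y_init)
    assert x_adv.shape == x.shape  # videos with the adversarial texture applied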