Merge pull request #1702 from Trusted-AI/development_issue_1683

Add support for framework-specific preprocessing of object arrays
Trusted-AI · May 23, 2022 · 8ba938d
2 parents c8c6f6d + 1c81523
commit 8ba938d
Show file tree

Hide file tree

Showing 10 changed files with 1,955 additions and 1,305 deletions.
diff --git a/.github/actions/deepspeech-v2/Dockerfile b/.github/actions/deepspeech-v2/Dockerfile
@@ -37,8 +37,9 @@ RUN cd warp-ctc/pytorch_binding && python setup.py install
 
 RUN git clone https://github.com/SeanNaren/deepspeech.pytorch.git
 RUN cd deepspeech.pytorch && git checkout V2.1
-RUN cd deepspeech.pytorch && pip install -r requirements_test.txt
+RUN cd deepspeech.pytorch && pip install -r requirements.txt
 RUN cd deepspeech.pytorch && pip install -e .
 
 RUN pip install numba==0.50.0
 RUN pip install pytest-cov
+RUN pip install pydub==0.25.1
diff --git a/.github/actions/deepspeech-v3/Dockerfile b/.github/actions/deepspeech-v3/Dockerfile
@@ -34,8 +34,9 @@ RUN pip install torchaudio==0.6.0
 RUN pip install --no-build-isolation fairscale
 
 RUN git clone https://github.com/SeanNaren/deepspeech.pytorch.git
-RUN cd deepspeech.pytorch && pip install -r requirements_test.txt
+RUN cd deepspeech.pytorch && pip install -r requirements.txt
 RUN cd deepspeech.pytorch && pip install -e .
 
 RUN pip install numba==0.50.0
 RUN pip install pytest-cov
+RUN pip install pydub==0.25.1
diff --git a/.github/workflows/ci-deepspeech-v2.yml b/.github/workflows/ci-deepspeech-v2.yml
@@ -22,7 +22,7 @@ jobs:
   test_deepspeech_v2:
     name: PyTorchDeepSpeech v2
     runs-on: ubuntu-latest
-    container: minhitbk/art_testing_envs:deepspeech_v2
+    container: adversarialrobustnesstoolbox/art_testing_envs:deepspeech_v2
     steps:
       - name: Checkout Repo
         uses: actions/checkout@v3

diff --git a/.github/workflows/ci-deepspeech-v3.yml b/.github/workflows/ci-deepspeech-v3.yml
@@ -19,26 +19,13 @@ on:
     - cron: '0 8 * * 0'
 
 jobs:
-  test_deepspeech_v3:
-    name: PyTorchDeepSpeech v3
-    runs-on: ubuntu-latest
-    container: minhitbk/art_testing_envs:deepspeech_v3
-    steps:
-      - name: Checkout Repo
-        uses: actions/checkout@v3
-      - name: Run Test Action
-        uses: ./.github/actions/deepspeech-v3
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v3
-        with:
-          fail_ci_if_error: true
   test_deepspeech_v3_torch_1_10:
     name: PyTorchDeepSpeech v3 / PyTorch 1.10
     runs-on: ubuntu-latest
     container: adversarialrobustnesstoolbox/art_testing_envs:deepspeech_v3_torch_1_10
     steps:
       - name: Checkout Repo
-        uses: actions/checkout@v2.4.0
+        uses: actions/checkout@v3
       - name: Run Test Action
         uses: ./.github/actions/deepspeech-v3
       - name: Upload coverage to Codecov

diff --git a/art/defences/preprocessor/mp3_compression_pytorch.py b/art/defences/preprocessor/mp3_compression_pytorch.py
@@ -115,7 +115,23 @@ def forward(
         :param y: Labels of the sample `x`. This function does not affect them in any way.
         :return: Compressed sample.
         """
+        import torch  # lgtm [py/repeated-import]
+
+        ndim = x.ndim
+
+        if ndim == 1:
+            x = torch.unsqueeze(x, dim=0)
+            if self.channels_first:
+                dim = 1
+            else:
+                dim = 2
+            x = torch.unsqueeze(x, dim=dim)
+
         x_compressed = self._compression_pytorch_numpy.apply(x)
+
+        if ndim == 1:
+            x_compressed = torch.squeeze(x_compressed)
+
         return x_compressed, y
 
     def _check_params(self) -> None:

diff --git a/art/estimators/speech_recognition/pytorch_deep_speech.py b/art/estimators/speech_recognition/pytorch_deep_speech.py
@@ -352,17 +352,17 @@ def predict(
         """
         import torch  # lgtm [py/repeated-import]
 
-        x_in = np.empty(len(x), dtype=object)
-        x_in[:] = list(x)
+        # Apply preprocessing
+        x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)
+
+        x_in = np.empty(len(x_preprocessed), dtype=object)
+        x_in[:] = list(x_preprocessed)
 
         # Put the model in the eval mode
         self._model.eval()
 
-        # Apply preprocessing
-        x_preprocessed, _ = self._apply_preprocessing(x_in, y=None, fit=False)
-
         # Transform x into the model input space
-        inputs, _, input_rates, _, batch_idx = self._transform_model_input(x=x_preprocessed)
+        inputs, _, input_rates, _, batch_idx = self._transform_model_input(x=x_in)
 
         # Compute real input sizes
         input_sizes = input_rates.mul_(inputs.size()[-1]).int()
@@ -437,21 +437,19 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
                   lengths. A possible example of `y` could be: `y = np.array(['SIXTY ONE', 'HELLO'])`.
         :return: Loss gradients of the same shape as `x`.
         """
-        x_in = np.empty(len(x), dtype=object)
-        x_in[:] = list(x)
+        # Apply preprocessing
+        x_preprocessed, _ = self._apply_preprocessing(x, None, fit=False)
+
+        x_in = np.empty(len(x_preprocessed), dtype=object)
+        x_in[:] = list(x_preprocessed)
 
         # Put the model in the training mode, otherwise CUDA can't backpropagate through the model.
         # However, model uses batch norm layers which need to be frozen
         self._model.train()
         self.set_batchnorm(train=False)
 
-        # Apply preprocessing
-        x_preprocessed, y_preprocessed = self._apply_preprocessing(x_in, y, fit=False)
-
         # Transform data into the model input space
-        inputs, targets, input_rates, target_sizes, _ = self._transform_model_input(
-            x=x_preprocessed, y=y_preprocessed, compute_gradient=True
-        )
+        inputs, targets, input_rates, target_sizes, _ = self._transform_model_input(x=x_in, y=y, compute_gradient=True)
 
         # Compute real input sizes
         input_sizes = input_rates.mul_(inputs.size()[-1]).int()
@@ -484,8 +482,8 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
 
         # Get results
         results_list = []
-        for i, _ in enumerate(x_preprocessed):
-            results_list.append(x_preprocessed[i].grad.cpu().numpy().copy())
+        for i, _ in enumerate(x_in):
+            results_list.append(x_in[i].grad.cpu().numpy().copy())
 
         results = np.array(results_list)
 
@@ -494,7 +492,7 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
             results_[:] = list(results)
             results = results_
 
-        results = self._apply_preprocessing_gradient(x_in, results)
+        results = self._apply_preprocessing_gradient(x, results)
 
         if x.dtype != object:
             results = np.array([i for i in results], dtype=x.dtype)  # pylint: disable=R1721
@@ -521,18 +519,19 @@ def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: in
         """
         import random
 
-        x_in = np.empty(len(x), dtype=object)
-        x_in[:] = list(x)
+        # Apply preprocessing
+        x_preprocessed, _ = self._apply_preprocessing(x, None, fit=True)
+        y_preprocessed = y
+
+        x_in = np.empty(len(x_preprocessed), dtype=object)
+        x_in[:] = list(x_preprocessed)
 
         # Put the model in the training mode
         self._model.train()
 
         if self.optimizer is None:  # pragma: no cover
             raise ValueError("An optimizer is required to train the model, but none was provided.")
 
-        # Apply preprocessing
-        x_preprocessed, y_preprocessed = self._apply_preprocessing(x_in, y, fit=True)
-
         # Train with batch processing
         num_batch = int(np.ceil(len(x_preprocessed) / float(batch_size)))
         ind = np.arange(len(x_preprocessed))