Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for framework-specific preprocessing of object arrays #1702

Merged
merged 10 commits into from
May 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/actions/deepspeech-v2/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ RUN cd warp-ctc/pytorch_binding && python setup.py install

RUN git clone https://github.com/SeanNaren/deepspeech.pytorch.git
RUN cd deepspeech.pytorch && git checkout V2.1
RUN cd deepspeech.pytorch && pip install -r requirements_test.txt
RUN cd deepspeech.pytorch && pip install -r requirements.txt
RUN cd deepspeech.pytorch && pip install -e .

RUN pip install numba==0.50.0
RUN pip install pytest-cov
RUN pip install pydub==0.25.1
3 changes: 2 additions & 1 deletion .github/actions/deepspeech-v3/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ RUN pip install torchaudio==0.6.0
RUN pip install --no-build-isolation fairscale

RUN git clone https://github.com/SeanNaren/deepspeech.pytorch.git
RUN cd deepspeech.pytorch && pip install -r requirements_test.txt
RUN cd deepspeech.pytorch && pip install -r requirements.txt
RUN cd deepspeech.pytorch && pip install -e .

RUN pip install numba==0.50.0
RUN pip install pytest-cov
RUN pip install pydub==0.25.1
2 changes: 1 addition & 1 deletion .github/workflows/ci-deepspeech-v2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
test_deepspeech_v2:
name: PyTorchDeepSpeech v2
runs-on: ubuntu-latest
container: minhitbk/art_testing_envs:deepspeech_v2
container: adversarialrobustnesstoolbox/art_testing_envs:deepspeech_v2
steps:
- name: Checkout Repo
uses: actions/checkout@v3
Expand Down
15 changes: 1 addition & 14 deletions .github/workflows/ci-deepspeech-v3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,13 @@ on:
- cron: '0 8 * * 0'

jobs:
test_deepspeech_v3:
name: PyTorchDeepSpeech v3
runs-on: ubuntu-latest
container: minhitbk/art_testing_envs:deepspeech_v3
steps:
- name: Checkout Repo
uses: actions/checkout@v3
- name: Run Test Action
uses: ./.github/actions/deepspeech-v3
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
with:
fail_ci_if_error: true
test_deepspeech_v3_torch_1_10:
name: PyTorchDeepSpeech v3 / PyTorch 1.10
runs-on: ubuntu-latest
container: adversarialrobustnesstoolbox/art_testing_envs:deepspeech_v3_torch_1_10
steps:
- name: Checkout Repo
uses: actions/checkout@v2.4.0
uses: actions/checkout@v3
- name: Run Test Action
uses: ./.github/actions/deepspeech-v3
- name: Upload coverage to Codecov
Expand Down
16 changes: 16 additions & 0 deletions art/defences/preprocessor/mp3_compression_pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,23 @@ def forward(
:param y: Labels of the sample `x`. This function does not affect them in any way.
:return: Compressed sample.
"""
import torch # lgtm [py/repeated-import]

ndim = x.ndim

if ndim == 1:
x = torch.unsqueeze(x, dim=0)
if self.channels_first:
dim = 1
else:
dim = 2
x = torch.unsqueeze(x, dim=dim)

x_compressed = self._compression_pytorch_numpy.apply(x)

if ndim == 1:
x_compressed = torch.squeeze(x_compressed)

return x_compressed, y

def _check_params(self) -> None:
Expand Down
43 changes: 21 additions & 22 deletions art/estimators/speech_recognition/pytorch_deep_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,17 +352,17 @@ def predict(
"""
import torch # lgtm [py/repeated-import]

x_in = np.empty(len(x), dtype=object)
x_in[:] = list(x)
# Apply preprocessing
x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)

x_in = np.empty(len(x_preprocessed), dtype=object)
x_in[:] = list(x_preprocessed)

# Put the model in the eval mode
self._model.eval()

# Apply preprocessing
x_preprocessed, _ = self._apply_preprocessing(x_in, y=None, fit=False)

# Transform x into the model input space
inputs, _, input_rates, _, batch_idx = self._transform_model_input(x=x_preprocessed)
inputs, _, input_rates, _, batch_idx = self._transform_model_input(x=x_in)

# Compute real input sizes
input_sizes = input_rates.mul_(inputs.size()[-1]).int()
Expand Down Expand Up @@ -437,21 +437,19 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
lengths. A possible example of `y` could be: `y = np.array(['SIXTY ONE', 'HELLO'])`.
:return: Loss gradients of the same shape as `x`.
"""
x_in = np.empty(len(x), dtype=object)
x_in[:] = list(x)
# Apply preprocessing
x_preprocessed, _ = self._apply_preprocessing(x, None, fit=False)

x_in = np.empty(len(x_preprocessed), dtype=object)
x_in[:] = list(x_preprocessed)

# Put the model in the training mode, otherwise CUDA can't backpropagate through the model.
# However, the model uses batch norm layers, which need to be frozen
self._model.train()
self.set_batchnorm(train=False)

# Apply preprocessing
x_preprocessed, y_preprocessed = self._apply_preprocessing(x_in, y, fit=False)

# Transform data into the model input space
inputs, targets, input_rates, target_sizes, _ = self._transform_model_input(
x=x_preprocessed, y=y_preprocessed, compute_gradient=True
)
inputs, targets, input_rates, target_sizes, _ = self._transform_model_input(x=x_in, y=y, compute_gradient=True)

# Compute real input sizes
input_sizes = input_rates.mul_(inputs.size()[-1]).int()
Expand Down Expand Up @@ -484,8 +482,8 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:

# Get results
results_list = []
for i, _ in enumerate(x_preprocessed):
results_list.append(x_preprocessed[i].grad.cpu().numpy().copy())
for i, _ in enumerate(x_in):
results_list.append(x_in[i].grad.cpu().numpy().copy())

results = np.array(results_list)

Expand All @@ -494,7 +492,7 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
results_[:] = list(results)
results = results_

results = self._apply_preprocessing_gradient(x_in, results)
results = self._apply_preprocessing_gradient(x, results)

if x.dtype != object:
results = np.array([i for i in results], dtype=x.dtype) # pylint: disable=R1721
Expand All @@ -521,18 +519,19 @@ def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: in
"""
import random

x_in = np.empty(len(x), dtype=object)
x_in[:] = list(x)
# Apply preprocessing
x_preprocessed, _ = self._apply_preprocessing(x, None, fit=True)
y_preprocessed = y

x_in = np.empty(len(x_preprocessed), dtype=object)
x_in[:] = list(x_preprocessed)

# Put the model in the training mode
self._model.train()

if self.optimizer is None: # pragma: no cover
raise ValueError("An optimizer is required to train the model, but none was provided.")

# Apply preprocessing
x_preprocessed, y_preprocessed = self._apply_preprocessing(x_in, y, fit=True)

# Train with batch processing
num_batch = int(np.ceil(len(x_preprocessed) / float(batch_size)))
ind = np.arange(len(x_preprocessed))
Expand Down
Loading