Skip to content

Commit

Permalink
Custom padding for offset mappings.
Browse files Browse the repository at this point in the history
  • Loading branch information
Narsil committed Oct 18, 2021
1 parent d198958 commit 376923a
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 16 deletions.
28 changes: 20 additions & 8 deletions src/transformers/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,30 @@ def _pad(items, key, padding_value, padding_side):
if isinstance(items[0][key], torch.Tensor):
# Others include `attention_mask` etc...
shape = items[0][key].shape
if len(shape) != 2:
dim = len(shape)
if dim == 4:
# This is probably an image, so padding shouldn't be necessary
# B, C, H, W
return torch.cat([item[key] for item in items], dim=0)
max_length = max(item[key].shape[-1] for item in items)
max_length = max(item[key].shape[1] for item in items)
dtype = items[0][key].dtype
tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value

if dim == 2:
tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value
elif dim == 3:
tensor = torch.zeros((batch_size, max_length, shape[-1]), dtype=dtype) + padding_value

for i, item in enumerate(items):
if padding_side == "left":
tensor[i, -len(item[key][0]) :] = item[key][0].clone()
else:
tensor[i, : len(item[key][0])] = item[key][0].clone()
if dim == 2:
if padding_side == "left":
tensor[i, -len(item[key][0]) :] = item[key][0].clone()
else:
tensor[i, : len(item[key][0])] = item[key][0].clone()
elif dim == 3:
if padding_side == "left":
tensor[i, -len(item[key][0]) :, :] = item[key][0].clone()
else:
tensor[i, : len(item[key][0]), :] = item[key][0].clone()
return tensor
else:
return [item[key] for item in items]
Expand Down Expand Up @@ -1022,7 +1034,7 @@ def get_iterator(
final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
return final_iterator

def __call__(self, inputs, *args, num_workers=2, batch_size=1, **kwargs):
def __call__(self, inputs, *args, num_workers=0, batch_size=1, **kwargs):
if args:
logger.warning(f"Ignoring args : {args}")
preprocess_params, forward_params, postprocess_params = self._sanitize_parameters(**kwargs)
Expand Down
25 changes: 17 additions & 8 deletions tests/test_pipelines_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,14 +307,23 @@ def test_pipeline_image_padding(self):
torch.zeros((2, 3, 10, 10)),
)
)

@require_torch
def test_pipeline_offset_mapping(self):
import torch

items = [
{
"offset_mappings": torch.zeros([1, 11, 2], dtype=torch.long),
},
{
"offset_mappings": torch.zeros([1, 4, 2], dtype=torch.long),
},
]

self.assertTrue(
torch.allclose(
_pad(items, "input_ids", 10, "left"),
torch.LongTensor([[10, 10, 1, 23, 24, 2], [1, 23, 24, 43, 44, 2]]),
)
)
self.assertTrue(
torch.allclose(
_pad(items, "attention_mask", 0, "right"), torch.LongTensor([[0, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 0]])
)
_pad(items, "offset_mappings", 0, "right"),
torch.zeros((2, 11, 2), dtype=torch.long),
),
)

0 comments on commit 376923a

Please sign in to comment.