diff --git a/src/transformers/pipelines/base.py b/src/transformers/pipelines/base.py index 53baca01342687..11eba10589ef3c 100644 --- a/src/transformers/pipelines/base.py +++ b/src/transformers/pipelines/base.py @@ -70,18 +70,30 @@ def _pad(items, key, padding_value, padding_side): if isinstance(items[0][key], torch.Tensor): # Others include `attention_mask` etc... shape = items[0][key].shape - if len(shape) != 2: + dim = len(shape) + if dim == 4: # This is probable image so padding shouldn't be necessary + # B, C, H, W return torch.cat([item[key] for item in items], dim=0) - max_length = max(item[key].shape[-1] for item in items) + max_length = max(item[key].shape[1] for item in items) dtype = items[0][key].dtype - tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value + + if dim == 2: + tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value + elif dim == 3: + tensor = torch.zeros((batch_size, max_length, shape[-1]), dtype=dtype) + padding_value for i, item in enumerate(items): - if padding_side == "left": - tensor[i, -len(item[key][0]) :] = item[key][0].clone() - else: - tensor[i, : len(item[key][0])] = item[key][0].clone() + if dim == 2: + if padding_side == "left": + tensor[i, -len(item[key][0]) :] = item[key][0].clone() + else: + tensor[i, : len(item[key][0])] = item[key][0].clone() + elif dim == 3: + if padding_side == "left": + tensor[i, -len(item[key][0]) :, :] = item[key][0].clone() + else: + tensor[i, : len(item[key][0]), :] = item[key][0].clone() return tensor else: return [item[key] for item in items] @@ -1022,7 +1034,7 @@ def get_iterator( final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params) return final_iterator - def __call__(self, inputs, *args, num_workers=2, batch_size=1, **kwargs): + def __call__(self, inputs, *args, num_workers=0, batch_size=1, **kwargs): if args: logger.warning(f"Ignoring args : {args}") preprocess_params, forward_params, postprocess_params = self._sanitize_parameters(**kwargs) diff --git a/tests/test_pipelines_common.py b/tests/test_pipelines_common.py index 13ca39c63dc3ca..e64d4b8c09f9fb 100644 --- a/tests/test_pipelines_common.py +++ b/tests/test_pipelines_common.py @@ -307,14 +307,23 @@ def test_pipeline_image_padding(self): torch.zeros((2, 3, 10, 10)), ) ) + + @require_torch + def test_pipeline_offset_mapping(self): + import torch + + items = [ + { + "offset_mappings": torch.zeros([1, 11, 2], dtype=torch.long), + }, + { + "offset_mappings": torch.zeros([1, 4, 2], dtype=torch.long), + }, + ] + self.assertTrue( torch.allclose( - _pad(items, "input_ids", 10, "left"), - torch.LongTensor([[10, 10, 1, 23, 24, 2], [1, 23, 24, 43, 44, 2]]), - ) - ) - self.assertTrue( - torch.allclose( - _pad(items, "attention_mask", 0, "right"), torch.LongTensor([[0, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 0]]) - ) + _pad(items, "offset_mappings", 0, "right"), + torch.zeros((2, 11, 2), dtype=torch.long), + ), )