From 3cc23eee06884c9f5f6565e9654d2dc9f122c07c Mon Sep 17 00:00:00 2001 From: Lysandre Debut Date: Thu, 9 Jul 2020 13:35:21 -0400 Subject: [PATCH] More explicit error when failing to tensorize overflowing tokens (#5633) --- src/transformers/tokenization_utils_base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 0e4b30a568b7aa..8fc9f9199c985f 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -511,6 +511,11 @@ def convert_to_tensors(self, tensor_type: Union[None, str, TensorType], prepend_ self[key] = tensor except: # noqa E722 + if key == "overflowing_tokens": + raise ValueError( + "Unable to create tensor returning overflowing tokens of different lengths. " + "Please see if a fast version of this tokenizer is available to use this feature." + ) raise ValueError( "Unable to create tensor, you should probably activate truncation and/or padding " "with 'padding=True' 'truncation=True' to have batched tensors with the same length."