From 3cc23eee06884c9f5f6565e9654d2dc9f122c07c Mon Sep 17 00:00:00 2001 From: Lysandre Debut Date: Thu, 9 Jul 2020 13:35:21 -0400 Subject: [PATCH] More explicit error when failing to tensorize overflowing tokens (#5633) --- src/transformers/tokenization_utils_base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/transformers/tokenization_utils_base.py b/src/transformers/tokenization_utils_base.py index 0e4b30a568b7aa..8fc9f9199c985f 100644 --- a/src/transformers/tokenization_utils_base.py +++ b/src/transformers/tokenization_utils_base.py @@ -511,6 +511,11 @@ def convert_to_tensors(self, tensor_type: Union[None, str, TensorType], prepend_ self[key] = tensor except: # noqa E722 + if key == "overflowing_tokens": + raise ValueError( + "Unable to create tensor returning overflowing tokens of different lengths. " + "Please see if a fast version of this tokenizer is available to use this feature." + ) raise ValueError( "Unable to create tensor, you should probably activate truncation and/or padding " "with 'padding=True' 'truncation=True' to have batched tensors with the same length."