diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
index ea3d72b3b30f9e..479cf3c8ba2d6d 100755
--- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
+++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
@@ -1240,10 +1240,10 @@ def forward(
     LAYOUTLMV2_START_DOCSTRING,
 )
 class LayoutLMv2ForQuestionAnswering(LayoutLMv2PreTrainedModel):
-    def __init__(self, config):
+    def __init__(self, config, has_visual_segment_embedding=True):
         super().__init__(config)
         self.num_labels = config.num_labels
-        config.has_visual_segment_embedding = True
+        config.has_visual_segment_embedding = has_visual_segment_embedding
         self.layoutlmv2 = LayoutLMv2Model(config)
         self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
 
diff --git a/tests/test_processor_layoutlmv2.py b/tests/test_processor_layoutlmv2.py
index 951339b7044d80..ab636dfadcdf56 100644
--- a/tests/test_processor_layoutlmv2.py
+++ b/tests/test_processor_layoutlmv2.py
@@ -30,6 +30,7 @@
 from transformers import LayoutLMv2FeatureExtractor, LayoutLMv2Processor
 
 
+@require_pytesseract
 class LayoutLMv2ProcessorTest(unittest.TestCase):
     def setUp(self):
         vocab_tokens = [
@@ -106,7 +107,7 @@ def test_save_load_pretrained_additional_features(self):
         self.assertIsInstance(processor.feature_extractor, LayoutLMv2FeatureExtractor)
 
 
-# integration tests
+# different use cases tests
 @require_torch
 @require_pytesseract
 class LayoutLMv2ProcessorIntegrationTests(unittest.TestCase):
diff --git a/tests/test_tokenization_layoutlmv2.py b/tests/test_tokenization_layoutlmv2.py
index b7ad946efad492..f654454db173b8 100644
--- a/tests/test_tokenization_layoutlmv2.py
+++ b/tests/test_tokenization_layoutlmv2.py
@@ -1055,15 +1055,15 @@ def test_batch_encode_plus_tensors(self):
                 words, boxes = self.get_words_and_boxes()
 
                 # A Tensor cannot be build by sequences which are not the same size
-                self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes, return_tensors="pt")
-                self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes, return_tensors="tf")
+                self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes=boxes, return_tensors="pt")
+                self.assertRaises(ValueError, tokenizer.batch_encode_plus, words, boxes=boxes, return_tensors="tf")
 
                 if tokenizer.pad_token_id is None:
                     self.assertRaises(
                         ValueError,
                         tokenizer.batch_encode_plus,
                         words,
-                        boxes,
+                        boxes=boxes,
                         padding=True,
                         return_tensors="pt",
                     )
@@ -1071,16 +1071,16 @@ def test_batch_encode_plus_tensors(self):
                         ValueError,
                         tokenizer.batch_encode_plus,
                         words,
-                        boxes,
+                        boxes=boxes,
                         padding="longest",
                         return_tensors="tf",
                     )
                 else:
-                    pytorch_tensor = tokenizer.batch_encode_plus(words, boxes, padding=True, return_tensors="pt")
+                    pytorch_tensor = tokenizer.batch_encode_plus(words, boxes=boxes, padding=True, return_tensors="pt")
                     tensorflow_tensor = tokenizer.batch_encode_plus(
-                        words, boxes, padding="longest", return_tensors="tf"
+                        words, boxes=boxes, padding="longest", return_tensors="tf"
                     )
-                    encoded_sequences = tokenizer.batch_encode_plus(words, boxes, padding=True)
+                    encoded_sequences = tokenizer.batch_encode_plus(words, boxes=boxes, padding=True)
 
                     for key in encoded_sequences.keys():
                         pytorch_value = pytorch_tensor[key].tolist()
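
For context, a minimal sketch of how the two behavioral changes above surface to callers, assuming a transformers build that includes this patch (plus torch and detectron2, which the LayoutLMv2 visual backbone requires). The checkpoint name is the public microsoft/layoutlmv2-base-uncased one; the words and boxes are toy values, not from this patch:

    from transformers import LayoutLMv2Config, LayoutLMv2ForQuestionAnswering, LayoutLMv2Tokenizer

    # The constructor now exposes has_visual_segment_embedding as an argument
    # instead of hard-coding config.has_visual_segment_embedding = True.
    config = LayoutLMv2Config()
    model = LayoutLMv2ForQuestionAnswering(config, has_visual_segment_embedding=False)

    # boxes is passed as a keyword argument, matching the updated tests above.
    tokenizer = LayoutLMv2Tokenizer.from_pretrained("microsoft/layoutlmv2-base-uncased")
    words = [["hello", "world"], ["my", "name", "is", "niels"]]
    boxes = [
        [[1, 2, 3, 4], [5, 6, 7, 8]],
        [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]],
    ]
    encoding = tokenizer.batch_encode_plus(words, boxes=boxes, padding=True, return_tensors="pt")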