Commit b671ea6

Apply suggestions from code review

NielsRogge committed Nov 2, 2021
1 parent ef920d7 commit b671ea6
Showing 4 changed files with 8 additions and 187 deletions.
7 changes: 2 additions & 5 deletions src/transformers/__init__.py
@@ -480,11 +480,8 @@
     _import_structure["models.detr"].append("DetrFeatureExtractor")
     _import_structure["models.layoutlmv2"].append("LayoutLMv2FeatureExtractor")
     _import_structure["models.layoutlmv2"].append("LayoutLMv2Processor")
-<<<<<<< HEAD
-    _import_structure["models.segformer"].append("SegformerFeatureExtractor")
-=======
+    _import_structure["models.layoutxlm"].append("LayoutXLMProcessor")
->>>>>>> Move LayoutXLM tokenizers and processor to separate folder
     _import_structure["models.segformer"].append("SegformerFeatureExtractor")
     _import_structure["models.vit"].append("ViTFeatureExtractor")
 else:
     from .utils import dummy_vision_objects
@@ -2359,8 +2356,8 @@
     from .models.deit import DeiTFeatureExtractor
     from .models.detr import DetrFeatureExtractor
     from .models.layoutlmv2 import LayoutLMv2FeatureExtractor, LayoutLMv2Processor
-    from .models.segformer import SegformerFeatureExtractor
     from .models.layoutxlm import LayoutXLMProcessor
+    from .models.segformer import SegformerFeatureExtractor
     from .models.vit import ViTFeatureExtractor
 else:
     from .utils.dummy_vision_objects import *
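Note: the two hunks above remove leftover merge-conflict markers and keep the vision-gated exports alphabetized. For context, here is a minimal sketch of the lazy-import pattern that `_import_structure` feeds — an illustration of the idea, not the actual `transformers._LazyModule` implementation:

```python
import importlib
import types


class LazyModule(types.ModuleType):
    """Resolve exported names to their owning submodules on first access."""

    def __init__(self, name, import_structure):
        super().__init__(name)
        # e.g. {"models.layoutxlm": ["LayoutXLMProcessor"], ...}
        self._class_to_module = {
            cls: mod for mod, classes in import_structure.items() for cls in classes
        }

    def __getattr__(self, attr):
        if attr not in self._class_to_module:
            raise AttributeError(f"module {self.__name__!r} has no attribute {attr!r}")
        # Import the owning submodule only now, then cache the attribute.
        module = importlib.import_module(f".{self._class_to_module[attr]}", self.__name__)
        value = getattr(module, attr)
        setattr(self, attr, value)
        return value
```

Appending "LayoutXLMProcessor" to `_import_structure["models.layoutxlm"]` is what lets `from transformers import LayoutXLMProcessor` succeed without eagerly importing the submodule.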
1 change: 1 addition & 0 deletions src/transformers/models/__init__.py
@@ -59,6 +59,7 @@
     ibert,
     layoutlm,
     layoutlmv2,
+    layoutxlm,
     led,
     longformer,
     luke,
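Registering the `layoutxlm` subpackage here makes it importable like any other model folder. A hedged usage sketch of the processor this PR moves (the checkpoint name is the public `microsoft/layoutxlm-base`; treat the exact output keys as indicative, and note the default OCR path needs pytesseract installed):

```python
from PIL import Image

from transformers import LayoutXLMProcessor

# Wraps a LayoutLMv2FeatureExtractor (OCR + image preprocessing) and a
# LayoutXLMTokenizer into one callable, mirroring LayoutLMv2Processor.
processor = LayoutXLMProcessor.from_pretrained("microsoft/layoutxlm-base")

image = Image.open("document.png").convert("RGB")  # any scanned document
encoding = processor(image, return_tensors="pt")   # default apply_ocr=True
print(list(encoding.keys()))  # e.g. input_ids, attention_mask, bbox, image
```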
178 changes: 0 additions & 178 deletions src/transformers/models/layoutxlm/tokenization_layoutxlm.py
@@ -474,67 +474,6 @@ def _is_valid_text_input(t):
             **kwargs,
         )
 
-    @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, LAYOUTLMV2_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
-    def batch_encode_plus(
-        self,
-        batch_text_or_text_pairs: Union[
-            List[TextInput],
-            List[TextInputPair],
-            List[PreTokenizedInput],
-        ],
-        is_pair: bool = None,
-        boxes: Optional[List[List[List[int]]]] = None,
-        word_labels: Optional[Union[List[int], List[List[int]]]] = None,
-        add_special_tokens: bool = True,
-        padding: Union[bool, str, PaddingStrategy] = False,
-        truncation: Union[bool, str, TruncationStrategy] = False,
-        max_length: Optional[int] = None,
-        stride: int = 0,
-        pad_to_multiple_of: Optional[int] = None,
-        return_tensors: Optional[Union[str, TensorType]] = None,
-        return_token_type_ids: Optional[bool] = None,
-        return_attention_mask: Optional[bool] = None,
-        return_overflowing_tokens: bool = False,
-        return_special_tokens_mask: bool = False,
-        return_offsets_mapping: bool = False,
-        return_length: bool = False,
-        verbose: bool = True,
-        **kwargs
-    ) -> BatchEncoding:
-        """ """
-
-        # Backward compatibility for 'truncation_strategy', 'pad_to_max_length'
-        padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
-            padding=padding,
-            truncation=truncation,
-            max_length=max_length,
-            pad_to_multiple_of=pad_to_multiple_of,
-            verbose=verbose,
-            **kwargs,
-        )
-
-        return self._batch_encode_plus(
-            batch_text_or_text_pairs=batch_text_or_text_pairs,
-            is_pair=is_pair,
-            boxes=boxes,
-            word_labels=word_labels,
-            add_special_tokens=add_special_tokens,
-            padding_strategy=padding_strategy,
-            truncation_strategy=truncation_strategy,
-            max_length=max_length,
-            stride=stride,
-            pad_to_multiple_of=pad_to_multiple_of,
-            return_tensors=return_tensors,
-            return_token_type_ids=return_token_type_ids,
-            return_attention_mask=return_attention_mask,
-            return_overflowing_tokens=return_overflowing_tokens,
-            return_special_tokens_mask=return_special_tokens_mask,
-            return_offsets_mapping=return_offsets_mapping,
-            return_length=return_length,
-            verbose=verbose,
-            **kwargs,
-        )
-
     def _batch_encode_plus(
         self,
         batch_text_or_text_pairs: Union[
@@ -662,123 +601,6 @@ def _batch_prepare_for_model(
 
         return batch_outputs
 
-    @add_end_docstrings(ENCODE_KWARGS_DOCSTRING)
-    def encode(
-        self,
-        text: Union[TextInput, PreTokenizedInput],
-        text_pair: Optional[PreTokenizedInput] = None,
-        boxes: Optional[List[List[int]]] = None,
-        word_labels: Optional[List[int]] = None,
-        add_special_tokens: bool = True,
-        padding: Union[bool, str, PaddingStrategy] = False,
-        truncation: Union[bool, str, TruncationStrategy] = False,
-        max_length: Optional[int] = None,
-        stride: int = 0,
-        pad_to_multiple_of: Optional[int] = None,
-        return_tensors: Optional[Union[str, TensorType]] = None,
-        return_token_type_ids: Optional[bool] = None,
-        return_attention_mask: Optional[bool] = None,
-        return_overflowing_tokens: bool = False,
-        return_special_tokens_mask: bool = False,
-        return_offsets_mapping: bool = False,
-        return_length: bool = False,
-        verbose: bool = True,
-        **kwargs
-    ) -> List[int]:
-        """
-        ...
-        """
-        encoded_inputs = self.encode_plus(
-            text=text,
-            text_pair=text_pair,
-            boxes=boxes,
-            word_labels=word_labels,
-            add_special_tokens=add_special_tokens,
-            padding=padding,
-            truncation=truncation,
-            max_length=max_length,
-            stride=stride,
-            pad_to_multiple_of=pad_to_multiple_of,
-            return_tensors=return_tensors,
-            return_token_type_ids=return_token_type_ids,
-            return_attention_mask=return_attention_mask,
-            return_overflowing_tokens=return_overflowing_tokens,
-            return_special_tokens_mask=return_special_tokens_mask,
-            return_offsets_mapping=return_offsets_mapping,
-            return_length=return_length,
-            verbose=verbose,
-            **kwargs,
-        )
-
-        return encoded_inputs["input_ids"]
-
-    @add_end_docstrings(ENCODE_KWARGS_DOCSTRING, LAYOUTLMV2_ENCODE_PLUS_ADDITIONAL_KWARGS_DOCSTRING)
-    def encode_plus(
-        self,
-        text: Union[TextInput, PreTokenizedInput],
-        text_pair: Optional[PreTokenizedInput] = None,
-        boxes: Optional[List[List[int]]] = None,
-        word_labels: Optional[List[int]] = None,
-        add_special_tokens: bool = True,
-        padding: Union[bool, str, PaddingStrategy] = False,
-        truncation: Union[bool, str, TruncationStrategy] = False,
-        max_length: Optional[int] = None,
-        stride: int = 0,
-        pad_to_multiple_of: Optional[int] = None,
-        return_tensors: Optional[Union[str, TensorType]] = None,
-        return_token_type_ids: Optional[bool] = None,
-        return_attention_mask: Optional[bool] = None,
-        return_overflowing_tokens: bool = False,
-        return_special_tokens_mask: bool = False,
-        return_offsets_mapping: bool = False,
-        return_length: bool = False,
-        verbose: bool = True,
-        **kwargs
-    ) -> BatchEncoding:
-        """
-        Tokenize and prepare for the model a sequence or a pair of sequences. .. warning:: This method is deprecated,
-        ``__call__`` should be used instead.
-        Args:
-            text (:obj:`str`, :obj:`List[str]`, :obj:`List[List[str]]`):
-                The first sequence to be encoded. This can be a string, a list of strings or a list of list of strings.
-            text_pair (:obj:`List[str]` or :obj:`List[int]`, `optional`):
-                Optional second sequence to be encoded. This can be a list of strings (words of a single example) or a
-                list of list of strings (words of a batch of examples).
-        """
-
-        # Backward compatibility for 'truncation_strategy', 'pad_to_max_length'
-        padding_strategy, truncation_strategy, max_length, kwargs = self._get_padding_truncation_strategies(
-            padding=padding,
-            truncation=truncation,
-            max_length=max_length,
-            pad_to_multiple_of=pad_to_multiple_of,
-            verbose=verbose,
-            **kwargs,
-        )
-
-        return self._encode_plus(
-            text=text,
-            boxes=boxes,
-            text_pair=text_pair,
-            word_labels=word_labels,
-            add_special_tokens=add_special_tokens,
-            padding_strategy=padding_strategy,
-            truncation_strategy=truncation_strategy,
-            max_length=max_length,
-            stride=stride,
-            pad_to_multiple_of=pad_to_multiple_of,
-            return_tensors=return_tensors,
-            return_token_type_ids=return_token_type_ids,
-            return_attention_mask=return_attention_mask,
-            return_overflowing_tokens=return_overflowing_tokens,
-            return_special_tokens_mask=return_special_tokens_mask,
-            return_offsets_mapping=return_offsets_mapping,
-            return_length=return_length,
-            verbose=verbose,
-            **kwargs,
-        )
-
     def _encode_plus(
         self,
         text: Union[TextInput, PreTokenizedInput],
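The 178 deleted lines above drop `batch_encode_plus`, `encode`, and `encode_plus` overrides that merely repeated what the inherited tokenizer entry points already do: normalize the legacy padding/truncation flags, then defer to the private `_encode_plus`/`_batch_encode_plus` hooks the subclass still implements. A simplified sketch of that dispatch pattern — an illustration, not the actual `PreTrainedTokenizerBase` source:

```python
class TokenizerDispatchSketch:
    def encode(self, text, **kwargs):
        # encode() is encode_plus() reduced to the token ids.
        return self.encode_plus(text, **kwargs)["input_ids"]

    def encode_plus(self, text, **kwargs):
        # Normalize legacy flags once, then defer to the subclass hook.
        return self._encode_plus(text, **self._normalize(**kwargs))

    def batch_encode_plus(self, batch, **kwargs):
        return self._batch_encode_plus(batch, **self._normalize(**kwargs))

    def _normalize(self, padding=False, truncation=False, **kwargs):
        # Stands in for _get_padding_truncation_strategies.
        kwargs["padding_strategy"] = (
            "max_length" if padding == "max_length"
            else "longest" if padding else "do_not_pad"
        )
        kwargs["truncation_strategy"] = "longest_first" if truncation else "do_not_truncate"
        return kwargs

    # Subclasses such as LayoutXLMTokenizer keep only these two hooks:
    def _encode_plus(self, text, **kwargs):
        raise NotImplementedError

    def _batch_encode_plus(self, batch, **kwargs):
        raise NotImplementedError
```

Because the public methods are inherited unchanged, deleting the copies removes the duplication with no behavior change.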
9 changes: 5 additions & 4 deletions src/transformers/utils/dummy_vision_objects.py
@@ -50,10 +50,6 @@ def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["vision"])
 
 
-class SegformerFeatureExtractor:
-    def __init__(self, *args, **kwargs):
-        requires_backends(self, ["vision"])
-
 class LayoutXLMProcessor:
     def __init__(self, *args, **kwargs):
         requires_backends(self, ["vision"])
@@ -63,6 +59,11 @@ def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["vision"])
 
 
+class SegformerFeatureExtractor:
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["vision"])
+
+
 class ViTFeatureExtractor:
     def __init__(self, *args, **kwargs):
         requires_backends(self, ["vision"])
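These hunks only re-alphabetize the dummy classes. A dummy object is what `from transformers import SegformerFeatureExtractor` resolves to when the vision backend (Pillow) is missing, so the import succeeds and the failure surfaces at use time with an actionable message. A minimal sketch of the pattern — simplified on the assumption that the real `requires_backends` first checks backend availability before raising:

```python
def requires_backends(obj, backends):
    """Simplified stand-in: assume the listed backends are missing and raise."""
    name = getattr(obj, "__name__", obj.__class__.__name__)
    raise ImportError(
        f"{name} requires the {', '.join(backends)} backend(s). "
        "Install them (e.g. `pip install Pillow` for vision) to use this class."
    )


class SegformerFeatureExtractor:
    def __init__(self, *args, **kwargs):
        requires_backends(self, ["vision"])


# Importing the name works; instantiating it surfaces the error:
try:
    SegformerFeatureExtractor()
except ImportError as err:
    print(err)
```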
