Make AutoProcessor a magic loading class for all modalities #18963

Merged 2 commits on Sep 14, 2022
24 changes: 20 additions & 4 deletions src/transformers/models/auto/processing_auto.py
@@ -23,14 +23,16 @@
 from ...dynamic_module_utils import get_class_from_dynamic_module
 from ...feature_extraction_utils import FeatureExtractionMixin
 from ...tokenization_utils import TOKENIZER_CONFIG_FILE
-from ...utils import CONFIG_NAME, FEATURE_EXTRACTOR_NAME, get_file_from_repo, logging
+from ...utils import FEATURE_EXTRACTOR_NAME, get_file_from_repo, logging
 from .auto_factory import _LazyAutoMapping
 from .configuration_auto import (
     CONFIG_MAPPING_NAMES,
     AutoConfig,
     model_type_to_module_name,
     replace_list_option_in_docstrings,
 )
+from .feature_extraction_auto import AutoFeatureExtractor
+from .tokenization_auto import AutoTokenizer


 logger = logging.get_logger(__name__)
@@ -250,10 +252,24 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
         if type(config) in PROCESSOR_MAPPING:
             return PROCESSOR_MAPPING[type(config)].from_pretrained(pretrained_model_name_or_path, **kwargs)

+        # At this stage, there doesn't seem to be a `Processor` class available for this model, so let's try a
+        # tokenizer.
+        try:
+            return AutoTokenizer.from_pretrained(
Contributor:
I guess the order doesn't matter too much here, since if both a tokenizer and a feature extractor were present, the model should also have a processor?

Collaborator (Author):
Yes, I don't see when we could have both without a processor.

+                pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
+            )
+        except Exception:
+            try:
+                return AutoFeatureExtractor.from_pretrained(
+                    pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
+                )
+            except Exception:
+                pass
+
         raise ValueError(
-            f"Unrecognized processor in {pretrained_model_name_or_path}. Should have a `processor_type` key in "
-            f"its {FEATURE_EXTRACTOR_NAME}, or one of the following `model_type` keys in its {CONFIG_NAME}: "
-            f"{', '.join(c for c in PROCESSOR_MAPPING_NAMES.keys())}"
+            f"Unrecognized processing class in {pretrained_model_name_or_path}. Can't instantiate a processor, a "
+            "tokenizer or a feature extractor for this model. Make sure the repository contains the files of at least "
+            "one of those processing classes."
         )

     @staticmethod
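For context on what the diff above enables, here is a minimal usage sketch (not part of the PR). It assumes the three public Hub checkpoints named below still exist with their current processing files; any multimodal, text-only, and vision-only repositories would exercise the same three code paths:

```python
from transformers import AutoProcessor

# A multimodal checkpoint still resolves through PROCESSOR_MAPPING as before.
clip_processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")

# A text-only checkpoint has no processor class, so AutoProcessor now falls
# back to the model's tokenizer.
bert_tokenizer = AutoProcessor.from_pretrained("bert-base-uncased")

# A vision-only checkpoint has neither a processor nor a tokenizer, so the
# second fallback returns its feature extractor.
convnext_feature_extractor = AutoProcessor.from_pretrained("facebook/convnext-tiny-224")
```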
8 changes: 8 additions & 0 deletions tests/models/auto/test_processor_auto.py
@@ -202,6 +202,14 @@ def test_new_processor_registration(self):
             if CustomConfig in PROCESSOR_MAPPING._extra_content:
                 del PROCESSOR_MAPPING._extra_content[CustomConfig]

+    def test_auto_processor_creates_tokenizer(self):
+        processor = AutoProcessor.from_pretrained("hf-internal-testing/tiny-random-bert")
+        self.assertEqual(processor.__class__.__name__, "BertTokenizerFast")
+
+    def test_auto_processor_creates_feature_extractor(self):
+        processor = AutoProcessor.from_pretrained("hf-internal-testing/tiny-random-convnext")
+        self.assertEqual(processor.__class__.__name__, "ConvNextFeatureExtractor")
+

 @is_staging_test
 class ProcessorPushToHubTester(unittest.TestCase):
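The two tests above cover the success paths; below is a rough sketch of the failure path. The repo id is hypothetical and stands in for any repository that has a config.json but no processor, tokenizer, or feature-extractor files, so both fallbacks fail and the rewritten error message surfaces:

```python
from transformers import AutoProcessor

# Hypothetical repo id: a checkpoint with a config.json but no processing
# files of any kind. The tokenizer and feature-extractor fallbacks both fail,
# so the new ValueError from processing_auto.py is raised.
try:
    AutoProcessor.from_pretrained("some-org/checkpoint-without-processing-files")
except ValueError as err:
    print(err)  # "Unrecognized processing class in ... Can't instantiate a processor, ..."
```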