diff --git a/optimum/commands/export/neuronx.py b/optimum/commands/export/neuronx.py
index fc1d2c73e..2e43015b5 100644
--- a/optimum/commands/export/neuronx.py
+++ b/optimum/commands/export/neuronx.py
@@ -58,7 +58,7 @@ def parse_args_neuronx(parser: "ArgumentParser"):
         type=str,
         choices=["transformers", "diffusers", "sentence_transformers"],
         default=None,
-        help=("The library on the model." " If not provided, will attempt to infer the local checkpoint's library."),
+        help=("The library of the model." " If not provided, will attempt to infer the local checkpoint's library."),
     )
     optional_group.add_argument(
         "--subfolder",
diff --git a/optimum/exporters/neuron/__main__.py b/optimum/exporters/neuron/__main__.py
index f53f008af..1094b80f6 100644
--- a/optimum/exporters/neuron/__main__.py
+++ b/optimum/exporters/neuron/__main__.py
@@ -108,6 +108,7 @@ def infer_task(task: str, model_name_or_path: str) -> str:
     return task


+# This function is not applicable for diffusers / sentence transformers models
 def get_input_shapes_and_config_class(task: str, args: argparse.Namespace) -> Dict[str, int]:
     config = AutoConfig.from_pretrained(args.model)

@@ -116,7 +117,10 @@ def get_input_shapes_and_config_class(task: str, args: argparse.Namespace) -> Di
         model_type = model_type + "-encoder"

     neuron_config_constructor = TasksManager.get_exporter_config_constructor(
-        model_type=model_type, exporter="neuron", task=task
+        model_type=model_type,
+        exporter="neuron",
+        task=task,
+        library_name="transformers",
     )
     input_args = neuron_config_constructor.func.get_input_args_for_task(task)
     input_shapes = {name: getattr(args, name) for name in input_args}
@@ -232,6 +236,7 @@ def _get_submodels_and_neuron_configs(
     input_shapes: Dict[str, int],
     task: str,
     output: Path,
+    library_name: Optional[str] = None,
     dynamic_batch_size: bool = False,
     model_name_or_path: Optional[Union[str, Path]] = None,
     submodels: Optional[Dict[str, Union[Path, str]]] = None,
@@ -250,7 +255,12 @@ def _get_submodels_and_neuron_configs(
                 f"`output_attentions` and `output_hidden_states` are not supported by the {task} task yet."
             )
         models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs_for_stable_diffusion(
-            model, input_shapes, task, output, dynamic_batch_size, submodels
+            model,
+            input_shapes,
+            task,
+            output,
+            dynamic_batch_size,
+            submodels,
         )
     elif is_encoder_decoder:
         optional_outputs = {"output_attentions": output_attentions, "output_hidden_states": output_hidden_states}
@@ -264,7 +274,10 @@ def _get_submodels_and_neuron_configs(
                 f"`output_attentions` and `output_hidden_states` are not supported by the {task} task yet."
             )
         neuron_config_constructor = TasksManager.get_exporter_config_constructor(
-            model=model, exporter="neuron", task=task
+            model=model,
+            exporter="neuron",
+            task=task,
+            library_name=library_name,
         )
         neuron_config = neuron_config_constructor(model.config, dynamic_batch_size=dynamic_batch_size, **input_shapes)
         model_name = getattr(model, "name_or_path", None) or model_name_or_path
@@ -387,6 +400,9 @@ def main_export(

     task = TasksManager.map_from_synonym(task)
     is_stable_diffusion = "stable-diffusion" in task
+    library_name = TasksManager.infer_library_from_model(
+        model_name_or_path, subfolder=subfolder, library_name=library_name
+    )

     model_kwargs = {
         "task": task,
@@ -407,6 +423,7 @@ def main_export(
         model=model,
         input_shapes=input_shapes,
         task=task,
+        library_name=library_name,
         output=output,
         dynamic_batch_size=dynamic_batch_size,
         model_name_or_path=model_name_or_path,
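For orientation, here is a minimal sketch of what this code path amounts to when driven programmatically. The entry point and its keyword arguments follow optimum-neuron at this revision; the model ID and input shapes are illustrative, and `library_name` is inferred from the checkpoint when omitted:

```python
# Sketch (assumes optimum-neuron's `main_export` at this revision; the model ID
# and input shapes below are illustrative, not part of the diff).
from optimum.exporters.neuron import main_export

main_export(
    model_name_or_path="sentence-transformers/all-MiniLM-L6-v2",
    output="st_neuron/",
    task="feature-extraction",
    library_name="sentence_transformers",  # optional: inferred from the checkpoint when omitted
    batch_size=1,
    sequence_length=128,
)
```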
diff --git a/optimum/exporters/neuron/model_configs.py b/optimum/exporters/neuron/model_configs.py
index 294928bbb..1b6ce4b2e 100644
--- a/optimum/exporters/neuron/model_configs.py
+++ b/optimum/exporters/neuron/model_configs.py
@@ -190,7 +190,9 @@ class DebertaV2NeuronConfig(ElectraNeuronConfig):
     pass


-@register_in_tasks_manager("sentence-transformers-transformer", *["feature-extraction", "sentence-similarity"])
+@register_in_tasks_manager(
+    "transformer", *["feature-extraction", "sentence-similarity"], library_name="sentence_transformers"
+)
 class SentenceTransformersTransformerNeuronConfig(TextEncoderNeuronConfig):
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
     CUSTOM_MODEL_WRAPPER = SentenceTransformersTransformerNeuronWrapper
@@ -226,7 +228,7 @@ def outputs(self) -> List[str]:
         return ["logits_per_image", "logits_per_text", "text_embeds", "image_embeds"]


-@register_in_tasks_manager("clip-text-with-projection", *["feature-extraction"])
+@register_in_tasks_manager("clip-text-with-projection", *["feature-extraction"], library_name="diffusers")
 class CLIPTextWithProjectionNeuronConfig(TextEncoderNeuronConfig):
     MODEL_TYPE = "clip-text-model"
     ATOL_FOR_VALIDATION = 1e-3
@@ -252,7 +254,7 @@ def outputs(self) -> List[str]:
         return common_outputs


-@register_in_tasks_manager("clip-text-model", *["feature-extraction"])
+@register_in_tasks_manager("clip-text-model", *["feature-extraction"], library_name="diffusers")
 class CLIPTextNeuronConfig(CLIPTextWithProjectionNeuronConfig):
     MODEL_TYPE = "clip-text-model"

@@ -268,7 +270,9 @@ def outputs(self) -> List[str]:

 # TODO: We should decouple clip text and vision, this would need fix on Optimum main. For the current workaround
 # users can pass dummy text inputs when encoding image, vice versa.
-@register_in_tasks_manager("sentence-transformers-clip", *["feature-extraction", "sentence-similarity"])
+@register_in_tasks_manager(
+    "clip", *["feature-extraction", "sentence-similarity"], library_name="sentence_transformers"
+)
 class SentenceTransformersCLIPNeuronConfig(CLIPNeuronConfig):
     CUSTOM_MODEL_WRAPPER = SentenceTransformersCLIPNeuronWrapper
     ATOL_FOR_VALIDATION = 1e-3
@@ -282,7 +286,7 @@ def patch_model_for_export(self, model, dummy_inputs):
         return self.CUSTOM_MODEL_WRAPPER(model, list(dummy_inputs.keys()))


-@register_in_tasks_manager("unet", *["semantic-segmentation"])
+@register_in_tasks_manager("unet", *["semantic-segmentation"], library_name="diffusers")
 class UNetNeuronConfig(VisionNeuronConfig):
     ATOL_FOR_VALIDATION = 1e-3
     INPUT_ARGS = ("batch_size", "sequence_length", "num_channels", "width", "height")
@@ -356,7 +360,7 @@ def is_sdxl(self, is_sdxl: bool):
         self._is_sdxl = is_sdxl


-@register_in_tasks_manager("vae-encoder", *["semantic-segmentation"])
+@register_in_tasks_manager("vae-encoder", *["semantic-segmentation"], library_name="diffusers")
 class VaeEncoderNeuronConfig(VisionNeuronConfig):
     ATOL_FOR_VALIDATION = 1e-3
     MODEL_TYPE = "vae-encoder"
@@ -392,7 +396,7 @@ def generate_dummy_inputs(self, return_tuple: bool = False, **kwargs):
         return dummy_inputs


-@register_in_tasks_manager("vae-decoder", *["semantic-segmentation"])
+@register_in_tasks_manager("vae-decoder", *["semantic-segmentation"], library_name="diffusers")
 class VaeDecoderNeuronConfig(VisionNeuronConfig):
     ATOL_FOR_VALIDATION = 1e-3
     MODEL_TYPE = "vae-decoder"
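A consequence of the re-scoped registrations above: the same short `model_type` can now resolve to different Neuron configs depending on the library namespace. A lookup sketch, assuming the registry state this diff produces:

```python
# Sketch: "clip" now lives in the sentence_transformers namespace, while the
# diffusers namespace keeps its own entries (names follow this diff).
from optimum.exporters.tasks import TasksManager

st_clip_ctor = TasksManager.get_exporter_config_constructor(
    model_type="clip",
    exporter="neuron",
    task="feature-extraction",
    library_name="sentence_transformers",  # -> SentenceTransformersCLIPNeuronConfig
)
unet_ctor = TasksManager.get_exporter_config_constructor(
    model_type="unet",
    exporter="neuron",
    task="semantic-segmentation",
    library_name="diffusers",  # -> UNetNeuronConfig
)
```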
diff --git a/optimum/exporters/neuron/utils.py b/optimum/exporters/neuron/utils.py
index 6c9678675..eb3d799d3 100644
--- a/optimum/exporters/neuron/utils.py
+++ b/optimum/exporters/neuron/utils.py
@@ -163,12 +163,16 @@ def get_stable_diffusion_models_for_export(
         Neuron configs for the different components of the model.
     """
     models_for_export = _get_submodels_for_export_stable_diffusion(pipeline=pipeline, task=task)
+    library_name = "diffusers"

     # Text encoders
     if DIFFUSION_MODEL_TEXT_ENCODER_NAME in models_for_export:
         text_encoder = models_for_export[DIFFUSION_MODEL_TEXT_ENCODER_NAME]
         text_encoder_config_constructor = TasksManager.get_exporter_config_constructor(
-            model=text_encoder, exporter="neuron", task="feature-extraction"
+            model=text_encoder,
+            exporter="neuron",
+            task="feature-extraction",
+            library_name=library_name,
         )
         text_encoder_neuron_config = text_encoder_config_constructor(
             text_encoder.config,
@@ -185,6 +189,7 @@ def get_stable_diffusion_models_for_export(
             exporter="neuron",
             task="feature-extraction",
             model_type="clip-text-with-projection",
+            library_name=library_name,
         )
         text_encoder_neuron_config_2 = text_encoder_config_constructor_2(
             text_encoder_2.config,
@@ -197,7 +202,11 @@ def get_stable_diffusion_models_for_export(
     # U-NET
     unet = models_for_export[DIFFUSION_MODEL_UNET_NAME]
     unet_neuron_config_constructor = TasksManager.get_exporter_config_constructor(
-        model=unet, exporter="neuron", task="semantic-segmentation", model_type="unet"
+        model=unet,
+        exporter="neuron",
+        task="semantic-segmentation",
+        model_type="unet",
+        library_name=library_name,
     )
     unet_neuron_config = unet_neuron_config_constructor(
         unet.config,
@@ -216,6 +225,7 @@ def get_stable_diffusion_models_for_export(
             exporter="neuron",
             task="semantic-segmentation",
             model_type="vae-encoder",
+            library_name=library_name,
         )
         vae_encoder_neuron_config = vae_encoder_config_constructor(
             vae_encoder.config,
@@ -232,6 +242,7 @@ def get_stable_diffusion_models_for_export(
             exporter="neuron",
             task="semantic-segmentation",
             model_type="vae-decoder",
+            library_name=library_name,
         )
         vae_decoder_neuron_config = vae_decoder_config_constructor(
             vae_decoder.config,
@@ -382,7 +393,10 @@ def get_encoder_decoder_models_for_export(
     # Encoder
     model_type = getattr(model.config, "model_type") + "-encoder"
     encoder_config_constructor = TasksManager.get_exporter_config_constructor(
-        exporter="neuron", model_type=model_type, task=task
+        exporter="neuron",
+        model_type=model_type,
+        task=task,
+        library_name="transformers",
     )
     check_mandatory_input_shapes(encoder_config_constructor, task, input_shapes)
     encoder_neuron_config = encoder_config_constructor(
@@ -396,7 +410,10 @@ def get_encoder_decoder_models_for_export(
     # Decoder
     model_type = getattr(model.config, "model_type") + "-decoder"
     decoder_config_constructor = TasksManager.get_exporter_config_constructor(
-        exporter="neuron", model_type=model_type, task=task
+        exporter="neuron",
+        model_type=model_type,
+        task=task,
+        library_name="transformers",
     )
     check_mandatory_input_shapes(encoder_config_constructor, task, input_shapes)
     decoder_neuron_config = decoder_config_constructor(
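The seq2seq helpers likewise pin their lookups to the `transformers` namespace via derived model_type strings. A sketch of that resolution, using `t5` as an illustrative architecture (not part of this diff):

```python
# Sketch: encoder/decoder submodels resolve under "transformers" via derived
# model_type strings such as "t5-encoder" / "t5-decoder"; the architecture and
# task here are illustrative assumptions.
from optimum.exporters.tasks import TasksManager

constructors = {
    suffix: TasksManager.get_exporter_config_constructor(
        exporter="neuron",
        model_type=f"t5-{suffix}",
        task="text2text-generation",
        library_name="transformers",
    )
    for suffix in ("encoder", "decoder")
}
```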
""" models_for_export = _get_submodels_for_export_stable_diffusion(pipeline=pipeline, task=task) + library_name = "diffusers" # Text encoders if DIFFUSION_MODEL_TEXT_ENCODER_NAME in models_for_export: text_encoder = models_for_export[DIFFUSION_MODEL_TEXT_ENCODER_NAME] text_encoder_config_constructor = TasksManager.get_exporter_config_constructor( - model=text_encoder, exporter="neuron", task="feature-extraction" + model=text_encoder, + exporter="neuron", + task="feature-extraction", + library_name=library_name, ) text_encoder_neuron_config = text_encoder_config_constructor( text_encoder.config, @@ -185,6 +189,7 @@ def get_stable_diffusion_models_for_export( exporter="neuron", task="feature-extraction", model_type="clip-text-with-projection", + library_name=library_name, ) text_encoder_neuron_config_2 = text_encoder_config_constructor_2( text_encoder_2.config, @@ -197,7 +202,11 @@ def get_stable_diffusion_models_for_export( # U-NET unet = models_for_export[DIFFUSION_MODEL_UNET_NAME] unet_neuron_config_constructor = TasksManager.get_exporter_config_constructor( - model=unet, exporter="neuron", task="semantic-segmentation", model_type="unet" + model=unet, + exporter="neuron", + task="semantic-segmentation", + model_type="unet", + library_name=library_name, ) unet_neuron_config = unet_neuron_config_constructor( unet.config, @@ -216,6 +225,7 @@ def get_stable_diffusion_models_for_export( exporter="neuron", task="semantic-segmentation", model_type="vae-encoder", + library_name=library_name, ) vae_encoder_neuron_config = vae_encoder_config_constructor( vae_encoder.config, @@ -232,6 +242,7 @@ def get_stable_diffusion_models_for_export( exporter="neuron", task="semantic-segmentation", model_type="vae-decoder", + library_name=library_name, ) vae_decoder_neuron_config = vae_decoder_config_constructor( vae_decoder.config, @@ -382,7 +393,10 @@ def get_encoder_decoder_models_for_export( # Encoder model_type = getattr(model.config, "model_type") + "-encoder" encoder_config_constructor = TasksManager.get_exporter_config_constructor( - exporter="neuron", model_type=model_type, task=task + exporter="neuron", + model_type=model_type, + task=task, + library_name="transformers", ) check_mandatory_input_shapes(encoder_config_constructor, task, input_shapes) encoder_neuron_config = encoder_config_constructor( @@ -396,7 +410,10 @@ def get_encoder_decoder_models_for_export( # Decoder model_type = getattr(model.config, "model_type") + "-decoder" decoder_config_constructor = TasksManager.get_exporter_config_constructor( - exporter="neuron", model_type=model_type, task=task + exporter="neuron", + model_type=model_type, + task=task, + library_name="transformers", ) check_mandatory_input_shapes(encoder_config_constructor, task, input_shapes) decoder_neuron_config = decoder_config_constructor( diff --git a/optimum/neuron/modeling.py b/optimum/neuron/modeling.py index 9f37f9b9e..fa2fdb574 100644 --- a/optimum/neuron/modeling.py +++ b/optimum/neuron/modeling.py @@ -204,6 +204,7 @@ class NeuronModelForSentenceTransformers(NeuronBaseModel): """ auto_model_class = AutoModel + library_name = "sentence_transformers" @add_start_docstrings_to_model_forward( NEURON_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length") diff --git a/optimum/neuron/modeling_base.py b/optimum/neuron/modeling_base.py index e26d42afe..e00c9305e 100644 --- a/optimum/neuron/modeling_base.py +++ b/optimum/neuron/modeling_base.py @@ -75,6 +75,7 @@ class NeuronBaseModel(OptimizedModel): model_type = "neuron_model" auto_model_class = AutoModel + 
diff --git a/optimum/neuron/modeling_diffusion.py b/optimum/neuron/modeling_diffusion.py
index e6b9080ee..ef00f377b 100644
--- a/optimum/neuron/modeling_diffusion.py
+++ b/optimum/neuron/modeling_diffusion.py
@@ -80,6 +80,7 @@ class NeuronStableDiffusionPipelineBase(NeuronBaseModel):

     auto_model_class = StableDiffusionPipeline
+    library_name = "diffusers"
     base_model_prefix = "neuron_model"
     config_name = "model_index.json"
     sub_component_config_name = "config.json"
@@ -653,6 +654,7 @@ def _export(
             use_auth_token=use_auth_token,
             do_validation=False,
             submodels={"unet": unet_id},
+            library_name=cls.library_name,
             **input_shapes,
         )

diff --git a/optimum/neuron/utils/argument_utils.py b/optimum/neuron/utils/argument_utils.py
index 208535796..499334667 100644
--- a/optimum/neuron/utils/argument_utils.py
+++ b/optimum/neuron/utils/argument_utils.py
@@ -179,7 +179,9 @@ def store_compilation_config(
     config_args["input_names"] = input_names
     config_args["output_names"] = output_names

-    original_model_type = getattr(config, "model_type", None)
+    original_model_type = getattr(config, "export_model_type", None) or getattr(
+        config, "model_type", None
+    )  # prioritize sentence_transformers over transformers
     neuron_model_type = str(model_type).replace("_", "-") if model_type is not None else model_type
     if original_model_type is None:
         update_func(
diff --git a/setup.py b/setup.py
index f6883dfda..05197419f 100644
--- a/setup.py
+++ b/setup.py
@@ -15,14 +15,14 @@
 INSTALL_REQUIRES = [
     "transformers == 4.36.2",
     "accelerate == 0.23.0",
-    "optimum >= 1.16.2",
+    "optimum ~= 1.17.0",
     "huggingface_hub >= 0.20.1",
     "numpy>=1.22.2, <=1.25.2",
     "protobuf<4",
 ]

 TESTS_REQUIRE = [
-    "pytest",
+    "pytest <= 8.0.0",
     "psutil",
     "parameterized",
     "GitPython",
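The `store_compilation_config` change matters because a sentence-transformers checkpoint keeps its underlying transformers `model_type` (for example `bert`), while the exporter records a library-scoped `export_model_type`. A self-contained sketch of the new precedence, with illustrative values:

```python
# Sketch of the lookup order introduced above: `export_model_type`, when
# present, wins over the checkpoint's `model_type`. Values are illustrative.
from types import SimpleNamespace

config = SimpleNamespace(model_type="bert", export_model_type="transformer")
resolved = getattr(config, "export_model_type", None) or getattr(config, "model_type", None)
assert resolved == "transformer"
```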
"sentence-transformers/all-MiniLM-L6-v2", + "clip": "sentence-transformers/clip-ViT-B-32", } WEIGHTS_NEFF_SEPARATION_UNSUPPORTED_ARCH = ["camembert", "roberta"] diff --git a/tests/exporters/test_export.py b/tests/exporters/test_export.py index a1b8c1ccd..c889bf998 100644 --- a/tests/exporters/test_export.py +++ b/tests/exporters/test_export.py @@ -61,12 +61,15 @@ def _get_models_to_test( export_models_dict: Dict, exclude_model_types: Optional[List[str]] = None, + library_name: str = "transformers", ): models_to_test = [] for model_type, model_names_tasks in export_models_dict.items(): model_type = model_type.replace("_", "-") if exclude_model_types is None or (model_type not in exclude_model_types): - task_config_mapping = TasksManager.get_supported_tasks_for_model_type(model_type, "neuron") + task_config_mapping = TasksManager.get_supported_tasks_for_model_type( + model_type, "neuron", library_name=library_name + ) if isinstance(model_names_tasks, str): # test export of all tasks on the same model tasks = list(task_config_mapping.keys()) @@ -83,6 +86,7 @@ def _get_models_to_test( neuron_config_constructor = TasksManager.get_exporter_config_constructor( model_type=model_type, exporter="neuron", + library_name=library_name, task=task, model_name=model_name, exporter_config_kwargs={**default_shapes}, @@ -119,8 +123,9 @@ def _neuronx_export( dynamic_batch_size: bool = False, inline_weights_to_neff: bool = True, ): - if "sentence-transformers" in model_type: - model_class = TasksManager.get_model_class_for_task(task, framework="pt", library="sentence_transformers") + library_name = TasksManager.infer_library_from_model(model_name) + if library_name == "sentence_transformers": + model_class = TasksManager.get_model_class_for_task(task, framework="pt", library=library_name) model = model_class(model_name) if "clip" in model[0].__class__.__name__.lower(): config = model[0].model.config @@ -161,13 +166,17 @@ def _neuronx_export( except (RuntimeError, ValueError) as e: self.fail(f"{model_type}, {task} -> {e}") - @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY)) + @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY, library_name="transformers")) @is_inferentia_test def test_export(self, test_name, name, model_name, task, neuron_config_constructor): self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor) @parameterized.expand( - _get_models_to_test(EXPORT_MODELS_TINY, exclude_model_types=WEIGHTS_NEFF_SEPARATION_UNSUPPORTED_ARCH) + _get_models_to_test( + EXPORT_MODELS_TINY, + exclude_model_types=WEIGHTS_NEFF_SEPARATION_UNSUPPORTED_ARCH, + library_name="transformers", + ) ) @is_inferentia_test def test_export_separated_weights(self, test_name, name, model_name, task, neuron_config_constructor): @@ -175,14 +184,14 @@ def test_export_separated_weights(self, test_name, name, model_name, task, neuro test_name, name, model_name, task, neuron_config_constructor, inline_weights_to_neff=False ) - @parameterized.expand(_get_models_to_test(SENTENCE_TRANSFORMERS_MODELS)) + @parameterized.expand(_get_models_to_test(SENTENCE_TRANSFORMERS_MODELS, library_name="sentence_transformers")) @is_inferentia_test @require_sentence_transformers @requires_neuronx def test_export_sentence_transformers(self, test_name, name, model_name, task, neuron_config_constructor): self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor) - @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY), skip_on_empty=True) + 
diff --git a/tests/inference/test_modeling.py b/tests/inference/test_modeling.py
index 3884b3517..3cea7f913 100644
--- a/tests/inference/test_modeling.py
+++ b/tests/inference/test_modeling.py
@@ -333,7 +333,11 @@ class NeuronModelForSentenceTransformersIntegrationTest(NeuronModelTestMixin):
     TASK = "feature-extraction"
     ATOL_FOR_VALIDATION = 1e-2
     # TODO: only support text models so far, will support vision next
-    SUPPORTED_ARCHITECTURES = ["sentence-transformers-transformer"]
+    SUPPORTED_ARCHITECTURES = ["transformer"]
+    ARCH_MODEL_MAP = {
+        "transformer": "sentence-transformers/all-MiniLM-L6-v2",
+        "clip": "sentence-transformers/clip-ViT-B-32",
+    }

     @parameterized.expand(SUPPORTED_ARCHITECTURES, skip_on_empty=True)
     @requires_neuronx
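To close the loop, a sketch of the round trip the updated sentence-transformers test exercises. It assumes a prior export to `st_neuron/` with the tokenizer saved alongside the compiled model, and an output attribute named per optimum-neuron's sentence-transformers model:

```python
# Sketch: loading compiled artifacts for inference (assumes "st_neuron/" was
# produced by a prior export and that the tokenizer was saved with it).
from transformers import AutoTokenizer

from optimum.neuron import NeuronModelForSentenceTransformers

tokenizer = AutoTokenizer.from_pretrained("st_neuron/")
model = NeuronModelForSentenceTransformers.from_pretrained("st_neuron/")

# Static-shape compilation: pad inputs to the sequence length used at export time.
inputs = tokenizer("A test sentence.", padding="max_length", max_length=128, return_tensors="pt")
outputs = model(**inputs)
sentence_embedding = outputs.sentence_embedding  # pooled sentence embedding
```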