From 625e4b171734404ef3104571ab6f55d1541d8685 Mon Sep 17 00:00:00 2001
From: JingyaHuang <jingya@huggingface.co>
Date: Wed, 17 Jan 2024 16:40:23 +0000
Subject: [PATCH 01/14] test the refactoring

---
 .github/workflows/test_inf1.yml           |  2 +-
 .github/workflows/test_inf2.yml           |  1 +
 optimum/exporters/neuron/model_configs.py | 10 +++++-----
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/test_inf1.yml b/.github/workflows/test_inf1.yml
index 780d4df3f..690fd3a99 100644
--- a/.github/workflows/test_inf1.yml
+++ b/.github/workflows/test_inf1.yml
@@ -38,7 +38,7 @@ jobs:
           python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
           python -m pip install .[neuron,tests]
           python -m pip uninstall optimum -y
-          python -m pip install optimum
+          python -m pip install git+https://github.com/fxmarty/optimum.git@do-not-override-modeltype
       - name: Run CLI tests
         run: |
           source aws_neuron_venv_pytorch/bin/activate
diff --git a/.github/workflows/test_inf2.yml b/.github/workflows/test_inf2.yml
index a296128ce..a5e3f8201 100644
--- a/.github/workflows/test_inf2.yml
+++ b/.github/workflows/test_inf2.yml
@@ -34,6 +34,7 @@ jobs:
           source aws_neuron_venv_pytorch/bin/activate
           python -m pip install -U pip
           python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
+          python -m pip install git+https://github.com/fxmarty/optimum.git@do-not-override-modeltype
           python -m pip install .[neuronx,tests]
       - name: Run cache tests
         run: |
diff --git a/optimum/exporters/neuron/model_configs.py b/optimum/exporters/neuron/model_configs.py
index 38ee03a62..77c38901f 100644
--- a/optimum/exporters/neuron/model_configs.py
+++ b/optimum/exporters/neuron/model_configs.py
@@ -174,7 +174,7 @@ class DebertaV2NeuronConfig(DebertaNeuronConfig):
     pass
 
 
-@register_in_tasks_manager("sentence-transformers-transformer", *["feature-extraction", "sentence-similarity"])
+@register_in_tasks_manager("transformer", *["feature-extraction", "sentence-similarity"], library_name="sentence_transformers")
 class SentenceTransformersTransformerNeuronConfig(TextEncoderNeuronConfig):
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
     CUSTOM_MODEL_WRAPPER = SentenceTransformersTransformerNeuronWrapper
@@ -252,7 +252,7 @@ def outputs(self) -> List[str]:
 
 # TODO: We should decouple clip text and vision, this would need fix on Optimum main. For the current workaround
 # users can pass dummy text inputs when encoding image, vice versa.
-@register_in_tasks_manager("sentence-transformers-clip", *["feature-extraction", "sentence-similarity"])
+@register_in_tasks_manager("clip", *["feature-extraction", "sentence-similarity"], library_name="sentence_transformers")
 class SentenceTransformersCLIPNeuronConfig(CLIPNeuronConfig):
     CUSTOM_MODEL_WRAPPER = SentenceTransformersCLIPNeuronWrapper
     ATOL_FOR_VALIDATION = 1e-3
@@ -266,7 +266,7 @@ def patch_model_for_export(self, model, dummy_inputs):
         return self.CUSTOM_MODEL_WRAPPER(model, list(dummy_inputs.keys()))
 
 
-@register_in_tasks_manager("unet", *["semantic-segmentation"])
+@register_in_tasks_manager("unet", *["semantic-segmentation"], library_name="diffusers")
 class UNetNeuronConfig(VisionNeuronConfig):
     ATOL_FOR_VALIDATION = 1e-3
     MANDATORY_AXES = ("batch_size", "sequence_length", "num_channels", "width", "height")
@@ -340,7 +340,7 @@ def is_sdxl(self, is_sdxl: bool):
         self._is_sdxl = is_sdxl
 
 
-@register_in_tasks_manager("vae-encoder", *["semantic-segmentation"])
+@register_in_tasks_manager("vae-encoder", *["semantic-segmentation"], library_name="diffusers")
 class VaeEncoderNeuronConfig(VisionNeuronConfig):
     ATOL_FOR_VALIDATION = 1e-3
     MODEL_TYPE = "vae-encoder"
@@ -376,7 +376,7 @@ def generate_dummy_inputs(self, return_tuple: bool = False, **kwargs):
             return dummy_inputs
 
 
-@register_in_tasks_manager("vae-decoder", *["semantic-segmentation"])
+@register_in_tasks_manager("vae-decoder", *["semantic-segmentation"], library_name="diffusers")
 class VaeDecoderNeuronConfig(VisionNeuronConfig):
     ATOL_FOR_VALIDATION = 1e-3
     MODEL_TYPE = "vae-decoder"

From 3a311b4f70963fce0458d6ff23a461e1dc4aee73 Mon Sep 17 00:00:00 2001
From: JingyaHuang <jingya@huggingface.co>
Date: Thu, 18 Jan 2024 13:44:04 +0000
Subject: [PATCH 02/14] fix test

---
 optimum/exporters/neuron/model_configs.py | 8 ++++++--
 tests/exporters/test_export.py            | 5 +++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/optimum/exporters/neuron/model_configs.py b/optimum/exporters/neuron/model_configs.py
index 77c38901f..083d47224 100644
--- a/optimum/exporters/neuron/model_configs.py
+++ b/optimum/exporters/neuron/model_configs.py
@@ -174,7 +174,9 @@ class DebertaV2NeuronConfig(DebertaNeuronConfig):
     pass
 
 
-@register_in_tasks_manager("transformer", *["feature-extraction", "sentence-similarity"], library_name="sentence_transformers")
+@register_in_tasks_manager(
+    "transformer", *["feature-extraction", "sentence-similarity"], library_name="sentence_transformers"
+)
 class SentenceTransformersTransformerNeuronConfig(TextEncoderNeuronConfig):
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
     CUSTOM_MODEL_WRAPPER = SentenceTransformersTransformerNeuronWrapper
@@ -252,7 +254,9 @@ def outputs(self) -> List[str]:
 
 # TODO: We should decouple clip text and vision, this would need fix on Optimum main. For the current workaround
 # users can pass dummy text inputs when encoding image, vice versa.
-@register_in_tasks_manager("clip", *["feature-extraction", "sentence-similarity"], library_name="sentence_transformers")
+@register_in_tasks_manager(
+    "clip", *["feature-extraction", "sentence-similarity"], library_name="sentence_transformers"
+)
 class SentenceTransformersCLIPNeuronConfig(CLIPNeuronConfig):
     CUSTOM_MODEL_WRAPPER = SentenceTransformersCLIPNeuronWrapper
     ATOL_FOR_VALIDATION = 1e-3
diff --git a/tests/exporters/test_export.py b/tests/exporters/test_export.py
index 9ce117176..8aac34cff 100644
--- a/tests/exporters/test_export.py
+++ b/tests/exporters/test_export.py
@@ -107,8 +107,9 @@ def _neuronx_export(
         neuron_config_constructor: "NeuronConfig",
         dynamic_batch_size: bool = False,
     ):
-        if "sentence-transformers" in model_type:
-            model_class = TasksManager.get_model_class_for_task(task, framework="pt", library="sentence_transformers")
+        library_name = TasksManager.infer_library_from_model(model_name)
+        if library_name == "sentence-transformers":
+            model_class = TasksManager.get_model_class_for_task(task, framework="pt", library=library_name)
             model = model_class(model_name)
             if "clip" in model[0].__class__.__name__.lower():
                 config = model[0].model.config

From 9a536ba64e2cd037ba7968e7fc491f1af1b32a5e Mon Sep 17 00:00:00 2001
From: JingyaHuang <jingya@huggingface.co>
Date: Fri, 19 Jan 2024 16:52:57 +0000
Subject: [PATCH 03/14] bump optimum

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 66ba45cbe..a89b684af 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
 INSTALL_REQUIRES = [
     "transformers == 4.36.2",
     "accelerate == 0.23.0",
-    "optimum >= 1.14.0",
+    "optimum >= 1.16.2",
     "huggingface_hub >= 0.20.1",
     "numpy>=1.22.2, <=1.25.2",
     "protobuf<4",

From 4fab487ea658a6d98adcc77b57d851c6b8d2308b Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Tue, 30 Jan 2024 00:36:19 +0000
Subject: [PATCH 04/14] fix stable diffusion export

---
 optimum/exporters/neuron/utils.py | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/optimum/exporters/neuron/utils.py b/optimum/exporters/neuron/utils.py
index 6c9678675..bd5819074 100644
--- a/optimum/exporters/neuron/utils.py
+++ b/optimum/exporters/neuron/utils.py
@@ -197,7 +197,11 @@ def get_stable_diffusion_models_for_export(
     # U-NET
     unet = models_for_export[DIFFUSION_MODEL_UNET_NAME]
     unet_neuron_config_constructor = TasksManager.get_exporter_config_constructor(
-        model=unet, exporter="neuron", task="semantic-segmentation", model_type="unet"
+        model=unet,
+        exporter="neuron",
+        task="semantic-segmentation",
+        model_type="unet",
+        library_name="diffusers",
     )
     unet_neuron_config = unet_neuron_config_constructor(
         unet.config,
@@ -216,6 +220,7 @@ def get_stable_diffusion_models_for_export(
         exporter="neuron",
         task="semantic-segmentation",
         model_type="vae-encoder",
+        library_name="diffusers",
     )
     vae_encoder_neuron_config = vae_encoder_config_constructor(
         vae_encoder.config,
@@ -232,6 +237,7 @@ def get_stable_diffusion_models_for_export(
         exporter="neuron",
         task="semantic-segmentation",
         model_type="vae-decoder",
+        library_name="diffusers",
     )
     vae_decoder_neuron_config = vae_decoder_config_constructor(
         vae_decoder.config,
@@ -382,7 +388,10 @@ def get_encoder_decoder_models_for_export(
     # Encoder
     model_type = getattr(model.config, "model_type") + "-encoder"
     encoder_config_constructor = TasksManager.get_exporter_config_constructor(
-        exporter="neuron", model_type=model_type, task=task
+        exporter="neuron",
+        model_type=model_type,
+        task=task,
+        library_name="transformers",
     )
     check_mandatory_input_shapes(encoder_config_constructor, task, input_shapes)
     encoder_neuron_config = encoder_config_constructor(
@@ -396,7 +405,10 @@ def get_encoder_decoder_models_for_export(
     # Decoder
     model_type = getattr(model.config, "model_type") + "-decoder"
     decoder_config_constructor = TasksManager.get_exporter_config_constructor(
-        exporter="neuron", model_type=model_type, task=task
+        exporter="neuron",
+        model_type=model_type,
+        task=task,
+        library_name="transformers",
     )
     check_mandatory_input_shapes(encoder_config_constructor, task, input_shapes)
     decoder_neuron_config = decoder_config_constructor(

From c633d29f15c8a8379eecfa31992f9d1a4f426ada Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Tue, 30 Jan 2024 18:09:18 +0000
Subject: [PATCH 05/14] add library_name="transformers" even for defaults

---
 optimum/exporters/neuron/__main__.py |  6 +++++-
 optimum/exporters/neuron/utils.py    |  6 +++++-
 tests/exporters/exporters_utils.py   |  4 ++--
 tests/exporters/test_export.py       | 15 +++++++++------
 4 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/optimum/exporters/neuron/__main__.py b/optimum/exporters/neuron/__main__.py
index a4c2eb28c..67a0bd7c6 100644
--- a/optimum/exporters/neuron/__main__.py
+++ b/optimum/exporters/neuron/__main__.py
@@ -108,6 +108,7 @@ def infer_task(task: str, model_name_or_path: str) -> str:
     return task
 
 
+# This function is not applicable for diffusers / sentence transformers models
 def get_input_shapes_and_config_class(task: str, args: argparse.Namespace) -> Dict[str, int]:
     config = AutoConfig.from_pretrained(args.model)
 
@@ -116,7 +117,10 @@ def get_input_shapes_and_config_class(task: str, args: argparse.Namespace) -> Di
         model_type = model_type + "-encoder"
 
     neuron_config_constructor = TasksManager.get_exporter_config_constructor(
-        model_type=model_type, exporter="neuron", task=task
+        model_type=model_type,
+        exporter="neuron",
+        task=task,
+        library_name="transformers",
     )
     input_args = neuron_config_constructor.func.get_input_args_for_task(task)
     input_shapes = {name: getattr(args, name) for name in input_args}
diff --git a/optimum/exporters/neuron/utils.py b/optimum/exporters/neuron/utils.py
index bd5819074..0ce8a3325 100644
--- a/optimum/exporters/neuron/utils.py
+++ b/optimum/exporters/neuron/utils.py
@@ -168,7 +168,10 @@ def get_stable_diffusion_models_for_export(
     if DIFFUSION_MODEL_TEXT_ENCODER_NAME in models_for_export:
         text_encoder = models_for_export[DIFFUSION_MODEL_TEXT_ENCODER_NAME]
         text_encoder_config_constructor = TasksManager.get_exporter_config_constructor(
-            model=text_encoder, exporter="neuron", task="feature-extraction"
+            model=text_encoder,
+            exporter="neuron",
+            task="feature-extraction",
+            library_name="transformers",
         )
         text_encoder_neuron_config = text_encoder_config_constructor(
             text_encoder.config,
@@ -185,6 +188,7 @@ def get_stable_diffusion_models_for_export(
             exporter="neuron",
             task="feature-extraction",
             model_type="clip-text-with-projection",
+            library_name="transformers",
         )
         text_encoder_neuron_config_2 = text_encoder_config_constructor_2(
             text_encoder_2.config,
diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
index 419d689cd..6cbcc8c82 100644
--- a/tests/exporters/exporters_utils.py
+++ b/tests/exporters/exporters_utils.py
@@ -43,8 +43,8 @@
 }
 
 SENTENCE_TRANSFORMERS_MODELS = {
-    "sentence-transformers-transformer": "sentence-transformers/all-MiniLM-L6-v2",
-    "sentence-transformers-clip": "sentence-transformers/clip-ViT-B-32",
+    "transformer": "sentence-transformers/all-MiniLM-L6-v2",
+    "clip": "sentence-transformers/clip-ViT-B-32",
 }
 
 SEED = 42
diff --git a/tests/exporters/test_export.py b/tests/exporters/test_export.py
index 54af2e026..3fc4dfe78 100644
--- a/tests/exporters/test_export.py
+++ b/tests/exporters/test_export.py
@@ -56,11 +56,13 @@
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
-def _get_models_to_test(export_models_dict: Dict):
+def _get_models_to_test(export_models_dict: Dict, library_name: str = "transformers"):
     models_to_test = []
     for model_type, model_names_tasks in export_models_dict.items():
         model_type = model_type.replace("_", "-")
-        task_config_mapping = TasksManager.get_supported_tasks_for_model_type(model_type, "neuron")
+        task_config_mapping = TasksManager.get_supported_tasks_for_model_type(
+            model_type, "neuron", library_name=library_name
+        )
 
         if isinstance(model_names_tasks, str):  # test export of all tasks on the same model
             tasks = list(task_config_mapping.keys())
@@ -77,6 +79,7 @@ def _get_models_to_test(export_models_dict: Dict):
                 neuron_config_constructor = TasksManager.get_exporter_config_constructor(
                     model_type=model_type,
                     exporter="neuron",
+                    library_name=library_name,
                     task=task,
                     model_name=model_name,
                     exporter_config_kwargs={**default_shapes},
@@ -108,7 +111,7 @@ def _neuronx_export(
         dynamic_batch_size: bool = False,
     ):
         library_name = TasksManager.infer_library_from_model(model_name)
-        if library_name == "sentence-transformers":
+        if library_name == "sentence_transformers":
             model_class = TasksManager.get_model_class_for_task(task, framework="pt", library=library_name)
             model = model_class(model_name)
             if "clip" in model[0].__class__.__name__.lower():
@@ -149,12 +152,12 @@ def _neuronx_export(
             except (RuntimeError, ValueError) as e:
                 self.fail(f"{model_type}, {task} -> {e}")
 
-    @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY))
+    @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY, library_name="transformers"))
     @is_inferentia_test
     def test_export(self, test_name, name, model_name, task, neuron_config_constructor):
         self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor)
 
-    @parameterized.expand(_get_models_to_test(SENTENCE_TRANSFORMERS_MODELS))
+    @parameterized.expand(_get_models_to_test(SENTENCE_TRANSFORMERS_MODELS, library_name="sentence_transformers"))
     @is_inferentia_test
     @require_vision
     @require_sentence_transformers
@@ -162,7 +165,7 @@ def test_export(self, test_name, name, model_name, task, neuron_config_construct
     def test_export_sentence_transformers(self, test_name, name, model_name, task, neuron_config_constructor):
         self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor)
 
-    @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY), skip_on_empty=True)
+    @parameterized.expand(_get_models_to_test(EXPORT_MODELS_TINY, library_name="transformers"), skip_on_empty=True)
     @is_inferentia_test
     @requires_neuronx
     def test_export_with_dynamic_batch_size(self, test_name, name, model_name, task, neuron_config_constructor):

From b7068735e94d41bcf8517473344b6482000a848a Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Thu, 1 Feb 2024 14:25:30 +0000
Subject: [PATCH 06/14] merge

---
 tests/distributed/distributed.py |  3 ++-
 tests/exporters/test_export.py   | 16 ++++++++--------
 tests/test_cache_utils.py        | 12 ++++++------
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/tests/distributed/distributed.py b/tests/distributed/distributed.py
index c3aabf8e2..690140cd1 100644
--- a/tests/distributed/distributed.py
+++ b/tests/distributed/distributed.py
@@ -97,7 +97,8 @@ class DistributedExec(ABC):
     exec_timeout: int = TEST_TIMEOUT
 
     @abstractmethod
-    def run(self): ...
+    def run(self):
+        ...
 
     def __call__(self, request=None):
         self._fixture_kwargs = self._get_fixture_kwargs(request, self.run)
diff --git a/tests/exporters/test_export.py b/tests/exporters/test_export.py
index 62c304c3c..0c5eae1de 100644
--- a/tests/exporters/test_export.py
+++ b/tests/exporters/test_export.py
@@ -68,8 +68,8 @@ def _get_models_to_test(
         model_type = model_type.replace("_", "-")
         if exclude_model_types is None or (model_type not in exclude_model_types):
             task_config_mapping = TasksManager.get_supported_tasks_for_model_type(
-            model_type, "neuron", library_name=library_name
-        )
+                model_type, "neuron", library_name=library_name
+            )
 
             if isinstance(model_names_tasks, str):  # test export of all tasks on the same model
                 tasks = list(task_config_mapping.keys())
@@ -171,11 +171,12 @@ def _neuronx_export(
     def test_export(self, test_name, name, model_name, task, neuron_config_constructor):
         self._neuronx_export(test_name, name, model_name, task, neuron_config_constructor)
 
-<<<<<<< HEAD
-    @parameterized.expand(_get_models_to_test(SENTENCE_TRANSFORMERS_MODELS, library_name="sentence_transformers"))
-=======
     @parameterized.expand(
-        _get_models_to_test(EXPORT_MODELS_TINY, exclude_model_types=WEIGHTS_NEFF_SEPARATION_UNSUPPORTED_ARCH)
+        _get_models_to_test(
+            EXPORT_MODELS_TINY,
+            exclude_model_types=WEIGHTS_NEFF_SEPARATION_UNSUPPORTED_ARCH,
+            library_name="transformers",
+        )
     )
     @is_inferentia_test
     @requires_neuronx
@@ -184,8 +185,7 @@ def test_export_separated_weights(self, test_name, name, model_name, task, neuro
             test_name, name, model_name, task, neuron_config_constructor, inline_weights_to_neff=False
         )
 
-    @parameterized.expand(_get_models_to_test(SENTENCE_TRANSFORMERS_MODELS))
->>>>>>> main
+    @parameterized.expand(_get_models_to_test(SENTENCE_TRANSFORMERS_MODELS, library_name="sentence_transformers"))
     @is_inferentia_test
     @require_vision
     @require_sentence_transformers
diff --git a/tests/test_cache_utils.py b/tests/test_cache_utils.py
index 567de3178..ffd2c2e7d 100644
--- a/tests/test_cache_utils.py
+++ b/tests/test_cache_utils.py
@@ -83,9 +83,9 @@ def test_get_neuron_cache_path(self):
         assert get_neuron_cache_path() is None
 
         custom_cache_dir_name = Path("_this/is_/my1/2custom/cache/dir")
-        os.environ["NEURON_CC_FLAGS"] = (
-            f"--some --parameters --here --cache_dir={custom_cache_dir_name} --other --paremeters --here"
-        )
+        os.environ[
+            "NEURON_CC_FLAGS"
+        ] = f"--some --parameters --here --cache_dir={custom_cache_dir_name} --other --paremeters --here"
 
         self.assertEqual(get_neuron_cache_path(), custom_cache_dir_name)
 
@@ -99,9 +99,9 @@ def _test_set_neuron_cache_path(self, new_cache_path):
         set_neuron_cache_path(new_cache_path, ignore_no_cache=True)
         self.assertEqual(get_neuron_cache_path(), Path(new_cache_path))
 
-        os.environ["NEURON_CC_FLAGS"] = (
-            "--some --parameters --here --cache_dir=original_cache_dir --other --paremeters"
-        )
+        os.environ[
+            "NEURON_CC_FLAGS"
+        ] = "--some --parameters --here --cache_dir=original_cache_dir --other --paremeters"
         set_neuron_cache_path(new_cache_path)
         self.assertEqual(get_neuron_cache_path(), Path(new_cache_path))
 

From cc4207047254c752b3d4f14f9e3b8a58113de0fc Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 16 Feb 2024 09:18:12 +0000
Subject: [PATCH 07/14] bump optimum & fix style

---
 setup.py                         |  2 +-
 tests/distributed/distributed.py |  3 +--
 tests/test_cache_utils.py        | 12 ++++++------
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/setup.py b/setup.py
index 031e1bafa..eb4b02d75 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
 INSTALL_REQUIRES = [
     "transformers == 4.36.2",
     "accelerate == 0.23.0",
-    "optimum >= 1.16.2",
+    "optimum ~= 1.17.0",
     "huggingface_hub >= 0.20.1",
     "numpy>=1.22.2, <=1.25.2",
     "protobuf<4",
diff --git a/tests/distributed/distributed.py b/tests/distributed/distributed.py
index 690140cd1..c3aabf8e2 100644
--- a/tests/distributed/distributed.py
+++ b/tests/distributed/distributed.py
@@ -97,8 +97,7 @@ class DistributedExec(ABC):
     exec_timeout: int = TEST_TIMEOUT
 
     @abstractmethod
-    def run(self):
-        ...
+    def run(self): ...
 
     def __call__(self, request=None):
         self._fixture_kwargs = self._get_fixture_kwargs(request, self.run)
diff --git a/tests/test_cache_utils.py b/tests/test_cache_utils.py
index ffd2c2e7d..567de3178 100644
--- a/tests/test_cache_utils.py
+++ b/tests/test_cache_utils.py
@@ -83,9 +83,9 @@ def test_get_neuron_cache_path(self):
         assert get_neuron_cache_path() is None
 
         custom_cache_dir_name = Path("_this/is_/my1/2custom/cache/dir")
-        os.environ[
-            "NEURON_CC_FLAGS"
-        ] = f"--some --parameters --here --cache_dir={custom_cache_dir_name} --other --paremeters --here"
+        os.environ["NEURON_CC_FLAGS"] = (
+            f"--some --parameters --here --cache_dir={custom_cache_dir_name} --other --paremeters --here"
+        )
 
         self.assertEqual(get_neuron_cache_path(), custom_cache_dir_name)
 
@@ -99,9 +99,9 @@ def _test_set_neuron_cache_path(self, new_cache_path):
         set_neuron_cache_path(new_cache_path, ignore_no_cache=True)
         self.assertEqual(get_neuron_cache_path(), Path(new_cache_path))
 
-        os.environ[
-            "NEURON_CC_FLAGS"
-        ] = "--some --parameters --here --cache_dir=original_cache_dir --other --paremeters"
+        os.environ["NEURON_CC_FLAGS"] = (
+            "--some --parameters --here --cache_dir=original_cache_dir --other --paremeters"
+        )
         set_neuron_cache_path(new_cache_path)
         self.assertEqual(get_neuron_cache_path(), Path(new_cache_path))
 

From 71d4d039cb354cfa30f9a1735cc93b6fcd28e2f0 Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 16 Feb 2024 09:19:42 +0000
Subject: [PATCH 08/14] restore CI

---
 .github/workflows/test_inf2.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/test_inf2.yml b/.github/workflows/test_inf2.yml
index f36a61b10..333c0bd08 100644
--- a/.github/workflows/test_inf2.yml
+++ b/.github/workflows/test_inf2.yml
@@ -34,7 +34,6 @@ jobs:
           source aws_neuron_venv_pytorch/bin/activate
           python -m pip install -U pip
           python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
-          python -m pip install git+https://github.com/fxmarty/optimum.git@do-not-override-modeltype
           python -m pip install .[neuronx,tests]
       - name: Run cache tests
         run: |

From 5fc8efa229b28c5d8c94f0e299d98dee8669bbbf Mon Sep 17 00:00:00 2001
From: David Corvoysier <david@huggingface.co>
Date: Fri, 16 Feb 2024 08:06:02 +0000
Subject: [PATCH 09/14] ci: reduce export and pipelines test frequency

This runs export and pipelines tests in dedicated pipelines with
stricter path filters to avoid running them on every change.
---
 .github/workflows/test_inf1_export.yml      |  5 ++-
 .github/workflows/test_inf1_full_export.yml | 48 +++++++++++++++++++++
 .github/workflows/test_inf1_inference.yml   |  6 +--
 .github/workflows/test_inf1_pipelines.yml   | 43 ++++++++++++++++++
 .github/workflows/test_inf2_export.yml      |  5 ++-
 .github/workflows/test_inf2_full_export.yml | 39 +++++++++++++++++
 6 files changed, 137 insertions(+), 9 deletions(-)
 create mode 100644 .github/workflows/test_inf1_full_export.yml
 create mode 100644 .github/workflows/test_inf1_pipelines.yml
 create mode 100644 .github/workflows/test_inf2_full_export.yml

diff --git a/.github/workflows/test_inf1_export.yml b/.github/workflows/test_inf1_export.yml
index be3bf5954..c23fce75c 100644
--- a/.github/workflows/test_inf1_export.yml
+++ b/.github/workflows/test_inf1_export.yml
@@ -1,4 +1,4 @@
-name: Optimum neuron / Test INF1 export
+name: Optimum neuron / Test INF1 partial export
 
 on:
   push:
@@ -18,7 +18,7 @@ concurrency:
 
 jobs:
   do-the-job:
-    name: Run INF1 tests
+    name: Run INF1 export tests
     runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
     env:
       AWS_REGION: us-east-1
@@ -46,4 +46,5 @@ jobs:
       - name: Run export tests
         run: |
           source aws_neuron_venv_pytorch/bin/activate
+          export MAX_EXPORT_TEST_COMBINATIONS=1
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
diff --git a/.github/workflows/test_inf1_full_export.yml b/.github/workflows/test_inf1_full_export.yml
new file mode 100644
index 000000000..e08c7e3d3
--- /dev/null
+++ b/.github/workflows/test_inf1_full_export.yml
@@ -0,0 +1,48 @@
+name: Optimum neuron / Test INF1 full export
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "optimum/exporters/neuron/*.py"
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "optimum/exporters/neuron/*.py"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  do-the-job:
+    name: Run INF1 full export tests
+    runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
+    env:
+      AWS_REGION: us-east-1
+    steps:
+      - name: Check AMI
+        run: dpkg -l | grep neuron
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Install system packages
+        run: |
+          sudo apt install python3.8-venv -y
+      - name: Install python packages
+        run: |
+          python3 -m venv aws_neuron_venv_pytorch
+          source aws_neuron_venv_pytorch/bin/activate
+          python -m pip install -U pip
+          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
+          python -m pip install .[neuron,tests]
+          python -m pip uninstall optimum -y
+          python -m pip install optimum
+      - name: Run CLI tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/cli
+      - name: Run export tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          export MAX_EXPORT_TEST_COMBINATIONS=10
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
diff --git a/.github/workflows/test_inf1_inference.yml b/.github/workflows/test_inf1_inference.yml
index 5bdd9571a..c6ee49170 100644
--- a/.github/workflows/test_inf1_inference.yml
+++ b/.github/workflows/test_inf1_inference.yml
@@ -1,4 +1,4 @@
-name: Optimum neuron / Test INF1 inference & pipelines
+name: Optimum neuron / Test INF1 inference
 
 on:
   push:
@@ -43,7 +43,3 @@ jobs:
         run: |
           source aws_neuron_venv_pytorch/bin/activate
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/inference
-      - name: Run pipelines tests
-        run: |
-          source aws_neuron_venv_pytorch/bin/activate
-          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/pipelines
diff --git a/.github/workflows/test_inf1_pipelines.yml b/.github/workflows/test_inf1_pipelines.yml
new file mode 100644
index 000000000..ab53c91e5
--- /dev/null
+++ b/.github/workflows/test_inf1_pipelines.yml
@@ -0,0 +1,43 @@
+name: Optimum neuron / Test INF1 pipelines
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "optimum/neuron/pipelines/**.py"
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "optimum/neuron/pipelines/**.py"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  do-the-job:
+    name: Run INF1 tests
+    runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
+    env:
+      AWS_REGION: us-east-1
+    steps:
+      - name: Check AMI
+        run: dpkg -l | grep neuron
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Install system packages
+        run: |
+          sudo apt install python3.8-venv -y
+      - name: Install python packages
+        run: |
+          python3 -m venv aws_neuron_venv_pytorch
+          source aws_neuron_venv_pytorch/bin/activate
+          python -m pip install -U pip
+          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
+          python -m pip install .[neuron,tests]
+          python -m pip uninstall optimum -y
+          python -m pip install optimum
+      - name: Run pipelines tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/pipelines
diff --git a/.github/workflows/test_inf2_export.yml b/.github/workflows/test_inf2_export.yml
index 796b0933a..bd02d1e13 100644
--- a/.github/workflows/test_inf2_export.yml
+++ b/.github/workflows/test_inf2_export.yml
@@ -1,4 +1,4 @@
-name: Optimum neuron / Test INF2 export
+name: Optimum neuron / Test INF2 partial export
 
 on:
   push:
@@ -18,7 +18,7 @@ concurrency:
 
 jobs:
   do-the-job:
-    name: Run INF2 tests
+    name: Run INF2 export tests
     runs-on: [self-hosted, 1-aws-inf2, 32-cpu, ci] # run the job on the newly created runner
     env:
       AWS_REGION: us-east-1
@@ -38,4 +38,5 @@ jobs:
       - name: Run exporters tests
         run: |
           source aws_neuron_venv_pytorch/bin/activate
+          export MAX_EXPORT_TEST_COMBINATIONS=1
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
diff --git a/.github/workflows/test_inf2_full_export.yml b/.github/workflows/test_inf2_full_export.yml
new file mode 100644
index 000000000..3ee8b7cae
--- /dev/null
+++ b/.github/workflows/test_inf2_full_export.yml
@@ -0,0 +1,39 @@
+name: Optimum neuron / Test INF2 full export
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "optimum/exporters/neuron/*.py"
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "optimum/exporters/neuron/*.py"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  do-the-job:
+    name: Run INF2 full export tests
+    runs-on: [self-hosted, 1-aws-inf2, 32-cpu, ci] # run the job on the newly created runner
+    env:
+      AWS_REGION: us-east-1
+    steps:
+      - name: Check AMI
+        run: dpkg -l | grep neuron
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Install python dependencies
+        run: |
+          sudo apt install python3.8-venv -y
+          python3 -m venv aws_neuron_venv_pytorch
+          source aws_neuron_venv_pytorch/bin/activate
+          python -m pip install -U pip
+          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
+          python -m pip install .[neuronx,tests]
+      - name: Run exporters tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters

From 48899c4fb429cbea126153f700bd9d03041c6918 Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 16 Feb 2024 10:00:58 +0000
Subject: [PATCH 10/14] Revert "ci: reduce export and pipelines test frequency"

This reverts commit 5fc8efa229b28c5d8c94f0e299d98dee8669bbbf.
---
 .github/workflows/test_inf1_export.yml      |  5 +--
 .github/workflows/test_inf1_full_export.yml | 48 ---------------------
 .github/workflows/test_inf1_inference.yml   |  6 ++-
 .github/workflows/test_inf1_pipelines.yml   | 43 ------------------
 .github/workflows/test_inf2_export.yml      |  5 +--
 .github/workflows/test_inf2_full_export.yml | 39 -----------------
 6 files changed, 9 insertions(+), 137 deletions(-)
 delete mode 100644 .github/workflows/test_inf1_full_export.yml
 delete mode 100644 .github/workflows/test_inf1_pipelines.yml
 delete mode 100644 .github/workflows/test_inf2_full_export.yml

diff --git a/.github/workflows/test_inf1_export.yml b/.github/workflows/test_inf1_export.yml
index c23fce75c..be3bf5954 100644
--- a/.github/workflows/test_inf1_export.yml
+++ b/.github/workflows/test_inf1_export.yml
@@ -1,4 +1,4 @@
-name: Optimum neuron / Test INF1 partial export
+name: Optimum neuron / Test INF1 export
 
 on:
   push:
@@ -18,7 +18,7 @@ concurrency:
 
 jobs:
   do-the-job:
-    name: Run INF1 export tests
+    name: Run INF1 tests
     runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
     env:
       AWS_REGION: us-east-1
@@ -46,5 +46,4 @@ jobs:
       - name: Run export tests
         run: |
           source aws_neuron_venv_pytorch/bin/activate
-          export MAX_EXPORT_TEST_COMBINATIONS=1
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
diff --git a/.github/workflows/test_inf1_full_export.yml b/.github/workflows/test_inf1_full_export.yml
deleted file mode 100644
index e08c7e3d3..000000000
--- a/.github/workflows/test_inf1_full_export.yml
+++ /dev/null
@@ -1,48 +0,0 @@
-name: Optimum neuron / Test INF1 full export
-
-on:
-  push:
-    branches: [ main ]
-    paths:
-      - "optimum/exporters/neuron/*.py"
-  pull_request:
-    branches: [ main ]
-    paths:
-      - "optimum/exporters/neuron/*.py"
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  do-the-job:
-    name: Run INF1 full export tests
-    runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
-    env:
-      AWS_REGION: us-east-1
-    steps:
-      - name: Check AMI
-        run: dpkg -l | grep neuron
-      - name: Checkout
-        uses: actions/checkout@v2
-      - name: Install system packages
-        run: |
-          sudo apt install python3.8-venv -y
-      - name: Install python packages
-        run: |
-          python3 -m venv aws_neuron_venv_pytorch
-          source aws_neuron_venv_pytorch/bin/activate
-          python -m pip install -U pip
-          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
-          python -m pip install .[neuron,tests]
-          python -m pip uninstall optimum -y
-          python -m pip install optimum
-      - name: Run CLI tests
-        run: |
-          source aws_neuron_venv_pytorch/bin/activate
-          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/cli
-      - name: Run export tests
-        run: |
-          source aws_neuron_venv_pytorch/bin/activate
-          export MAX_EXPORT_TEST_COMBINATIONS=10
-          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
diff --git a/.github/workflows/test_inf1_inference.yml b/.github/workflows/test_inf1_inference.yml
index c6ee49170..5bdd9571a 100644
--- a/.github/workflows/test_inf1_inference.yml
+++ b/.github/workflows/test_inf1_inference.yml
@@ -1,4 +1,4 @@
-name: Optimum neuron / Test INF1 inference
+name: Optimum neuron / Test INF1 inference & pipelines
 
 on:
   push:
@@ -43,3 +43,7 @@ jobs:
         run: |
           source aws_neuron_venv_pytorch/bin/activate
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/inference
+      - name: Run pipelines tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/pipelines
diff --git a/.github/workflows/test_inf1_pipelines.yml b/.github/workflows/test_inf1_pipelines.yml
deleted file mode 100644
index ab53c91e5..000000000
--- a/.github/workflows/test_inf1_pipelines.yml
+++ /dev/null
@@ -1,43 +0,0 @@
-name: Optimum neuron / Test INF1 pipelines
-
-on:
-  push:
-    branches: [ main ]
-    paths:
-      - "optimum/neuron/pipelines/**.py"
-  pull_request:
-    branches: [ main ]
-    paths:
-      - "optimum/neuron/pipelines/**.py"
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  do-the-job:
-    name: Run INF1 tests
-    runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
-    env:
-      AWS_REGION: us-east-1
-    steps:
-      - name: Check AMI
-        run: dpkg -l | grep neuron
-      - name: Checkout
-        uses: actions/checkout@v2
-      - name: Install system packages
-        run: |
-          sudo apt install python3.8-venv -y
-      - name: Install python packages
-        run: |
-          python3 -m venv aws_neuron_venv_pytorch
-          source aws_neuron_venv_pytorch/bin/activate
-          python -m pip install -U pip
-          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
-          python -m pip install .[neuron,tests]
-          python -m pip uninstall optimum -y
-          python -m pip install optimum
-      - name: Run pipelines tests
-        run: |
-          source aws_neuron_venv_pytorch/bin/activate
-          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/pipelines
diff --git a/.github/workflows/test_inf2_export.yml b/.github/workflows/test_inf2_export.yml
index bd02d1e13..796b0933a 100644
--- a/.github/workflows/test_inf2_export.yml
+++ b/.github/workflows/test_inf2_export.yml
@@ -1,4 +1,4 @@
-name: Optimum neuron / Test INF2 partial export
+name: Optimum neuron / Test INF2 export
 
 on:
   push:
@@ -18,7 +18,7 @@ concurrency:
 
 jobs:
   do-the-job:
-    name: Run INF2 export tests
+    name: Run INF2 tests
     runs-on: [self-hosted, 1-aws-inf2, 32-cpu, ci] # run the job on the newly created runner
     env:
       AWS_REGION: us-east-1
@@ -38,5 +38,4 @@ jobs:
       - name: Run exporters tests
         run: |
           source aws_neuron_venv_pytorch/bin/activate
-          export MAX_EXPORT_TEST_COMBINATIONS=1
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
diff --git a/.github/workflows/test_inf2_full_export.yml b/.github/workflows/test_inf2_full_export.yml
deleted file mode 100644
index 3ee8b7cae..000000000
--- a/.github/workflows/test_inf2_full_export.yml
+++ /dev/null
@@ -1,39 +0,0 @@
-name: Optimum neuron / Test INF2 full export
-
-on:
-  push:
-    branches: [ main ]
-    paths:
-      - "optimum/exporters/neuron/*.py"
-  pull_request:
-    branches: [ main ]
-    paths:
-      - "optimum/exporters/neuron/*.py"
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  do-the-job:
-    name: Run INF2 full export tests
-    runs-on: [self-hosted, 1-aws-inf2, 32-cpu, ci] # run the job on the newly created runner
-    env:
-      AWS_REGION: us-east-1
-    steps:
-      - name: Check AMI
-        run: dpkg -l | grep neuron
-      - name: Checkout
-        uses: actions/checkout@v2
-      - name: Install python dependencies
-        run: |
-          sudo apt install python3.8-venv -y
-          python3 -m venv aws_neuron_venv_pytorch
-          source aws_neuron_venv_pytorch/bin/activate
-          python -m pip install -U pip
-          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
-          python -m pip install .[neuronx,tests]
-      - name: Run exporters tests
-        run: |
-          source aws_neuron_venv_pytorch/bin/activate
-          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters

From 81fca1d3d171fecae936cd28e5e24dd14f406996 Mon Sep 17 00:00:00 2001
From: David Corvoysier <david@huggingface.co>
Date: Fri, 16 Feb 2024 08:06:02 +0000
Subject: [PATCH 11/14] ci: reduce export and pipelines test frequency

This runs export and pipelines tests in dedicated pipelines with
stricter path filters to avoid running them on every change.
---
 .github/workflows/test_inf1_export.yml      |  5 ++-
 .github/workflows/test_inf1_full_export.yml | 47 +++++++++++++++++++++
 .github/workflows/test_inf1_inference.yml   |  6 +--
 .github/workflows/test_inf1_pipelines.yml   | 43 +++++++++++++++++++
 .github/workflows/test_inf2_export.yml      |  5 ++-
 .github/workflows/test_inf2_full_export.yml | 39 +++++++++++++++++
 6 files changed, 136 insertions(+), 9 deletions(-)
 create mode 100644 .github/workflows/test_inf1_full_export.yml
 create mode 100644 .github/workflows/test_inf1_pipelines.yml
 create mode 100644 .github/workflows/test_inf2_full_export.yml

diff --git a/.github/workflows/test_inf1_export.yml b/.github/workflows/test_inf1_export.yml
index be3bf5954..c23fce75c 100644
--- a/.github/workflows/test_inf1_export.yml
+++ b/.github/workflows/test_inf1_export.yml
@@ -1,4 +1,4 @@
-name: Optimum neuron / Test INF1 export
+name: Optimum neuron / Test INF1 partial export
 
 on:
   push:
@@ -18,7 +18,7 @@ concurrency:
 
 jobs:
   do-the-job:
-    name: Run INF1 tests
+    name: Run INF1 export tests
     runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
     env:
       AWS_REGION: us-east-1
@@ -46,4 +46,5 @@ jobs:
       - name: Run export tests
         run: |
           source aws_neuron_venv_pytorch/bin/activate
+          export MAX_EXPORT_TEST_COMBINATIONS=1
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
diff --git a/.github/workflows/test_inf1_full_export.yml b/.github/workflows/test_inf1_full_export.yml
new file mode 100644
index 000000000..1b182a370
--- /dev/null
+++ b/.github/workflows/test_inf1_full_export.yml
@@ -0,0 +1,47 @@
+name: Optimum neuron / Test INF1 full export
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "optimum/exporters/neuron/*.py"
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "optimum/exporters/neuron/*.py"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  do-the-job:
+    name: Run INF1 full export tests
+    runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
+    env:
+      AWS_REGION: us-east-1
+    steps:
+      - name: Check AMI
+        run: dpkg -l | grep neuron
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Install system packages
+        run: |
+          sudo apt install python3.8-venv -y
+      - name: Install python packages
+        run: |
+          python3 -m venv aws_neuron_venv_pytorch
+          source aws_neuron_venv_pytorch/bin/activate
+          python -m pip install -U pip
+          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
+          python -m pip install .[neuron,tests]
+          python -m pip uninstall optimum -y
+          python -m pip install optimum
+      - name: Run CLI tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/cli
+      - name: Run export tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
diff --git a/.github/workflows/test_inf1_inference.yml b/.github/workflows/test_inf1_inference.yml
index 5bdd9571a..c6ee49170 100644
--- a/.github/workflows/test_inf1_inference.yml
+++ b/.github/workflows/test_inf1_inference.yml
@@ -1,4 +1,4 @@
-name: Optimum neuron / Test INF1 inference & pipelines
+name: Optimum neuron / Test INF1 inference
 
 on:
   push:
@@ -43,7 +43,3 @@ jobs:
         run: |
           source aws_neuron_venv_pytorch/bin/activate
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/inference
-      - name: Run pipelines tests
-        run: |
-          source aws_neuron_venv_pytorch/bin/activate
-          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/pipelines
diff --git a/.github/workflows/test_inf1_pipelines.yml b/.github/workflows/test_inf1_pipelines.yml
new file mode 100644
index 000000000..ab53c91e5
--- /dev/null
+++ b/.github/workflows/test_inf1_pipelines.yml
@@ -0,0 +1,43 @@
+name: Optimum neuron / Test INF1 pipelines
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "optimum/neuron/pipelines/**.py"
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "optimum/neuron/pipelines/**.py"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  do-the-job:
+    name: Run INF1 tests
+    runs-on: [self-hosted, 4-aws-inf1, 24-cpu, ci]
+    env:
+      AWS_REGION: us-east-1
+    steps:
+      - name: Check AMI
+        run: dpkg -l | grep neuron
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Install system packages
+        run: |
+          sudo apt install python3.8-venv -y
+      - name: Install python packages
+        run: |
+          python3 -m venv aws_neuron_venv_pytorch
+          source aws_neuron_venv_pytorch/bin/activate
+          python -m pip install -U pip
+          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
+          python -m pip install .[neuron,tests]
+          python -m pip uninstall optimum -y
+          python -m pip install optimum
+      - name: Run pipelines tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/pipelines
diff --git a/.github/workflows/test_inf2_export.yml b/.github/workflows/test_inf2_export.yml
index 796b0933a..bd02d1e13 100644
--- a/.github/workflows/test_inf2_export.yml
+++ b/.github/workflows/test_inf2_export.yml
@@ -1,4 +1,4 @@
-name: Optimum neuron / Test INF2 export
+name: Optimum neuron / Test INF2 partial export
 
 on:
   push:
@@ -18,7 +18,7 @@ concurrency:
 
 jobs:
   do-the-job:
-    name: Run INF2 tests
+    name: Run INF2 export tests
     runs-on: [self-hosted, 1-aws-inf2, 32-cpu, ci] # run the job on the newly created runner
     env:
       AWS_REGION: us-east-1
@@ -38,4 +38,5 @@ jobs:
       - name: Run exporters tests
         run: |
           source aws_neuron_venv_pytorch/bin/activate
+          export MAX_EXPORT_TEST_COMBINATIONS=1
           HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters
diff --git a/.github/workflows/test_inf2_full_export.yml b/.github/workflows/test_inf2_full_export.yml
new file mode 100644
index 000000000..3ee8b7cae
--- /dev/null
+++ b/.github/workflows/test_inf2_full_export.yml
@@ -0,0 +1,39 @@
+name: Optimum neuron / Test INF2 full export
+
+on:
+  push:
+    branches: [ main ]
+    paths:
+      - "optimum/exporters/neuron/*.py"
+  pull_request:
+    branches: [ main ]
+    paths:
+      - "optimum/exporters/neuron/*.py"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  do-the-job:
+    name: Run INF2 full export tests
+    runs-on: [self-hosted, 1-aws-inf2, 32-cpu, ci] # run the job on the newly created runner
+    env:
+      AWS_REGION: us-east-1
+    steps:
+      - name: Check AMI
+        run: dpkg -l | grep neuron
+      - name: Checkout
+        uses: actions/checkout@v2
+      - name: Install python dependencies
+        run: |
+          sudo apt install python3.8-venv -y
+          python3 -m venv aws_neuron_venv_pytorch
+          source aws_neuron_venv_pytorch/bin/activate
+          python -m pip install -U pip
+          python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
+          python -m pip install .[neuronx,tests]
+      - name: Run exporters tests
+        run: |
+          source aws_neuron_venv_pytorch/bin/activate
+          HF_TOKEN=${{ secrets.HF_TOKEN_OPTIMUM_NEURON_CI }} pytest -m is_inferentia_test tests/exporters

From c8f8bc052861dc2313f8bb1935d18945d67244b4 Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 16 Feb 2024 22:00:03 +0000
Subject: [PATCH 12/14] fix tests

---
 optimum/commands/export/neuronx.py        |  2 +-
 optimum/exporters/neuron/__main__.py      | 17 +++++++++++++++--
 optimum/exporters/neuron/model_configs.py |  4 ++--
 optimum/exporters/neuron/utils.py         | 11 ++++++-----
 optimum/neuron/modeling.py                |  1 +
 optimum/neuron/modeling_base.py           | 16 ++++++++++++----
 optimum/neuron/modeling_diffusion.py      |  2 ++
 optimum/neuron/utils/argument_utils.py    |  4 +++-
 tests/inference/test_modeling.py          |  6 +++++-
 9 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/optimum/commands/export/neuronx.py b/optimum/commands/export/neuronx.py
index fc1d2c73e..2e43015b5 100644
--- a/optimum/commands/export/neuronx.py
+++ b/optimum/commands/export/neuronx.py
@@ -58,7 +58,7 @@ def parse_args_neuronx(parser: "ArgumentParser"):
         type=str,
         choices=["transformers", "diffusers", "sentence_transformers"],
         default=None,
-        help=("The library on the model." " If not provided, will attempt to infer the local checkpoint's library."),
+        help=("The library of the model." " If not provided, will attempt to infer the local checkpoint's library."),
     )
     optional_group.add_argument(
         "--subfolder",
diff --git a/optimum/exporters/neuron/__main__.py b/optimum/exporters/neuron/__main__.py
index f68f0cca4..b80e827a6 100644
--- a/optimum/exporters/neuron/__main__.py
+++ b/optimum/exporters/neuron/__main__.py
@@ -235,6 +235,7 @@ def _get_submodels_and_neuron_configs(
     model: Union["PreTrainedModel", "DiffusionPipeline"],
     input_shapes: Dict[str, int],
     task: str,
+    library_name: str,
     output: Path,
     dynamic_batch_size: bool = False,
     model_name_or_path: Optional[Union[str, Path]] = None,
@@ -254,7 +255,12 @@ def _get_submodels_and_neuron_configs(
                 f"`output_attentions` and `output_hidden_states` are not supported by the {task} task yet."
             )
         models_and_neuron_configs, output_model_names = _get_submodels_and_neuron_configs_for_stable_diffusion(
-            model, input_shapes, task, output, dynamic_batch_size, submodels
+            model,
+            input_shapes,
+            task,
+            output,
+            dynamic_batch_size,
+            submodels,
         )
     elif is_encoder_decoder:
         optional_outputs = {"output_attentions": output_attentions, "output_hidden_states": output_hidden_states}
@@ -268,7 +274,10 @@ def _get_submodels_and_neuron_configs(
                 f"`output_attentions` and `output_hidden_states` are not supported by the {task} task yet."
             )
         neuron_config_constructor = TasksManager.get_exporter_config_constructor(
-            model=model, exporter="neuron", task=task
+            model=model,
+            exporter="neuron",
+            task=task,
+            library_name=library_name,
         )
         neuron_config = neuron_config_constructor(model.config, dynamic_batch_size=dynamic_batch_size, **input_shapes)
         model_name = getattr(model, "name_or_path", None) or model_name_or_path
@@ -391,6 +400,9 @@ def main_export(
 
     task = TasksManager.map_from_synonym(task)
     is_stable_diffusion = "stable-diffusion" in task
+    library_name = TasksManager.infer_library_from_model(
+        model_name_or_path, subfolder=subfolder, library_name=library_name
+    )
 
     model_kwargs = {
         "task": task,
@@ -411,6 +423,7 @@ def main_export(
         model=model,
         input_shapes=input_shapes,
         task=task,
+        library_name=library_name,
         output=output,
         dynamic_batch_size=dynamic_batch_size,
         model_name_or_path=model_name_or_path,
diff --git a/optimum/exporters/neuron/model_configs.py b/optimum/exporters/neuron/model_configs.py
index 5e6f36014..1b6ce4b2e 100644
--- a/optimum/exporters/neuron/model_configs.py
+++ b/optimum/exporters/neuron/model_configs.py
@@ -228,7 +228,7 @@ def outputs(self) -> List[str]:
         return ["logits_per_image", "logits_per_text", "text_embeds", "image_embeds"]
 
 
-@register_in_tasks_manager("clip-text-with-projection", *["feature-extraction"])
+@register_in_tasks_manager("clip-text-with-projection", *["feature-extraction"], library_name="diffusers")
 class CLIPTextWithProjectionNeuronConfig(TextEncoderNeuronConfig):
     MODEL_TYPE = "clip-text-model"
     ATOL_FOR_VALIDATION = 1e-3
@@ -254,7 +254,7 @@ def outputs(self) -> List[str]:
         return common_outputs
 
 
-@register_in_tasks_manager("clip-text-model", *["feature-extraction"])
+@register_in_tasks_manager("clip-text-model", *["feature-extraction"], library_name="diffusers")
 class CLIPTextNeuronConfig(CLIPTextWithProjectionNeuronConfig):
     MODEL_TYPE = "clip-text-model"
 
diff --git a/optimum/exporters/neuron/utils.py b/optimum/exporters/neuron/utils.py
index 0ce8a3325..eb3d799d3 100644
--- a/optimum/exporters/neuron/utils.py
+++ b/optimum/exporters/neuron/utils.py
@@ -163,6 +163,7 @@ def get_stable_diffusion_models_for_export(
         Neuron configs for the different components of the model.
     """
     models_for_export = _get_submodels_for_export_stable_diffusion(pipeline=pipeline, task=task)
+    library_name = "diffusers"
 
     # Text encoders
     if DIFFUSION_MODEL_TEXT_ENCODER_NAME in models_for_export:
@@ -171,7 +172,7 @@ def get_stable_diffusion_models_for_export(
             model=text_encoder,
             exporter="neuron",
             task="feature-extraction",
-            library_name="transformers",
+            library_name=library_name,
         )
         text_encoder_neuron_config = text_encoder_config_constructor(
             text_encoder.config,
@@ -188,7 +189,7 @@ def get_stable_diffusion_models_for_export(
             exporter="neuron",
             task="feature-extraction",
             model_type="clip-text-with-projection",
-            library_name="transformers",
+            library_name=library_name,
         )
         text_encoder_neuron_config_2 = text_encoder_config_constructor_2(
             text_encoder_2.config,
@@ -205,7 +206,7 @@ def get_stable_diffusion_models_for_export(
         exporter="neuron",
         task="semantic-segmentation",
         model_type="unet",
-        library_name="diffusers",
+        library_name=library_name,
     )
     unet_neuron_config = unet_neuron_config_constructor(
         unet.config,
@@ -224,7 +225,7 @@ def get_stable_diffusion_models_for_export(
         exporter="neuron",
         task="semantic-segmentation",
         model_type="vae-encoder",
-        library_name="diffusers",
+        library_name=library_name,
     )
     vae_encoder_neuron_config = vae_encoder_config_constructor(
         vae_encoder.config,
@@ -241,7 +242,7 @@ def get_stable_diffusion_models_for_export(
         exporter="neuron",
         task="semantic-segmentation",
         model_type="vae-decoder",
-        library_name="diffusers",
+        library_name=library_name,
     )
     vae_decoder_neuron_config = vae_decoder_config_constructor(
         vae_decoder.config,
diff --git a/optimum/neuron/modeling.py b/optimum/neuron/modeling.py
index 9f37f9b9e..fa2fdb574 100644
--- a/optimum/neuron/modeling.py
+++ b/optimum/neuron/modeling.py
@@ -204,6 +204,7 @@ class NeuronModelForSentenceTransformers(NeuronBaseModel):
     """
 
     auto_model_class = AutoModel
+    library_name = "sentence_transformers"
 
     @add_start_docstrings_to_model_forward(
         NEURON_TEXT_INPUTS_DOCSTRING.format("batch_size, sequence_length")
diff --git a/optimum/neuron/modeling_base.py b/optimum/neuron/modeling_base.py
index e26d42afe..e00c9305e 100644
--- a/optimum/neuron/modeling_base.py
+++ b/optimum/neuron/modeling_base.py
@@ -75,6 +75,7 @@ class NeuronBaseModel(OptimizedModel):
 
     model_type = "neuron_model"
     auto_model_class = AutoModel
+    library_name = "transformers"
 
     def __init__(
         self,
@@ -250,6 +251,7 @@ def _export(
         """
         if task is None:
             task = TasksManager.infer_task_from_model(cls.auto_model_class)
+        library_name = TasksManager.infer_library_from_model(model_id, subfolder=subfolder, library_name=library_name)
 
         save_dir = TemporaryDirectory()
         save_dir_path = Path(save_dir.name)
@@ -270,7 +272,10 @@ def _export(
 
         task = TasksManager.map_from_synonym(task)
         neuron_config_constructor = TasksManager.get_exporter_config_constructor(
-            model=model, exporter="neuron", task=task
+            model=model,
+            exporter="neuron",
+            task=task,
+            library_name=library_name,
         )
 
         input_shapes = {}
@@ -321,8 +326,8 @@ def _export(
             **compiler_kwargs,
         )
 
-        store_compilation_config(
-            config=config,
+        config = store_compilation_config(
+            config=model.config,
             input_shapes=input_shapes,
             compiler_kwargs=compiler_kwargs,
             input_names=input_names,
@@ -437,7 +442,10 @@ def _neuron_config_init(cls, config: "PretrainedConfig") -> "NeuronDefaultConfig
         task = TasksManager.map_from_synonym(task)
         model_type = neuron_config.get("model_type", None) or config.model_type
         neuron_config_constructor = TasksManager.get_exporter_config_constructor(
-            model_type=model_type, exporter="neuron", task=task
+            model_type=model_type,
+            exporter="neuron",
+            task=task,
+            library_name=cls.library_name,
         )
 
         return neuron_config_constructor(
diff --git a/optimum/neuron/modeling_diffusion.py b/optimum/neuron/modeling_diffusion.py
index e6b9080ee..ef00f377b 100644
--- a/optimum/neuron/modeling_diffusion.py
+++ b/optimum/neuron/modeling_diffusion.py
@@ -80,6 +80,7 @@
 
 class NeuronStableDiffusionPipelineBase(NeuronBaseModel):
     auto_model_class = StableDiffusionPipeline
+    library_name = "diffusers"
     base_model_prefix = "neuron_model"
     config_name = "model_index.json"
     sub_component_config_name = "config.json"
@@ -653,6 +654,7 @@ def _export(
             use_auth_token=use_auth_token,
             do_validation=False,
             submodels={"unet": unet_id},
+            library_name=cls.library_name,
             **input_shapes,
         )
 
diff --git a/optimum/neuron/utils/argument_utils.py b/optimum/neuron/utils/argument_utils.py
index 208535796..499334667 100644
--- a/optimum/neuron/utils/argument_utils.py
+++ b/optimum/neuron/utils/argument_utils.py
@@ -179,7 +179,9 @@ def store_compilation_config(
     config_args["input_names"] = input_names
     config_args["output_names"] = output_names
 
-    original_model_type = getattr(config, "model_type", None)
+    original_model_type = getattr(config, "export_model_type", None) or getattr(
+        config, "model_type", None
+    )  # prefer the sentence_transformers model type over the transformers one
     neuron_model_type = str(model_type).replace("_", "-") if model_type is not None else model_type
     if original_model_type is None:
         update_func(
diff --git a/tests/inference/test_modeling.py b/tests/inference/test_modeling.py
index 3884b3517..3cea7f913 100644
--- a/tests/inference/test_modeling.py
+++ b/tests/inference/test_modeling.py
@@ -333,7 +333,11 @@ class NeuronModelForSentenceTransformersIntegrationTest(NeuronModelTestMixin):
     TASK = "feature-extraction"
     ATOL_FOR_VALIDATION = 1e-2
     # TODO: only support text models so far, will support vision next
-    SUPPORTED_ARCHITECTURES = ["sentence-transformers-transformer"]
+    SUPPORTED_ARCHITECTURES = ["transformer"]
+    ARCH_MODEL_MAP = {
+        "transformer": "sentence-transformers/all-MiniLM-L6-v2",
+        "clip": "sentence-transformers/clip-ViT-B-32",
+    }
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES, skip_on_empty=True)
     @requires_neuronx

From 8eccde27a23d3adac9fc6e858407077b943b1346 Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 16 Feb 2024 22:34:04 +0000
Subject: [PATCH 13/14] make lib optional

---
 optimum/exporters/neuron/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/exporters/neuron/__main__.py b/optimum/exporters/neuron/__main__.py
index b80e827a6..1094b80f6 100644
--- a/optimum/exporters/neuron/__main__.py
+++ b/optimum/exporters/neuron/__main__.py
@@ -235,8 +235,8 @@ def _get_submodels_and_neuron_configs(
     model: Union["PreTrainedModel", "DiffusionPipeline"],
     input_shapes: Dict[str, int],
     task: str,
-    library_name: str,
     output: Path,
+    library_name: Optional[str] = None,
     dynamic_batch_size: bool = False,
     model_name_or_path: Optional[Union[str, Path]] = None,
     submodels: Optional[Dict[str, Union[Path, str]]] = None,

From 90e30eeff48b7c4c9cfebcbb2af5914d375a031d Mon Sep 17 00:00:00 2001
From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com>
Date: Fri, 16 Feb 2024 23:02:31 +0000
Subject: [PATCH 14/14] fix pytest 8.0.1 break

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 137227e0d..05197419f 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@
 ]
 
 TESTS_REQUIRE = [
-    "pytest",
+    "pytest <= 8.0.0",
     "psutil",
     "parameterized",
     "GitPython",