From c0680e09459abd42249917581f1c306e21be7944 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Wed, 9 Oct 2024 14:19:13 +0800
Subject: [PATCH 01/11] rm itrex dependency for 2x examples

Signed-off-by: Kaihui-intel
---
 .../text_generation/llama/quantization/ptq_static/main.py | 2 +-
 .../llama/quantization/weight_only/main.py                | 2 +-
 .../llama/quantization/weight_only/requirements.txt       | 3 +--
 .../language-modeling/pruning/eager/requirements.txt      | 3 +--
 .../language-modeling/pruning/eager/run_clm_sparsegpt.py  | 2 +-
 .../language-modeling/quantization/llm/requirements.txt   | 3 +--
 .../quantization/llm/run_clm_no_trainer.py                | 8 ++++----
 7 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/main.py b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/main.py
index 29e8653bfab..3da42f9d9d9 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/main.py
+++ b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/main.py
@@ -197,7 +197,7 @@ def replace_architectures(json_path):
         json.dump(data, file, indent=4)
 
 def eval_func(model):
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
 
     model_dir = model
     if isinstance(model, str) and model.endswith(".onnx"):
diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/main.py b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/main.py
index e43ceecefe7..a5860c14c24 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/main.py
+++ b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/main.py
@@ -134,7 +134,7 @@ def replace_architectures(json_path):
         json.dump(data, file, indent=4)
 
 def eval_func(model):
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
 
     model_dir = model
     if isinstance(model, str) and model.endswith(".onnx"):
diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt
index 8279cc72722..261983b9c80 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt
+++ b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt
@@ -7,6 +7,5 @@ onnxruntime-extensions; python_version < '3.11'
 datasets
 optimum
 evaluate
-intel-extension-for-transformers >= 1.4.1
 peft
-lm-eval==0.4.2
+lm-eval==0.4.3
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt
index e129cb6dc91..3809a2fef03 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt
@@ -1,7 +1,6 @@
 accelerate
 datasets
 einops
-intel-extension-for-transformers
 optimum
 peft
 sentencepiece
@@ -10,4 +9,4 @@ torch
 tqdm
 tiktoken
 transformers_stream_generator
-lm_eval==0.4.2
+lm_eval==0.4.3
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_sparsegpt.py b/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_sparsegpt.py
index 5b34ae79382..49e53b5000e 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_sparsegpt.py
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_sparsegpt.py
@@ -588,7 +588,7 @@ def group_texts(examples):
     eval_batch = args.per_device_eval_batch_size
     user_model = None if args.use_accelerate else model
 
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
     eval_args = LMEvalParser(
         model="hf",
         user_model=user_model,
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt
index fe73842a104..4616edc3464 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt
@@ -9,5 +9,4 @@ pytest
 wandb
 einops
 neural-compressor
-intel-extension-for-transformers
-lm_eval==0.4.2
+lm_eval==0.4.3
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
index 2407840c381..1f8ce41b9c5 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
@@ -354,7 +354,7 @@ def eval_func(model):
     if args.accuracy:
         user_model.eval()
         if args.code_generation:
-            from intel_extension_for_transformers.transformers.llm.evaluation.bigcode_eval import evaluate
+            from neural_compressor.evaluation.bigcode_eval import evaluate
             from transformers import AutoTokenizer
             tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
             results = evaluate(
@@ -370,7 +370,7 @@ def eval_func(model):
             else:
                 acc = results["results"][task_name]["acc"]
         else:
-            from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+            from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
             eval_args = LMEvalParser(
                 model="hf",
                 user_model=user_model,
@@ -395,7 +395,7 @@ def eval_func(model):
         samples = args.iters * args.batch_size
 
         if args.code_generation:
-            from intel_extension_for_transformers.transformers.llm.evaluation.bigcode_eval import evaluate
+            from neural_compressor.evaluation.bigcode_eval import evaluate
             from transformers import AutoTokenizer
             tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
             start = time.time()
@@ -413,7 +413,7 @@ def eval_func(model):
             else:
                 acc = results["results"][task_name]["acc"]
         else:
-            from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+            from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser
             eval_args = LMEvalParser(
                 model="hf",
                 user_model=user_model,
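
A minimal sketch of the evaluation entry point these hunks migrate to, for reference. It assumes `user_model` and `tokenizer` are an already-loaded HF model and tokenizer; the `tokenizer`, `tasks`, and `batch_size` arguments are assumptions extrapolated from the calls visible above, not a confirmed signature:

    # minimal sketch of the new neural_compressor evaluation path; argument
    # names beyond model/user_model are assumptions based on the hunks above
    from neural_compressor.evaluation.lm_eval import evaluate, LMEvalParser

    eval_args = LMEvalParser(
        model="hf",              # Hugging Face backend, as in the diffs
        user_model=user_model,   # in-memory (quantized) model object
        tokenizer=tokenizer,     # assumed: tokenizer handle is passed directly
        tasks="lambada_openai",  # assumed: illustrative task name
        batch_size=8,
    )
    results = evaluate(eval_args)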
From 0a2f689910dffb7bb493f91a96406bd15212ad69 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Fri, 11 Oct 2024 09:20:59 +0800
Subject: [PATCH 02/11] update ipex AutoTokenizer import

Signed-off-by: Kaihui-intel
---
 .../language-modeling/quantization/llm/run_clm_no_trainer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
index 1f8ce41b9c5..46aeb14012f 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
@@ -343,6 +343,7 @@ def eval_func(model):
 
     if args.ipex:
         user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
+        from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
     else:
         user_model, tokenizer = get_user_model()

From 7d4e01e1c51c54e51e844dcaaf29f5524de64c09 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Fri, 11 Oct 2024 09:35:09 +0800
Subject: [PATCH 03/11] update onnxrt ptq_static

Signed-off-by: Kaihui-intel
---
 .../llama/quantization/ptq_static/requirements.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt
index 8279cc72722..261983b9c80 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt
+++ b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt
@@ -7,6 +7,5 @@ onnxruntime-extensions; python_version < '3.11'
 datasets
 optimum
 evaluate
-intel-extension-for-transformers >= 1.4.1
 peft
-lm-eval==0.4.2
+lm-eval==0.4.3

From b17ca7737cf9d83d5497fe20dbeab863d22be493 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Fri, 11 Oct 2024 10:09:18 +0800
Subject: [PATCH 04/11] add numba/pydantic dependencies

Signed-off-by: Kaihui-intel
---
 .../llama/quantization/ptq_static/requirements.txt      | 2 ++
 .../llama/quantization/weight_only/requirements.txt     | 2 ++
 .../language-modeling/pruning/eager/requirements.txt    | 2 ++
 .../language-modeling/quantization/llm/requirements.txt | 3 ++-
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt
index 261983b9c80..fbd60f42f23 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt
+++ b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt
@@ -9,3 +9,5 @@ optimum
 evaluate
 peft
 lm-eval==0.4.3
+numba
+pydantic
diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt
index 261983b9c80..fbd60f42f23 100644
--- a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt
+++ b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt
@@ -9,3 +9,5 @@ optimum
 evaluate
 peft
 lm-eval==0.4.3
+numba
+pydantic
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt
index 3809a2fef03..9fee9d0543d 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt
@@ -10,3 +10,5 @@ tqdm
 tiktoken
 transformers_stream_generator
 lm_eval==0.4.3
+numba
+pydantic
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt
index 4616edc3464..1f70fea933b 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt
@@ -8,5 +8,6 @@ transformers
 pytest
 wandb
 einops
-neural-compressor
 lm_eval==0.4.3
+numba
+pydantic

From 8378c7a1a9cda3b52d2b8c7bac6177149a253f66 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Fri, 11 Oct 2024 12:26:46 +0800
Subject: [PATCH 05/11] add trust_remote_code for whisper_large

Signed-off-by: Kaihui-intel
---
 .../whisper_large/quantization/ptq_dynamic/fx/run_quant.sh | 3 ++-
 .../quantization/ptq_dynamic/fx/run_whisper_large.py       | 4 +++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_quant.sh b/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_quant.sh
index 1db9e613cd4..1ac159cad17 100755
--- a/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_quant.sh
+++ b/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_quant.sh
@@ -45,7 +45,8 @@ function run_tuning {
         --tune \
         --batch_size $batch_size \
         --output_dir ${output_model} \
-        --cache_dir ${dataset_location}
+        --cache_dir ${dataset_location} \
+        --trust_remote_code
 }
 
 
diff --git a/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_whisper_large.py b/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_whisper_large.py
index fbf9b53d81f..19b93a63837 100755
--- a/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_whisper_large.py
+++ b/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_whisper_large.py
@@ -24,13 +24,15 @@
                     help='the folder path to save the results.')
 parser.add_argument('--cache_dir', default=None, type=str,
                     help='the folder path to save the results.')
+parser.add_argument("--trust_remote_code", action="store_true")
 args = parser.parse_args()
 model_name = 'openai/whisper-large'
 processor = WhisperProcessor.from_pretrained(model_name)
 model = WhisperForConditionalGeneration.from_pretrained(model_name)
 
 # dataset
-librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test", cache_dir=args.cache_dir)
+librispeech_test_clean = load_dataset("librispeech_asr", "clean", split="test", cache_dir=args.cache_dir,
+                                      trust_remote_code=args.trust_remote_code)
 
 # metric
 wer = load("wer")

From 6ac4ff78ab3c08965bc832d9c28b8936112ae7d9 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 14 Oct 2024 10:55:19 +0800
Subject: [PATCH 06/11] fix ipex config

Signed-off-by: Kaihui-intel
---
 .../language-modeling/quantization/llm/run_clm_no_trainer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
index 46aeb14012f..35cd87bf3a6 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
@@ -343,8 +343,10 @@ def eval_func(model):
 
     if args.ipex:
         user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
-        from transformers import AutoTokenizer
+        from transformers import AutoTokenizer, AutoConfig
         tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
+        config = AutoConfig.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
+        user_model.config = config
     else:
         user_model, tokenizer = get_user_model()
     kwargs = {'weight_only': True} if args.approach == 'weight_only' else {}

From ca57e5e431279c0de64974dddae4e69bc276dbe1 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 14 Oct 2024 11:44:33 +0800
Subject: [PATCH 07/11] update ipex model.config

Signed-off-by: Kaihui-intel
---
 .../language-modeling/quantization/llm/run_clm_no_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
index 35cd87bf3a6..22b5ec453f2 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
@@ -346,7 +346,7 @@ def eval_func(model):
         from transformers import AutoTokenizer, AutoConfig
         tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
         config = AutoConfig.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
-        user_model.config = config
+        setattr(user_model, "config", config)
     else:
         user_model, tokenizer = get_user_model()
     kwargs = {'weight_only': True} if args.approach == 'weight_only' else {}

From fb244d51ed97f3d453da8912b9cb07191fdcf83f Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 14 Oct 2024 12:43:19 +0800
Subject: [PATCH 08/11] fix bf16 attr (whisper)

Signed-off-by: Kaihui-intel
---
 neural_compressor/adaptor/torch_utils/bf16_convert.py   | 8 +++++---
 test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py | 6 ++++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/neural_compressor/adaptor/torch_utils/bf16_convert.py b/neural_compressor/adaptor/torch_utils/bf16_convert.py
index b6d5e6d01bd..8c55cbebf94 100644
--- a/neural_compressor/adaptor/torch_utils/bf16_convert.py
+++ b/neural_compressor/adaptor/torch_utils/bf16_convert.py
@@ -19,6 +19,7 @@
 import torch.nn as nn
 
 from ...utils import logger
+from .util import append_attr
 
 
 class BF16ModuleWrapper(nn.Module):
@@ -62,9 +63,10 @@ def Convert(model, tune_cfg):
 def _bf16_wrapper_model(model, bf16_ops_list, prefix=""):
     for name, child in model.named_children():
         op_name = prefix + "." + name if prefix != "" else name
+        _bf16_wrapper_model(child, bf16_ops_list, op_name)
         for bf16_op_name in bf16_ops_list:
             if op_name == bf16_op_name[0] or op_name == bf16_op_name[0].split(".module")[0]:
-                child = BF16ModuleWrapper(child)
-                setattr(model, name, child)
-        _bf16_wrapper_model(child, bf16_ops_list, op_name)
+                child_bf16 = BF16ModuleWrapper(child)
+                append_attr(child_bf16, child)
+                setattr(model, name, child_bf16)
     return model
diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py
index 1bfa38a0bb7..690122699e7 100644
--- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py
+++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py
@@ -401,8 +401,10 @@ def test_mix_precision(self):
         ptq_fx_op_name_list["conv.*"] = {"weight": {"dtype": "bf16"}, "activation": {"dtype": "bf16"}}
         conf = PostTrainingQuantConfig(op_name_dict=ptq_fx_op_name_list)
         q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader, calib_func=eval_func)
-        self.assertEqual(q_model._model.conv.module.module.weight.dtype, torch.bfloat16)
-        self.assertEqual(q_model._model.conv.module.module.bias.dtype, torch.bfloat16)
+        self.assertEqual(q_model._model.conv.module.weight.dtype, torch.bfloat16)
+        self.assertEqual(q_model._model.conv.module.bias.dtype, torch.bfloat16)
+        self.assertEqual(q_model._model.conv.stride[0], 1,
+                         msg="GraphModule object should have the attributes of the original module.")
 
     def test_hawq_metric(self):
         # Test for hawq metric
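
The new test line above checks that a wrapped module keeps original attributes such as `conv.stride`. A rough stand-in for what `append_attr(child_bf16, child)` is expected to do — the real helper lives in `neural_compressor/adaptor/torch_utils/util.py`, and this body is an illustrative assumption, not its actual implementation:

    # hypothetical sketch of append_attr: copy the wrapped module's public
    # attributes onto the wrapper so lookups like conv.stride still resolve
    def append_attr_sketch(wrapper, module):
        for attr in dir(module):
            if not attr.startswith("_") and not hasattr(wrapper, attr):
                try:
                    setattr(wrapper, attr, getattr(module, attr))
                except (AttributeError, TypeError):
                    pass  # skip attributes that cannot be set on the wrapper
        return wrapper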
From 4a22b306ba83ac991f5a90ffebe953bf6005ab1f Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Mon, 14 Oct 2024 13:16:49 +0800
Subject: [PATCH 09/11] adapt transformers 4.45.x (AWQ)

Signed-off-by: Kaihui-intel
---
 neural_compressor/adaptor/torch_utils/awq.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/neural_compressor/adaptor/torch_utils/awq.py b/neural_compressor/adaptor/torch_utils/awq.py
index 35c35624745..a745b4ec21a 100644
--- a/neural_compressor/adaptor/torch_utils/awq.py
+++ b/neural_compressor/adaptor/torch_utils/awq.py
@@ -454,6 +454,9 @@ def block_inference(self, model):
         """
        total_out = []
         for args, kwargs in zip(self.total_block_args, self.total_block_kwargs):
+            # drop layer_past: transformers newer than 4.45.1 passes it as a DynamicCache
+            if "layer_past" in kwargs.keys() and kwargs["layer_past"] is not None:
+                kwargs["layer_past"] = None
             out = model(*args, **kwargs)
             if isinstance(out, tuple):  # pragma: no cover
                 out = out[0]

From 0e5d6119cf8e88d64631062c43f82835aa0cbd45 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Tue, 15 Oct 2024 15:23:38 +0800
Subject: [PATCH 10/11] rm ipex int8 benchmark

Signed-off-by: Kaihui-intel
---
 .../language-modeling/quantization/llm/run_benchmark.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh
index 0277a26c79c..c9461f822a8 100644
--- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh
+++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh
@@ -89,10 +89,10 @@ function run_benchmark {
         extra_cmd=$extra_cmd" --woq_algo TEQ"
     elif [ "${topology}" = "opt_125m_ipex" ]; then
         model_name_or_path="facebook/opt-125m"
-        extra_cmd=$extra_cmd" --ipex --int8_bf16_mixed"
+        extra_cmd=$extra_cmd" --ipex"
     elif [ "${topology}" = "opt_125m_ipex_sq" ]; then
         model_name_or_path="facebook/opt-125m"
-        extra_cmd=$extra_cmd" --ipex --int8_bf16_mixed --sq --alpha 0.5"
+        extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"
     elif [ "${topology}" = "bloom_560m_ipex_sq" ]; then
         model_name_or_path="bigscience/bloom-560m"
         extra_cmd=$extra_cmd" --ipex --sq --alpha 0.5"

From fabe91b7d1c14dffb03d10d4511c720d979f3fd5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 15 Oct 2024 08:16:47 +0000
Subject: [PATCH 11/11] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py
index 690122699e7..24bbc47c888 100644
--- a/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py
+++ b/test/adaptor/pytorch_adaptor/test_adaptor_pytorch_2x.py
@@ -403,8 +403,11 @@ def test_mix_precision(self):
         q_model = quantization.fit(model_origin, conf, calib_dataloader=dataloader, calib_func=eval_func)
         self.assertEqual(q_model._model.conv.module.weight.dtype, torch.bfloat16)
         self.assertEqual(q_model._model.conv.module.bias.dtype, torch.bfloat16)
-        self.assertEqual(q_model._model.conv.stride[0], 1,
-                         msg="GraphModule object should have the attributes of the original module.")
+        self.assertEqual(
+            q_model._model.conv.stride[0],
+            1,
+            msg="GraphModule object should have the attributes of the original module.",
+        )
 
     def test_hawq_metric(self):
         # Test for hawq metric
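
For reference, the net effect of patches 02, 06, and 07 on the IPEX accuracy path, condensed into one sketch. This mirrors the final hunks above; `load`, `args`, and the quantized model directory come from the surrounding run_clm_no_trainer.py script, and the rationale comment is an inference from the "fix ipex config" subject:

    # condensed final state of the ipex branch after patches 02/06/07
    import os
    from transformers import AutoTokenizer, AutoConfig

    user_model = load(os.path.abspath(os.path.expanduser(args.output_dir)))
    tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
    # re-attach a HF config, which the reloaded graph module presumably lacks
    config = AutoConfig.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
    setattr(user_model, "config", config)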