diff --git a/auto_round/utils/common.py b/auto_round/utils/common.py
index 9d4e4c98a..702b815ad 100644
--- a/auto_round/utils/common.py
+++ b/auto_round/utils/common.py
@@ -88,8 +88,6 @@ def __init__(self):
             "auto_round:gptqmodel",
             "auto_round:auto_awq",
             "auto_round:llm_compressor",
-            "itrex",
-            "itrex_xpu",
             "fake",
             "llm_compressor",
         )
diff --git a/test/test_cpu/test_autoround.py b/test/test_cpu/test_autoround.py
index adadaa837..dd188e6ad 100644
--- a/test/test_cpu/test_autoround.py
+++ b/test/test_cpu/test_autoround.py
@@ -162,28 +162,6 @@ def test_nv_fp4(self):
         print(result["results"]["lambada_openai"]["acc,none"])
         self.assertGreater(result["results"]["lambada_openai"]["acc,none"], 0.35)
 
-    def test_default(self):
-        bits, group_size, sym = 4, 128, False
-        autoround = AutoRound(
-            self.model,
-            self.tokenizer,
-            bits=bits,
-            group_size=group_size,
-            sym=sym,
-            iters=2,
-            seqlen=2,
-            dataset=self.llm_dataloader,
-        )
-        autoround.quantize()
-
-        autoround.save_quantized(output_dir="./saved", inplace=False, format="itrex")
-        try:
-            import auto_gptq
-        except:
-            return
-        if torch.cuda.is_available():
-            autoround.save_quantized(output_dir="./saved", inplace=False)
-
     def test_w4g1(self):
         model_name = "/tf_dataset/auto_round/models/facebook/opt-125m"
         bits, group_size, sym = 4, -1, True
diff --git a/test/test_cpu/test_autoround_acc.py b/test/test_cpu/test_autoround_acc.py
index 97211ade4..41b28e663 100644
--- a/test/test_cpu/test_autoround_acc.py
+++ b/test/test_cpu/test_autoround_acc.py
@@ -13,7 +13,6 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from auto_round import AutoRound  # pylint: disable=E0401
-from auto_round.export.export_to_itrex.export import pack_model  # pylint: disable=E0401
 
 
 class LLMDataLoader:
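
Note: the deleted test_default was the only coverage exercising the removed "itrex" export path. A minimal sketch of the equivalent flow against a format the list above still supports (here "fake"); the model, tokenizer, and dataloader names are assumed to match the fixtures the deleted test used:

    from auto_round import AutoRound

    # Same lightweight quantization settings as the deleted test.
    autoround = AutoRound(
        model,                   # assumed: a loaded causal LM test fixture
        tokenizer,               # assumed: its matching tokenizer
        bits=4,
        group_size=128,
        sym=False,
        iters=2,
        seqlen=2,
        dataset=llm_dataloader,  # assumed: the tiny calibration dataloader fixture
    )
    autoround.quantize()

    # "fake" remains in the supported-format tuple, unlike the removed "itrex"/"itrex_xpu".
    autoround.save_quantized(output_dir="./saved", inplace=False, format="fake")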