diff --git a/neural_compressor/transformers/models/modeling_auto.py b/neural_compressor/transformers/models/modeling_auto.py
index cd5b3fe0975..571ec48acb4 100644
--- a/neural_compressor/transformers/models/modeling_auto.py
+++ b/neural_compressor/transformers/models/modeling_auto.py
@@ -325,7 +325,7 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs):
             quantization_config = TeqConfig.from_dict(quantization_config)
         elif quantization_config["quant_method"] == "gptq":
             quantization_config = GPTQConfig.from_dict(quantization_config)
-        elif quantization_config["quant_method"] == "autoround":
+        elif quantization_config["quant_method"] in ["autoround", "intel/auto-round"]:
             quantization_config = AutoRoundConfig.from_dict(quantization_config)
 
         assert quantization_config is not None, "Detect this model is not a low-bit model."