diff --git a/auto_round/compressors/base.py b/auto_round/compressors/base.py index c56e10750..609050746 100644 --- a/auto_round/compressors/base.py +++ b/auto_round/compressors/base.py @@ -1310,10 +1310,11 @@ def _quantize_rtn(self) -> tuple[torch.nn.Module, dict[str, Any]]: self.model.to("cpu") enable_imatrix = False - if has_gguf_k and not self.disable_opt_rtn: - enable_imatrix = True - if self.data_type == "int" and self.sym: - enable_imatrix = True + if not self.disable_opt_rtn: + if has_gguf_k: + enable_imatrix = True + elif self.data_type == "int" and self.sym: + enable_imatrix = True if enable_imatrix: self._quant_rtn_with_imatrix(all_to_quantized_module_names)