Change the default value for XPU weight-only quantization (#1194)
Signed-off-by: Cheng Penghui <penghui.cheng@intel.com>
PenghuiCheng committed Jan 26, 2024
1 parent 7156218 commit 4a78bab
Showing 1 changed file with 1 addition and 1 deletion.
```diff
@@ -232,7 +232,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
                 quantization_config = WeightOnlyQuantConfig(compute_dtype="fp32", weight_dtype="nf4")
             else:
                 quantization_config = WeightOnlyQuantConfig(compute_dtype=convert_dtype_torch2str(torch_dtype),
-                                                            weight_dtype="nf4")
+                                                            weight_dtype="nf4" if use_cpu else "int4_fullrange")
         else:
             assert ("4" in quantization_config.weight_dtype
                     and convert_dtype_str2torch(quantization_config.compute_dtype) == torch_dtype
```
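The effect of the change can be sketched as follows. This is a minimal, self-contained illustration, not the library's actual code: `WeightOnlyQuantConfig` here is a stand-in dataclass for the class used in the diff (the real one takes more parameters), and `default_quant_config` is a hypothetical helper isolating the changed expression.

```python
from dataclasses import dataclass

@dataclass
class WeightOnlyQuantConfig:
    # Stand-in for the config class referenced in the diff.
    compute_dtype: str
    weight_dtype: str

def default_quant_config(compute_dtype: str, use_cpu: bool) -> WeightOnlyQuantConfig:
    # Before this commit the default weight dtype was always "nf4";
    # after it, non-CPU (XPU) devices default to "int4_fullrange".
    return WeightOnlyQuantConfig(
        compute_dtype=compute_dtype,
        weight_dtype="nf4" if use_cpu else "int4_fullrange",
    )

print(default_quant_config("fp16", use_cpu=False).weight_dtype)  # int4_fullrange
print(default_quant_config("fp32", use_cpu=True).weight_dtype)   # nf4
```

In other words, CPU runs keep the NF4 default, while XPU runs now get a full-range INT4 weight dtype by default.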
