diff --git a/neural_compressor/transformers/models/modeling_auto.py b/neural_compressor/transformers/models/modeling_auto.py
index 59de626cfaf..fc9e2eb0ee6 100644
--- a/neural_compressor/transformers/models/modeling_auto.py
+++ b/neural_compressor/transformers/models/modeling_auto.py
@@ -745,13 +745,13 @@ def load_low_bit(cls, pretrained_model_name_or_path, *model_args, **kwargs):
             tmp_kwargs = {
                 "sharded_metadata": sharded_metadata,
                 "disk_offload_folder": offload_folder,
-                "offload_state_dict": offload_state_dict,
                 "dtype": torch_dtype,
             }
+            if parse(transformers.__version__) < parse("4.57"):
+                tmp_kwargs["offload_state_dict"] = offload_state_dict
             if parse(transformers.__version__) < parse("4.51"):
                 tmp_kwargs["_fast_init"] = _fast_init
                 tmp_kwargs["low_cpu_mem_usage"] = True
-
             model_message = model_class._load_pretrained_model(*tmp_args, **tmp_kwargs)
             model = model_message[0]
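
For context, a minimal standalone sketch of the version-gating idiom this patch applies: build the kwargs dict unconditionally, then add only the arguments the installed transformers release still accepts. `build_load_kwargs` and `installed_version` are hypothetical names introduced for illustration; the 4.57 and 4.51 cutoffs simply mirror the checks in the diff above.

```python
# Sketch only, not part of the patch: gate kwargs by the installed transformers version.
from packaging.version import parse


def build_load_kwargs(installed_version: str, offload_state_dict: bool = False) -> dict:
    """Return only the keyword arguments accepted by the given transformers version."""
    kwargs = {"dtype": "auto"}
    # The patch passes `offload_state_dict` only below 4.57.
    if parse(installed_version) < parse("4.57"):
        kwargs["offload_state_dict"] = offload_state_dict
    # The existing code already gates these behind 4.51.
    if parse(installed_version) < parse("4.51"):
        kwargs["low_cpu_mem_usage"] = True
    return kwargs


print(build_load_kwargs("4.50.0"))  # includes both legacy keys
print(build_load_kwargs("4.57.0"))  # only the always-accepted keys
```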