diff --git a/neural_compressor/adaptor/torch_utils/gptq.py b/neural_compressor/adaptor/torch_utils/gptq.py
index 2f1c6cc0582..a077e932b33 100644
--- a/neural_compressor/adaptor/torch_utils/gptq.py
+++ b/neural_compressor/adaptor/torch_utils/gptq.py
@@ -718,10 +718,12 @@ def tmp(_, inp, out):
                     for n, p in sub_layer.named_parameters():
                         param_name = full_layer_name + "." + n
                         if n == "weight":
-                            set_module_tensor_to_device(self.model, param_name, self.device, Q)
+                            set_module_tensor_to_device(self.model, param_name, self.device, Q, dtype=Q.dtype)
                         else:
                             value = load_value(self.model, param_name, model_path)
-                            set_module_tensor_to_device(self.model, param_name, self.device, value)
+                            set_module_tensor_to_device(
+                                self.model, param_name, self.device, value, dtype=value.dtype
+                            )
                         # sub_layer.weight.data = Q
                     torch.save(sub_layer.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
                     clean_module_weight(sub_layer)
@@ -745,6 +747,8 @@ def tmp(_, inp, out):
             for j in range(len(self.dataloader)):
                 cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
                 cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
+                # breakpoint()
+                # transformer_block = transformer_block.to(getattr(torch, self.model.config.torch_dtype))
                 out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
                 out = self.track_hidden_states(out)
                 outs.append(out)
diff --git a/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py b/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py
index 8bd3d32d320..211cfebbad1 100644
--- a/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py
+++ b/neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py
@@ -221,7 +221,7 @@ def load_module(model, module_name, path, device="cpu"):
     for n, p in module.named_parameters():
         param_name = module_name + "." + n
         value = load_value(model, param_name, path)
-        set_module_tensor_to_device(model, param_name, device, value)
+        set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)
 
 
 def register_weight_hooks(model, path, device="cpu", clean_weight=True, saved_path=None):
@@ -239,7 +239,7 @@ def hook(module, input):
                     value = state_dict[n]
                 else:
                     value = load_value(model, param_name, path)
-                set_module_tensor_to_device(model, param_name, device, value)
+                set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)
 
         return hook
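
The change hinges on how `set_module_tensor_to_device` (imported from `accelerate.utils` in `layer_wise_quant/utils.py` and reused in `gptq.py`) handles dtype: when `dtype` is omitted, the helper casts the incoming value to the dtype of the tensor it replaces, mirroring `load_state_dict` semantics, so a half-precision value restored onto an fp32 placeholder silently comes back as fp32. Passing `dtype=value.dtype` (or `dtype=Q.dtype`) pins the loaded precision instead. A minimal sketch of that behavior, using a toy `nn.Linear` rather than anything from this patch:

```python
import torch
from torch import nn
from accelerate.utils import set_module_tensor_to_device

model = nn.Linear(4, 4)  # "weight" is created in fp32
value = torch.randn(4, 4, dtype=torch.bfloat16)  # e.g. a bf16 checkpoint value

# Without dtype, accelerate casts the value to the existing parameter's dtype
# (fp32 here), mirroring load_state_dict semantics.
set_module_tensor_to_device(model, "weight", "cpu", value)
print(model.weight.dtype)  # torch.float32

# Passing dtype=value.dtype, as this patch does, keeps the loaded precision.
set_module_tensor_to_device(model, "weight", "cpu", value, dtype=value.dtype)
print(model.weight.dtype)  # torch.bfloat16
```

The same reasoning applies to the quantized weight `Q` in `gptq.py`: without the explicit `dtype=Q.dtype`, the dequantized weight would be cast back to whatever dtype the placeholder parameter happened to carry.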