neural_compressor/adaptor/torch_utils/gptq.py (8 changes: 6 additions & 2 deletions)
@@ -718,10 +718,12 @@ def tmp(_, inp, out):
                 for n, p in sub_layer.named_parameters():
                     param_name = full_layer_name + "." + n
                     if n == "weight":
-                        set_module_tensor_to_device(self.model, param_name, self.device, Q)
+                        set_module_tensor_to_device(self.model, param_name, self.device, Q, dtype=Q.dtype)
                     else:
                         value = load_value(self.model, param_name, model_path)
-                        set_module_tensor_to_device(self.model, param_name, self.device, value)
+                        set_module_tensor_to_device(
+                            self.model, param_name, self.device, value, dtype=value.dtype
+                        )
                 # sub_layer.weight.data = Q
                 torch.save(sub_layer.state_dict(), LWQ_WORKSPACE + f"/{full_layer_name}.pt")
                 clean_module_weight(sub_layer)
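
Why the added `dtype` argument matters: when no explicit `dtype` is given, accelerate's `set_module_tensor_to_device` casts the incoming tensor to the dtype of the parameter already present in the model, so a half-precision `Q` or checkpoint value can be silently upcast to the fp32 of a freshly initialized module. Passing `dtype=Q.dtype` / `dtype=value.dtype` keeps the tensor's own precision. A minimal sketch of the difference (the toy `nn.Linear` module and fp16 tensor below are illustrative, not from this PR):

```python
import torch
from torch import nn
from accelerate.utils import set_module_tensor_to_device

# Toy stand-in: a freshly initialized layer whose weight defaults to fp32.
model = nn.Sequential(nn.Linear(4, 4))
value = torch.randn(4, 4, dtype=torch.float16)  # e.g. a fp16 tensor loaded from disk

# Without dtype: value is cast to the existing parameter's dtype (fp32 here).
set_module_tensor_to_device(model, "0.weight", "cpu", value)
print(model[0].weight.dtype)  # torch.float32

# With dtype=value.dtype: the parameter keeps the tensor's own precision.
set_module_tensor_to_device(model, "0.weight", "cpu", value, dtype=value.dtype)
print(model[0].weight.dtype)  # torch.float16
```
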
@@ -745,6 +747,8 @@ def tmp(_, inp, out):
             for j in range(len(self.dataloader)):
                 cache_keyword_batch = self.gather_single_batch_from_dict(self.cache_key_arguments, j)
                 cache_positional_batch = self.gather_single_batch_from_list(self.cache_positional_arguments, j)
+                # breakpoint()
+                # transformer_block = transformer_block.to(getattr(torch, self.model.config.torch_dtype))
                 out = transformer_block(*cache_positional_batch, **cache_keyword_batch)
                 out = self.track_hidden_states(out)
                 outs.append(out)
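
The commented-out lines above record an alternative apparently tried during debugging: casting the whole transformer block to the dtype named in the model config before replaying the calibration batches. Since `config.torch_dtype` is often serialized as a plain string such as "float16", `getattr(torch, ...)` resolves it back to a real `torch.dtype`. A small sketch of that pattern (the string value is an illustrative assumption):

```python
import torch

config_torch_dtype = "float16"  # stand-in for model.config.torch_dtype after a JSON round-trip
dtype = getattr(torch, config_torch_dtype)  # resolves to torch.float16
block = torch.nn.Linear(8, 8).to(dtype)
print(block.weight.dtype)  # torch.float16
```
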
neural_compressor/adaptor/torch_utils/layer_wise_quant/utils.py (4 changes: 2 additions & 2 deletions)
@@ -221,7 +221,7 @@ def load_module(model, module_name, path, device="cpu"):
     for n, p in module.named_parameters():
         param_name = module_name + "." + n
         value = load_value(model, param_name, path)
-        set_module_tensor_to_device(model, param_name, device, value)
+        set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)


 def register_weight_hooks(model, path, device="cpu", clean_weight=True, saved_path=None):
@@ -239,7 +239,7 @@ def hook(module, input):
                 value = state_dict[n]
             else:
                 value = load_value(model, param_name, path)
-            set_module_tensor_to_device(model, param_name, device, value)
+            set_module_tensor_to_device(model, param_name, device, value, dtype=value.dtype)

         return hook

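For context on the second file: `register_weight_hooks` implements the layer-wise pattern of materializing a module's weights only when that module is about to run, which is why it routes through the same `set_module_tensor_to_device` call being fixed. A minimal sketch of the idea, assuming a hypothetical `loader(param_name)` callable in the role of INC's `load_value` (this is not the library's actual implementation):

```python
import torch
from torch import nn
from accelerate.utils import set_module_tensor_to_device


def lazy_weight_hooks(model: nn.Module, loader, device="cpu"):
    """Register forward pre-hooks that load each Linear layer's parameters just in time.

    loader(param_name) is a hypothetical callable returning the saved tensor for a
    fully qualified parameter name; INC's load_value plays this role.
    """
    handles = []
    for name, module in model.named_modules():
        if not isinstance(module, nn.Linear):
            continue

        def make_hook(prefix):
            def hook(mod, inputs):
                for n, _ in mod.named_parameters(recurse=False):
                    value = loader(prefix + "." + n)
                    # Pass dtype explicitly so the saved precision is preserved,
                    # mirroring the fix in this PR.
                    set_module_tensor_to_device(
                        model, prefix + "." + n, device, value, dtype=value.dtype
                    )

            return hook

        handles.append(module.register_forward_pre_hook(make_hook(name)))
    return handles
```
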