From 1516bdca4cb38125b252913418e294e147c4ea51 Mon Sep 17 00:00:00 2001
From: xin3he <xin3.he@intel.com>
Date: Tue, 4 Mar 2025 10:54:40 +0800
Subject: [PATCH 1/2] fix saving issue for group_size=-1

Signed-off-by: xin3he <xin3.he@intel.com>
---
 neural_compressor/transformers/quantization/utils.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/transformers/quantization/utils.py b/neural_compressor/transformers/quantization/utils.py
index 0ab18b91b0c..83ed47c8ca9 100644
--- a/neural_compressor/transformers/quantization/utils.py
+++ b/neural_compressor/transformers/quantization/utils.py
@@ -660,8 +660,14 @@ def convert_to_GPTQ_checkpoints(model, quantization_config):
             new_module.n_pack = 32 // bits
             scales = module._op_context.get_scales().t().contiguous()
             bias = module._op_context.get_bias()
+            qzeros = module._op_context.get_zero_points().t().to(torch.uint8)
+            # For group_size = -1, scales and qzeros are 1-D tensors; add a leading dimension so both are 2-D
+            if len(scales.shape) == 1:
+                scales = scales.unsqueeze(0)
+            if len(qzeros.shape) == 1:
+                qzeros = qzeros.unsqueeze(0)
             qzeros = new_module.pack_tensor_with_numpy(
-                module._op_context.get_zero_points().t().to(torch.uint8) - 1
+                qzeros - 1
             ).contiguous()
             g_idx = module._op_context.get_g_idx()
 

From e46a67abb6300e344fe02ca435b1119fdac08df0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 4 Mar 2025 03:04:55 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 neural_compressor/transformers/quantization/utils.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/neural_compressor/transformers/quantization/utils.py b/neural_compressor/transformers/quantization/utils.py
index 83ed47c8ca9..f09262fb01d 100644
--- a/neural_compressor/transformers/quantization/utils.py
+++ b/neural_compressor/transformers/quantization/utils.py
@@ -666,9 +666,7 @@ def convert_to_GPTQ_checkpoints(model, quantization_config):
                 scales = scales.unsqueeze(0)
             if len(qzeros.shape) == 1:
                 qzeros = qzeros.unsqueeze(0)
-            qzeros = new_module.pack_tensor_with_numpy(
-                qzeros - 1
-            ).contiguous()
+            qzeros = new_module.pack_tensor_with_numpy(qzeros - 1).contiguous()
             g_idx = module._op_context.get_g_idx()
 
             new_module.qweight = qweight