From b4e37b74ff077acf85246527a7ddae7a2e3f08d1 Mon Sep 17 00:00:00 2001
From: Yiyang Cai <49231152+YIYANGCAI@users.noreply.github.com>
Date: Wed, 28 Feb 2024 15:28:55 +0800
Subject: [PATCH] Fix bugs for gptq exporting with static_groups (#1614)

Signed-off-by: YIYANGCAI
---
 neural_compressor/adaptor/torch_utils/gptq.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/adaptor/torch_utils/gptq.py b/neural_compressor/adaptor/torch_utils/gptq.py
index 7b0ee1c35f7..96e4cf6192e 100644
--- a/neural_compressor/adaptor/torch_utils/gptq.py
+++ b/neural_compressor/adaptor/torch_utils/gptq.py
@@ -668,7 +668,8 @@ def tmp(_, inp, out):
                 gptq_config[self.get_full_layer_name(layer_name, block_idx)] = {"scale": scale}
                 if not weight_config_this_layer["sym"]:
                     gptq_config[self.get_full_layer_name(layer_name, block_idx)]["zero"] = zp
-                if weight_config_this_layer["act_order"]:  # save perm for restoring the weights
+                if weight_config_this_layer["act_order"] and not weight_config_this_layer["static_groups"]:
+                    # save perm for restoring the weights, but only when static_groups is not enabled.
                     gptq_config[self.get_full_layer_name(layer_name, block_idx)]["perm"] = gptq_for_this_block[
                         layer_name
                     ].perm
@@ -828,6 +829,11 @@ def fasterquant(self, W, blocksize=128, percdamp=0.01, groupsize=-1, act_order=F
                         zero.append(self.quantizer.zero)
                 else:
                     idx = i1 + i
+                    if (i1 + i) % groupsize == 0:
+                        # load the pre-calculated quantization parameters in groups
+                        static_quantizer = groups[(i1 + i) // groupsize]
+                        scale.append(static_quantizer.scale)
+                        zero.append(static_quantizer.zero)
                     if act_order:
                         idx = perm[idx]
                     self.quantizer = groups[idx // groupsize]
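
For reviewers, a minimal standalone sketch of why this fix holds. The names
(ToyQuantizer, export_static_group_params) are hypothetical and not part of
neural_compressor: with static_groups, per-group scale/zero are computed on
the unpermuted weight columns up front, and the export loop appends them in
original group order regardless of any act_order permutation, so saving
`perm` is unnecessary (and, per the first hunk, is now skipped).

import torch

class ToyQuantizer:
    # Hypothetical stand-in for GPTQ's per-group quantizer (illustration only).
    def find_params(self, w):
        self.scale = w.abs().max().reshape(1, 1) / 7.0  # toy int4-style range
        self.zero = torch.zeros(1, 1)

def export_static_group_params(W, groupsize, perm=None):
    # Pre-compute one quantizer per column group on the *unpermuted* weights,
    # which is what static_groups does before any act_order reordering.
    groups = []
    for start in range(0, W.shape[1], groupsize):
        q = ToyQuantizer()
        q.find_params(W[:, start:start + groupsize])
        groups.append(q)

    order = perm if perm is not None else torch.arange(W.shape[1])
    scale, zero = [], []
    for i, idx in enumerate(order.tolist()):
        if i % groupsize == 0:
            # Mirrors the patched export path: every groupsize-th processed
            # column appends groups[i // groupsize], i.e. the groups in their
            # ORIGINAL order, independent of perm -- hence no perm is saved.
            g = groups[i // groupsize]
            scale.append(g.scale)
            zero.append(g.zero)
        # Column idx itself would be quantized with groups[idx // groupsize],
        # matching `self.quantizer = groups[idx // groupsize]` in the patch.
    return torch.cat(scale, dim=1), torch.cat(zero, dim=1)

W = torch.randn(8, 16)
perm = torch.argsort(torch.rand(16))  # fake act_order permutation
s1, _ = export_static_group_params(W, groupsize=4)
s2, _ = export_static_group_params(W, groupsize=4, perm=perm)
assert torch.equal(s1, s2)  # exported scales do not depend on perm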