From b4e37b74ff077acf85246527a7ddae7a2e3f08d1 Mon Sep 17 00:00:00 2001
From: Yiyang Cai <49231152+YIYANGCAI@users.noreply.github.com>
Date: Wed, 28 Feb 2024 15:28:55 +0800
Subject: [PATCH] Fix bugs for gptq exporting with static_groups (#1614)

Signed-off-by: YIYANGCAI
---
 neural_compressor/adaptor/torch_utils/gptq.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/adaptor/torch_utils/gptq.py b/neural_compressor/adaptor/torch_utils/gptq.py
index 7b0ee1c35f7..96e4cf6192e 100644
--- a/neural_compressor/adaptor/torch_utils/gptq.py
+++ b/neural_compressor/adaptor/torch_utils/gptq.py
@@ -668,7 +668,8 @@ def tmp(_, inp, out):
                 gptq_config[self.get_full_layer_name(layer_name, block_idx)] = {"scale": scale}
                 if not weight_config_this_layer["sym"]:
                     gptq_config[self.get_full_layer_name(layer_name, block_idx)]["zero"] = zp
-                if weight_config_this_layer["act_order"]:  # save perm for restoring the weights
+                if weight_config_this_layer["act_order"] and not weight_config_this_layer["static_groups"]:
+                    # save perm for restoring the weights, but only when static_groups is not enabled.
                     gptq_config[self.get_full_layer_name(layer_name, block_idx)]["perm"] = gptq_for_this_block[
                         layer_name
                     ].perm
@@ -828,6 +829,11 @@ def fasterquant(self, W, blocksize=128, percdamp=0.01, groupsize=-1, act_order=F
                         zero.append(self.quantizer.zero)
                 else:
                     idx = i1 + i
+                    if (i1 + i) % groupsize == 0:
+                        # load the pre-calculated quantization parameters in groups
+                        static_quantizer = groups[(i1 + i) // groupsize]
+                        scale.append(static_quantizer.scale)
+                        zero.append(static_quantizer.zero)
                     if act_order:
                         idx = perm[idx]
                     self.quantizer = groups[idx // groupsize]
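
For reviewers, a minimal standalone sketch of why this fix holds. The names
(ToyQuantizer, export_static_group_params) are hypothetical and not part of
neural_compressor: with static_groups, per-group scale/zero are computed on
the unpermuted weight columns up front, and the export loop appends them in
original group order regardless of any act_order permutation, so saving
`perm` is unnecessary (and, per the first hunk, is now skipped).

import torch

class ToyQuantizer:
    # Hypothetical stand-in for GPTQ's per-group quantizer (illustration only).
    def find_params(self, w):
        self.scale = w.abs().max().reshape(1, 1) / 7.0  # toy int4-style range
        self.zero = torch.zeros(1, 1)

def export_static_group_params(W, groupsize, perm=None):
    # Pre-compute one quantizer per column group on the *unpermuted* weights,
    # which is what static_groups does before any act_order reordering.
    groups = []
    for start in range(0, W.shape[1], groupsize):
        q = ToyQuantizer()
        q.find_params(W[:, start:start + groupsize])
        groups.append(q)

    order = perm if perm is not None else torch.arange(W.shape[1])
    scale, zero = [], []
    for i, idx in enumerate(order.tolist()):
        if i % groupsize == 0:
            # Mirrors the patched export path: every groupsize-th processed
            # column appends groups[i // groupsize], i.e. the groups in their
            # ORIGINAL order, independent of perm -- hence no perm is saved.
            g = groups[i // groupsize]
            scale.append(g.scale)
            zero.append(g.zero)
        # Column idx itself would be quantized with groups[idx // groupsize],
        # matching `self.quantizer = groups[idx // groupsize]` in the patch.
    return torch.cat(scale, dim=1), torch.cat(zero, dim=1)

W = torch.randn(8, 16)
perm = torch.argsort(torch.rand(16))  # fake act_order permutation
s1, _ = export_static_group_params(W, groupsize=4)
s2, _ = export_static_group_params(W, groupsize=4, perm=perm)
assert torch.equal(s1, s2)  # exported scales do not depend on perm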