From 2af84933c8c3b8a0c799940ad0c4973a77853ac0 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Wed, 15 Oct 2025 16:15:01 +0800
Subject: [PATCH 1/2] fix gptq NF4/FP4

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/utility.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/neural_compressor/torch/algorithms/weight_only/utility.py b/neural_compressor/torch/algorithms/weight_only/utility.py
index ef66e8734a8..053af7d777f 100644
--- a/neural_compressor/torch/algorithms/weight_only/utility.py
+++ b/neural_compressor/torch/algorithms/weight_only/utility.py
@@ -516,6 +516,7 @@ def quant_weight_w_scale(weight, scale, scale_bf16_to_fp8, zp=None, group_size=-
         if dtype in FLOAT_MAPPING.keys():  # NF4 FP4
             int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size]
             quantize_4bit(int_weight_tmp, scale=scale[:, i].unsqueeze(1), dtype=dtype, return_int=True)[0]
+            int_weight[:, leng * group_size :].copy_(int_weight_tmp)
         else:
             int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size].div_(scale[:, i].unsqueeze(1))
             if zp is not None:
@@ -526,6 +527,7 @@ def quant_weight_w_scale(weight, scale, scale_bf16_to_fp8, zp=None, group_size=-
         if dtype in FLOAT_MAPPING.keys():  # NF4 FP4
             int_weight_tmp = weight[:, leng * group_size :]
             quantize_4bit(int_weight_tmp, scale=scale[:, -1].unsqueeze(1), dtype=dtype, return_int=True)[0]
+            int_weight[:, leng * group_size :].copy_(int_weight_tmp)
         else:
             int_weight_tmp = weight[:, leng * group_size :].div_(scale[:, -1].unsqueeze(1))
             if zp is not None:

From 5fe07d9cb78906aeb9fdfeea3c8d25fd7f0dbe10 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Thu, 16 Oct 2025 08:12:41 +0800
Subject: [PATCH 2/2] update int_weight

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/utility.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/utility.py b/neural_compressor/torch/algorithms/weight_only/utility.py
index 053af7d777f..eea06883a22 100644
--- a/neural_compressor/torch/algorithms/weight_only/utility.py
+++ b/neural_compressor/torch/algorithms/weight_only/utility.py
@@ -516,7 +516,7 @@ def quant_weight_w_scale(weight, scale, scale_bf16_to_fp8, zp=None, group_size=-
         if dtype in FLOAT_MAPPING.keys():  # NF4 FP4
             int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size]
             quantize_4bit(int_weight_tmp, scale=scale[:, i].unsqueeze(1), dtype=dtype, return_int=True)[0]
-            int_weight[:, leng * group_size :].copy_(int_weight_tmp)
+            int_weight[:, i * group_size : (i + 1) * group_size].copy_(int_weight_tmp)
         else:
             int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size].div_(scale[:, i].unsqueeze(1))
             if zp is not None:
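
Note (not part of the patches): the two commits together establish a write pattern for the NF4/FP4 branch of quant_weight_w_scale -- quantize each group slice, then copy the result into the matching column range of int_weight, with the trailing partial group written into the tail slice. Below is a minimal standalone sketch of that pattern under stated assumptions: toy_quantize_4bit and grouped_quant_copy are hypothetical stand-ins invented for illustration, not neural_compressor APIs; the real code calls quantize_4bit(..., return_int=True)[0] with the NF4/FP4 lookup table.

import torch

def toy_quantize_4bit(block, scale):
    # Hypothetical stand-in for quantize_4bit(..., return_int=True)[0]:
    # simply rounds the scaled block to mimic integer-index output.
    return torch.round(block / scale)

def grouped_quant_copy(weight, scale, group_size):
    # weight: [out_features, in_features]; scale: [out_features, n_groups]
    out_features, in_features = weight.shape
    leng = in_features // group_size            # number of full groups
    tail_flag = (in_features % group_size) != 0
    int_weight = torch.zeros_like(weight)
    for i in range(leng):
        block = weight[:, i * group_size : (i + 1) * group_size]
        q = toy_quantize_4bit(block, scale[:, i].unsqueeze(1))
        # PATCH 2/2 destination: each group is written into its own
        # column range, not into the tail range.
        int_weight[:, i * group_size : (i + 1) * group_size].copy_(q)
    if tail_flag:
        block = weight[:, leng * group_size :]
        q = toy_quantize_4bit(block, scale[:, -1].unsqueeze(1))
        # PATCH 1/2 addition for the leftover columns: the partial group
        # is copied into the trailing slice.
        int_weight[:, leng * group_size :].copy_(q)
    return int_weight

if __name__ == "__main__":
    w = torch.randn(4, 10)           # 2 full groups of 4 plus a tail of 2
    s = torch.rand(4, 3) + 0.1       # one scale column per group (incl. tail)
    print(grouped_quant_copy(w, s, group_size=4))

The sketch also makes the underlying bug visible: before PATCH 1/2, the NF4/FP4 branch computed the quantized values but never stored them in int_weight, and PATCH 2/2 then corrects the destination slice inside the per-group loop.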