From 2af84933c8c3b8a0c799940ad0c4973a77853ac0 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Wed, 15 Oct 2025 16:15:01 +0800
Subject: [PATCH 1/2] fix gptq NF4/FP4

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/utility.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/neural_compressor/torch/algorithms/weight_only/utility.py b/neural_compressor/torch/algorithms/weight_only/utility.py
index ef66e8734a8..053af7d777f 100644
--- a/neural_compressor/torch/algorithms/weight_only/utility.py
+++ b/neural_compressor/torch/algorithms/weight_only/utility.py
@@ -516,6 +516,7 @@ def quant_weight_w_scale(weight, scale, scale_bf16_to_fp8, zp=None, group_size=-
         if dtype in FLOAT_MAPPING.keys():  # NF4 FP4
             int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size]
             quantize_4bit(int_weight_tmp, scale=scale[:, i].unsqueeze(1), dtype=dtype, return_int=True)[0]
+            int_weight[:, leng * group_size :].copy_(int_weight_tmp)
         else:
             int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size].div_(scale[:, i].unsqueeze(1))
             if zp is not None:
@@ -526,6 +527,7 @@ def quant_weight_w_scale(weight, scale, scale_bf16_to_fp8, zp=None, group_size=-
         if dtype in FLOAT_MAPPING.keys():  # NF4 FP4
             int_weight_tmp = weight[:, leng * group_size :]
             quantize_4bit(int_weight_tmp, scale=scale[:, -1].unsqueeze(1), dtype=dtype, return_int=True)[0]
+            int_weight[:, leng * group_size :].copy_(int_weight_tmp)
         else:
             int_weight_tmp = weight[:, leng * group_size :].div_(scale[:, -1].unsqueeze(1))
             if zp is not None:

From 5fe07d9cb78906aeb9fdfeea3c8d25fd7f0dbe10 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Thu, 16 Oct 2025 08:12:41 +0800
Subject: [PATCH 2/2] update int_weight

Signed-off-by: Kaihui-intel
---
 neural_compressor/torch/algorithms/weight_only/utility.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/neural_compressor/torch/algorithms/weight_only/utility.py b/neural_compressor/torch/algorithms/weight_only/utility.py
index 053af7d777f..eea06883a22 100644
--- a/neural_compressor/torch/algorithms/weight_only/utility.py
+++ b/neural_compressor/torch/algorithms/weight_only/utility.py
@@ -516,7 +516,7 @@ def quant_weight_w_scale(weight, scale, scale_bf16_to_fp8, zp=None, group_size=-
         if dtype in FLOAT_MAPPING.keys():  # NF4 FP4
             int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size]
             quantize_4bit(int_weight_tmp, scale=scale[:, i].unsqueeze(1), dtype=dtype, return_int=True)[0]
-            int_weight[:, leng * group_size :].copy_(int_weight_tmp)
+            int_weight[:, i * group_size : (i + 1) * group_size].copy_(int_weight_tmp)
         else:
             int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size].div_(scale[:, i].unsqueeze(1))
             if zp is not None:
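
Note (not part of the patches): the two commits together establish a write pattern for the NF4/FP4 branch of quant_weight_w_scale -- quantize each group slice, then copy the result into the matching column range of int_weight, with the trailing partial group written into the tail slice. Below is a minimal standalone sketch of that pattern under stated assumptions: toy_quantize_4bit and grouped_quant_copy are hypothetical stand-ins invented for illustration, not neural_compressor APIs; the real code calls quantize_4bit(..., return_int=True)[0] with the NF4/FP4 lookup table.

import torch

def toy_quantize_4bit(block, scale):
    # Hypothetical stand-in for quantize_4bit(..., return_int=True)[0]:
    # simply rounds the scaled block to mimic integer-index output.
    return torch.round(block / scale)

def grouped_quant_copy(weight, scale, group_size):
    # weight: [out_features, in_features]; scale: [out_features, n_groups]
    out_features, in_features = weight.shape
    leng = in_features // group_size            # number of full groups
    tail_flag = (in_features % group_size) != 0
    int_weight = torch.zeros_like(weight)
    for i in range(leng):
        block = weight[:, i * group_size : (i + 1) * group_size]
        q = toy_quantize_4bit(block, scale[:, i].unsqueeze(1))
        # PATCH 2/2 destination: each group is written into its own
        # column range, not into the tail range.
        int_weight[:, i * group_size : (i + 1) * group_size].copy_(q)
    if tail_flag:
        block = weight[:, leng * group_size :]
        q = toy_quantize_4bit(block, scale[:, -1].unsqueeze(1))
        # PATCH 1/2 addition for the leftover columns: the partial group
        # is copied into the trailing slice.
        int_weight[:, leng * group_size :].copy_(q)
    return int_weight

if __name__ == "__main__":
    w = torch.randn(4, 10)           # 2 full groups of 4 plus a tail of 2
    s = torch.rand(4, 3) + 0.1       # one scale column per group (incl. tail)
    print(grouped_quant_copy(w, s, group_size=4))

The sketch also makes the underlying bug visible: before PATCH 1/2, the NF4/FP4 branch computed the quantized values but never stored them in int_weight, and PATCH 2/2 then corrects the destination slice inside the per-group loop.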