From b1c24935c8abf8e74536cd817e788281a45002c7 Mon Sep 17 00:00:00 2001
From: Yi Liu
Date: Tue, 8 Jul 2025 05:17:52 +0300
Subject: [PATCH 1/2] fix packing

Signed-off-by: Yi Liu
---
 auto_round/export/export_to_autoround/qlinear_triton_act.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/auto_round/export/export_to_autoround/qlinear_triton_act.py b/auto_round/export/export_to_autoround/qlinear_triton_act.py
index 59a238c75..a18409130 100644
--- a/auto_round/export/export_to_autoround/qlinear_triton_act.py
+++ b/auto_round/export/export_to_autoround/qlinear_triton_act.py
@@ -141,8 +141,7 @@ def pack(self, linear, scales, zeros, act_scales, w_bf16_to_fp8_scale, g_idx=Non
         else:
             repeat_zeros = zeros

-        intweight = torch.round(W.to(device) / repeat_scales[:,:W.shape[1]] + repeat_zeros[:,:W.shape[1]])
-
+        intweight = torch.round(W.to(device) / repeat_scales[:,:W.shape[1]] + repeat_zeros[:,:W.shape[1]]).to(torch.int32)
         del repeat_scales
         intweight = intweight.reshape(-1, intweight.shape[1] // 32 * self.bits, 32 // self.bits)
         order_map = torch.arange(0, 32 // self.bits, device=device) * self.bits

From fbe9146cb38cf250f2c0a51a86630f03a7bade7d Mon Sep 17 00:00:00 2001
From: Yi Liu
Date: Tue, 8 Jul 2025 06:05:39 +0300
Subject: [PATCH 2/2] fix format

Signed-off-by: Yi Liu
---
 auto_round/export/export_to_autoround/qlinear_triton_act.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/auto_round/export/export_to_autoround/qlinear_triton_act.py b/auto_round/export/export_to_autoround/qlinear_triton_act.py
index a18409130..3fd951302 100644
--- a/auto_round/export/export_to_autoround/qlinear_triton_act.py
+++ b/auto_round/export/export_to_autoround/qlinear_triton_act.py
@@ -141,7 +141,9 @@ def pack(self, linear, scales, zeros, act_scales, w_bf16_to_fp8_scale, g_idx=Non
         else:
             repeat_zeros = zeros

-        intweight = torch.round(W.to(device) / repeat_scales[:,:W.shape[1]] + repeat_zeros[:,:W.shape[1]]).to(torch.int32)
+        intweight = torch.round(W.to(device) / repeat_scales[:, : W.shape[1]] + repeat_zeros[:, : W.shape[1]]).to(
+            torch.int32
+        )
         del repeat_scales
         intweight = intweight.reshape(-1, intweight.shape[1] // 32 * self.bits, 32 // self.bits)
         order_map = torch.arange(0, 32 // self.bits, device=device) * self.bits
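
Note on the functional change: the only behavioral fix in PATCH 1/2 is the `.to(torch.int32)` cast on `intweight` (PATCH 2/2 is formatting only). The rounded weights need to be an integer tensor before the reshape / bit-shift packing that follows, since PyTorch's bitwise shift operators are not defined for floating-point tensors. Below is a minimal, self-contained sketch of that packing pattern; the toy shapes, the `clamp`, and the final shift-and-sum step are illustrative assumptions, not code taken verbatim from `qlinear_triton_act.py`.

    import torch

    bits = 4                      # weight bit-width (4-bit example)
    rows, cols = 8, 32            # toy shape; cols must be divisible by 32 // bits

    # Toy stand-ins for W / repeat_scales / repeat_zeros in pack()
    W = torch.randn(rows, cols)
    scales = torch.full((rows, cols), 0.1)
    zeros = torch.full((rows, cols), 8.0)

    # Without the .to(torch.int32) cast, torch.round(...) stays floating point
    # and the bitwise packing below would fail.
    intweight = torch.round(W / scales + zeros).clamp(0, 2**bits - 1).to(torch.int32)

    # Group 32 // bits values per packed 32-bit word, mirroring the reshape in the patch.
    intweight = intweight.reshape(-1, intweight.shape[1] // 32 * bits, 32 // bits)
    order_map = torch.arange(0, 32 // bits) * bits

    # Hypothetical final step (not shown in the hunk): shift each value to its bit
    # offset within the word and sum the group into one packed int32 value.
    packed = (intweight << order_map).sum(dim=-1).to(torch.int32)
    print(packed.shape)  # (rows, cols * bits // 32)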