From dbc71082c81baf0b5968cb9ce50e57b94aa5857a Mon Sep 17 00:00:00 2001 From: "He, Xin3" Date: Mon, 3 Nov 2025 22:14:48 -0500 Subject: [PATCH] update bits Signed-off-by: He, Xin3 --- test/test_cuda/test_auto_scheme.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_cuda/test_auto_scheme.py b/test/test_cuda/test_auto_scheme.py index 70366cf05..c7bec17d0 100644 --- a/test/test_cuda/test_auto_scheme.py +++ b/test/test_cuda/test_auto_scheme.py @@ -84,7 +84,7 @@ def test_shared_layers(self): @multi_card def test_multi_card(self): model_name = "/models/Qwen3-0.6B" - target_bits = 5.265 + target_bits = 4.5 for device_map in ["auto", "0,1", "0", None]: scheme = AutoScheme(avg_bits=target_bits, options=("NVFP4")) ar = AutoRound(model=model_name, scheme=scheme, iters=0, nsamples=1, device_map=device_map) @@ -96,7 +96,7 @@ def test_multi_card(self): @multi_card def test_multi_card_1(self): model_name = "/models/Qwen3-0.6B" - target_bits = 5.265 + target_bits = 4.5 from transformers import AutoModelForCausalLM, AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(model_name) @@ -110,7 +110,7 @@ def test_multi_card_1(self): def test_non_low_gpu_mem_usage(self): model_name = "/models/Qwen3-0.6B" - target_bits = 5.265 + target_bits = 4.5 # for device_map in ["auto", "0,1", "0", None]: scheme = AutoScheme(avg_bits=target_bits, options=("NVFP4"), low_gpu_mem_usage=False, device_map="auto") @@ -123,7 +123,7 @@ def test_non_low_gpu_mem_usage(self): @multi_card def test_dict_device_map(self): model_name = "/models/Qwen3-8B" - target_bits = 8.755 + target_bits = 8.25 device_map = {"up_proj": 0, "down_proj": 1} scheme = AutoScheme(avg_bits=target_bits, options=("MXFP8"))