From 53691fd22c3f25027370f3ff4b2aba4cd7a121d2 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Fri, 30 May 2025 13:50:27 +0800
Subject: [PATCH 1/2] fix vlm ut

Signed-off-by: Kaihui-intel
---
 .../quantization/weight_only/test_transformers.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/test/3x/torch/quantization/weight_only/test_transformers.py b/test/3x/torch/quantization/weight_only/test_transformers.py
index 190c4cea33a..3aacd5c1c78 100644
--- a/test/3x/torch/quantization/weight_only/test_transformers.py
+++ b/test/3x/torch/quantization/weight_only/test_transformers.py
@@ -247,15 +247,21 @@ def test_vlm(self):
             from intel_extension_for_pytorch.nn.utils._quantize_convert import WeightOnlyQuantizedLinear
         else:
             from intel_extension_for_pytorch.nn.modules import WeightOnlyQuantizedLinear
-        assert isinstance(woq_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."
+
+        if Version(transformers.__version__) >= Version("4.52.3"):
+            assert isinstance(woq_model.model.language_model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."
+        else:
+            assert isinstance(woq_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."

         #save
         woq_model.save_pretrained("transformers_vlm_tmp")

         #load
         loaded_model = Qwen2VLForConditionalGeneration.from_pretrained("transformers_vlm_tmp")
-        assert isinstance(loaded_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loaing model failed."
-
+        if Version(transformers.__version__) >= Version("4.52.3"):
+            assert isinstance(loaded_model.model.language_model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loading model failed."
+        else:
+            assert isinstance(loaded_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loading model failed."
         # phi-3-vision-128k-instruct, disable as CI consumes too much time
         # woq_config = AutoRoundConfig(
         #     bits=4,

From 65ba7e7d723d50b2ac77a7b2a00705fa5ad4a767 Mon Sep 17 00:00:00 2001
From: Kaihui-intel
Date: Fri, 30 May 2025 14:46:42 +0800
Subject: [PATCH 2/2] update transformers version

Signed-off-by: Kaihui-intel
---
 test/3x/torch/quantization/weight_only/test_transformers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/3x/torch/quantization/weight_only/test_transformers.py b/test/3x/torch/quantization/weight_only/test_transformers.py
index 3aacd5c1c78..ff1cd853f16 100644
--- a/test/3x/torch/quantization/weight_only/test_transformers.py
+++ b/test/3x/torch/quantization/weight_only/test_transformers.py
@@ -248,7 +248,7 @@ def test_vlm(self):
         else:
             from intel_extension_for_pytorch.nn.modules import WeightOnlyQuantizedLinear

-        if Version(transformers.__version__) >= Version("4.52.3"):
+        if Version(transformers.__version__) >= Version("4.52"):
             assert isinstance(woq_model.model.language_model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."
         else:
             assert isinstance(woq_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "replacing model failed."
@@ -255,10 +255,10 @@ def test_vlm(self):

         #save
         woq_model.save_pretrained("transformers_vlm_tmp")

         #load
         loaded_model = Qwen2VLForConditionalGeneration.from_pretrained("transformers_vlm_tmp")
-        if Version(transformers.__version__) >= Version("4.52.3"):
+        if Version(transformers.__version__) >= Version("4.52"):
             assert isinstance(loaded_model.model.language_model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loading model failed."
         else:
             assert isinstance(loaded_model.model.layers[0].self_attn.k_proj, WeightOnlyQuantizedLinear), "loading model failed."
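
Note: these patches version-gate the assertions because transformers >= 4.52 moves the
Qwen2-VL text backbone from model.layers to model.language_model.layers. Below is a
minimal standalone sketch of that gate for reference, not part of the patches themselves:
decoder_layers is a hypothetical helper name, woq_model comes from the test above, and
the packaging library is assumed available (transformers already depends on it).

    from packaging.version import Version
    import transformers

    def decoder_layers(model):
        # transformers >= 4.52 nests the text stack under model.language_model;
        # older releases expose model.layers directly on the wrapper module.
        if Version(transformers.__version__) >= Version("4.52"):
            return model.model.language_model.layers
        return model.model.layers

    # e.g. the module the test checks after weight-only quantization:
    # k_proj = decoder_layers(woq_model)[0].self_attn.k_proj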