Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions .azure-pipelines/scripts/ut/run_ut.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
set -xe

test_part=$1

# install requirements
echo "##[group]set up UT env..."
export TQDM_MININTERVAL=60
echo "##[group]set up UT env..."
uv pip install pytest-cov pytest-html
uv pip install -r /auto-round/test/test_cpu/requirements.txt \
--extra-index-url https://download.pytorch.org/whl/cpu
uv pip install torch==2.8.0 torchvision --index-url https://download.pytorch.org/whl/cpu
uv pip list
# workaround for ark test, remove auto_round_kernel_xpu
package_path=$(uv pip show auto-round-lib | grep Location:|cut -d: -f2)
rm -rf $package_path/auto_round_kernel/auto_round_kernel_xpu*
echo "##[endgroup]"

# install latest gguf for ut test
cd ~ || exit 1
Expand Down
1 change: 0 additions & 1 deletion .azure-pipelines/scripts/ut/run_ut_xpu.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/bin/bash
set -xe

# install requirements
echo "##[group]set up UT env..."
uv pip install pytest-cov pytest-html
uv pip list
Expand Down
5 changes: 3 additions & 2 deletions .azure-pipelines/template/ut-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,11 @@ steps:
&& uv pip list"
else
docker exec ${{ parameters.utContainerName }} bash -c "cd /auto-round \
&& uv pip install torch==2.8.0 torchvision --index-url https://download.pytorch.org/whl/cpu \
&& uv pip install intel-extension-for-pytorch==2.8.0 \
&& uv pip install torch==2.9.1 torchvision --index-url https://download.pytorch.org/whl/cpu \
&& uv pip install torch==2.9.1 auto-round-lib \
&& uv pip install -r requirements.txt \
&& uv pip install -r requirements-cpu.txt \
&& uv pip install -r test/test_cpu/requirements.txt \
&& uv pip list"
fi
displayName: "Env Setup"
Expand Down
2 changes: 1 addition & 1 deletion requirements-cpu.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
numba
tbb
intel-extension-for-pytorch
auto-round-lib
61 changes: 1 addition & 60 deletions test/test_cpu/core/test_autoround.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,73 +438,14 @@ def test_fallback_layers(self, tiny_opt_model_path, dataloader):
quantized_model_path = self.save_folder

autoround.save_quantized(output_dir=quantized_model_path, format="auto_round", inplace=True)
quantization_config = AutoRoundConfig(backend="ipex")

model = AutoModelForCausalLM.from_pretrained(
quantized_model_path, device_map="cpu", quantization_config=quantization_config
)
model = AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="cpu")
tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
text = "There is a girl who likes adventure,"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
res = tokenizer.decode(model.generate(**inputs, max_new_tokens=1)[0])
shutil.rmtree(self.save_folder, ignore_errors=True)

def test_not_convert_modules(self):
    """Check which modules of an AWQ Qwen2-VL checkpoint become ``QuantLinear``.

    Loads ``Qwen/Qwen2-VL-2B-Instruct-AWQ`` on CPU in float16 with a default
    ``AutoRoundConfig`` and asserts that:

    * the first vision block's ``attn.qkv`` is still a ``torch.nn.Linear``,
    * the first layer of the vision merger MLP is not a ``QuantLinear``,
    * the first decoder layer's ``self_attn.v_proj`` is a ``QuantLinear``.

    It then runs a one-token generation on a demo image as a smoke test and
    prints the decoded text.
    """
    import requests
    from PIL import Image
    from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

    from auto_round_extension.ipex.qlinear_ipex_awq import QuantLinear

    model_name = get_model_path("Qwen/Qwen2-VL-2B-Instruct-AWQ")
    quantization_config = AutoRoundConfig()
    model = Qwen2VLForConditionalGeneration.from_pretrained(
        model_name, quantization_config=quantization_config, device_map="cpu", torch_dtype=torch.float16
    )

    # The vision tower is reached through a different attribute path depending
    # on the installed transformers version.
    if transformers_version < version.parse("5.0.0"):
        assert isinstance(model.visual.blocks[0].attn.qkv, torch.nn.Linear)
        assert not isinstance(model.visual.merger.mlp[0], QuantLinear)
    else:
        assert isinstance(model.model.visual.blocks[0].attn.qkv, torch.nn.Linear)
        assert not isinstance(model.model.visual.merger.mlp[0], QuantLinear)

    # The decoder layers sit either under ``model.model.language_model`` or
    # directly under ``model.model``; handle both layouts.
    if hasattr(model.model, "language_model"):
        assert isinstance(model.model.language_model.layers[0].self_attn.v_proj, QuantLinear)
    else:
        assert isinstance(model.model.layers[0].self_attn.v_proj, QuantLinear)

    processor = AutoProcessor.from_pretrained(model_name, size=None)
    image_url = "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "image": image_url,
                },
                {"type": "text", "text": "Describe this image."},
            ],
        }
    ]

    # Preparation for inference
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Bound the download so a stalled host cannot hang the test run, and fail
    # with a clear HTTP error instead of an image-decoding error on a bad status.
    response = requests.get(image_url, stream=True, timeout=30)
    response.raise_for_status()
    image_inputs = Image.open(response.raw)
    inputs = processor(
        text=[text],
        images=image_inputs,
        padding=True,
        return_tensors="pt",
    )

    # Inference: Generation of the output
    generated_ids = model.generate(**inputs, max_new_tokens=1)
    # Strip the prompt tokens so only the newly generated ids are decoded.
    generated_ids_trimmed = [out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    print(output_text)

def test_fallback_layers_regex_awq(self, tiny_opt_model_path, dataloader):
model_name = tiny_opt_model_path
bits, group_size, sym = 4, 128, True
Expand Down
9 changes: 2 additions & 7 deletions test/test_cpu/utils/test_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,20 +41,15 @@ def test_4bits_sym(self, dataloader):

autoround.quantize_and_save(output_dir=quantized_model_path, format="auto_round", inplace=False)

quantization_config = AutoRoundConfig(backend="ipex")
model = AutoModelForCausalLM.from_pretrained(
quantized_model_path, device_map="cpu", quantization_config=quantization_config
)
model = AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="cpu")
tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
text = "My name is "
inputs = tokenizer(text, return_tensors="pt").to(model.device)
res = tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0])
print(res)
assert "!!!" not in res

model = AutoModelForCausalLM.from_pretrained(
quantized_model_path, device_map="cpu", quantization_config=quantization_config, torch_dtype=torch.float16
)
model = AutoModelForCausalLM.from_pretrained(quantized_model_path, device_map="cpu", torch_dtype=torch.float16)
tokenizer = AutoTokenizer.from_pretrained(quantized_model_path)
text = "There is a girl who likes adventure,"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
Expand Down