From 0a62933d36a91664e5ecfd3e321cb2c18dc97a4a Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Tue, 16 Apr 2024 09:56:17 +0800
Subject: [PATCH] LLM: fix qwen AutoTP (#10766)

---
 python/llm/src/ipex_llm/transformers/convert.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/convert.py b/python/llm/src/ipex_llm/transformers/convert.py
index ff133b35a21..43b9e5f0f4f 100644
--- a/python/llm/src/ipex_llm/transformers/convert.py
+++ b/python/llm/src/ipex_llm/transformers/convert.py
@@ -617,7 +617,8 @@ def split_qkv_proj_func(module):
     if "QWenAttention" in module.__class__.__name__:
         c_attn_weight = module.c_attn.weight.data
         c_attn_bias = module.c_attn.bias.data
-        projection_size = module.projection_size
+        # Compatible with AutoTP case
+        projection_size = c_attn_weight.shape[0] // 3
         hid_size = module.hidden_size
         with init_empty_weights():
             q_proj = torch.nn.Linear(hid_size, projection_size)
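
For context (this note and the sketch below are not part of the patch): AutoTP is DeepSpeed's automatic tensor parallelism, which shards Qwen's fused c_attn (QKV) projection row-wise across ranks. After sharding, module.projection_size still reports the full-model value, so splitting the local weight by it would mis-slice Q, K, and V; deriving the size from the sharded weight itself, as the patch does with c_attn_weight.shape[0] // 3, stays correct on every rank. A minimal sketch of that arithmetic, using a hypothetical tensor-parallel degree and rank:

    import torch

    # Hypothetical sizes, for illustration only.
    hidden_size = 32
    projection_size = 32      # full (unsharded) projection size
    tp_world_size = 2         # assumed tensor-parallel degree
    rank = 0                  # assumed local rank

    # Qwen's fused QKV weight has shape [3 * projection_size, hidden_size].
    c_attn_weight = torch.empty(3 * projection_size, hidden_size)

    # Under tensor parallelism each of Q, K, V is sharded row-wise and the
    # shards are re-fused, so the local weight keeps the [q; k; v] layout
    # with shape [3 * local_projection_size, hidden_size].
    q, k, v = c_attn_weight.chunk(3, dim=0)
    local_weight = torch.cat(
        [t.chunk(tp_world_size, dim=0)[rank] for t in (q, k, v)], dim=0
    )

    # module.projection_size would still be 32 here, which is wrong per rank;
    # reading the size off the sharded weight gives the correct local value.
    local_projection_size = local_weight.shape[0] // 3
    assert local_projection_size == projection_size // tp_world_size  # 16

Because the division also holds in the unsharded case (3 * projection_size // 3 == projection_size), the patched line works both with and without AutoTP, which is why the fix needs no branch on the parallelism mode.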