From 0a62933d36a91664e5ecfd3e321cb2c18dc97a4a Mon Sep 17 00:00:00 2001
From: binbin Deng <108676127+plusbang@users.noreply.github.com>
Date: Tue, 16 Apr 2024 09:56:17 +0800
Subject: [PATCH] LLM: fix qwen AutoTP (#10766)

---
 python/llm/src/ipex_llm/transformers/convert.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/llm/src/ipex_llm/transformers/convert.py b/python/llm/src/ipex_llm/transformers/convert.py
index ff133b35a21..43b9e5f0f4f 100644
--- a/python/llm/src/ipex_llm/transformers/convert.py
+++ b/python/llm/src/ipex_llm/transformers/convert.py
@@ -617,7 +617,8 @@ def split_qkv_proj_func(module):
     if "QWenAttention" in module.__class__.__name__:
         c_attn_weight = module.c_attn.weight.data
         c_attn_bias = module.c_attn.bias.data
-        projection_size = module.projection_size
+        # Compatible with AutoTP case
+        projection_size = c_attn_weight.shape[0] // 3
         hid_size = module.hidden_size
         with init_empty_weights():
             q_proj = torch.nn.Linear(hid_size, projection_size)
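
For context (this note and the sketch below are not part of the patch): AutoTP is DeepSpeed's automatic tensor parallelism, which shards Qwen's fused c_attn (QKV) projection row-wise across ranks. After sharding, module.projection_size still reports the full-model value, so splitting the local weight by it would mis-slice Q, K, and V; deriving the size from the sharded weight itself, as the patch does with c_attn_weight.shape[0] // 3, stays correct on every rank. A minimal sketch of that arithmetic, using a hypothetical tensor-parallel degree and rank:

    import torch

    # Hypothetical sizes, for illustration only.
    hidden_size = 32
    projection_size = 32      # full (unsharded) projection size
    tp_world_size = 2         # assumed tensor-parallel degree
    rank = 0                  # assumed local rank

    # Qwen's fused QKV weight has shape [3 * projection_size, hidden_size].
    c_attn_weight = torch.empty(3 * projection_size, hidden_size)

    # Under tensor parallelism each of Q, K, V is sharded row-wise and the
    # shards are re-fused, so the local weight keeps the [q; k; v] layout
    # with shape [3 * local_projection_size, hidden_size].
    q, k, v = c_attn_weight.chunk(3, dim=0)
    local_weight = torch.cat(
        [t.chunk(tp_world_size, dim=0)[rank] for t in (q, k, v)], dim=0
    )

    # module.projection_size would still be 32 here, which is wrong per rank;
    # reading the size off the sharded weight gives the correct local value.
    local_projection_size = local_weight.shape[0] // 3
    assert local_projection_size == projection_size // tp_world_size  # 16

Because the division also holds in the unsharded case (3 * projection_size // 3 == projection_size), the patched line works both with and without AutoTP, which is why the fix needs no branch on the parallelism mode.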