LLM: fix qwen AutoTP (#10766)
plusbang committed Apr 16, 2024
1 parent 3e2662c commit 0a62933
Showing 1 changed file with 2 additions and 1 deletion.
python/llm/src/ipex_llm/transformers/convert.py
@@ -617,7 +617,8 @@ def split_qkv_proj_func(module):
     if "QWenAttention" in module.__class__.__name__:
         c_attn_weight = module.c_attn.weight.data
         c_attn_bias = module.c_attn.bias.data
-        projection_size = module.projection_size
+        # Compatible with AutoTP case
+        projection_size = c_attn_weight.shape[0] // 3
         hid_size = module.hidden_size
         with init_empty_weights():
             q_proj = torch.nn.Linear(hid_size, projection_size)
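Why this fixes AutoTP: under DeepSpeed AutoTP the fused c_attn linear is sharded across tensor-parallel ranks, so module.projection_size (taken from the full-model config) no longer matches the rows the local rank actually holds. Deriving the size from the sharded weight itself, c_attn_weight.shape[0] // 3 (QWen fuses q, k and v into a single c_attn projection), stays correct on every rank and reduces to the old value in the single-process case. Below is a minimal sketch of the shape arithmetic, not code from the commit; hidden_size, tp and rank are illustrative assumptions, and the two-rank split is simplified.

import torch

hidden_size = 4096   # illustrative full-model hidden size (assumption)
tp, rank = 2, 0      # assumed AutoTP world size and local rank

# QWen fuses q, k, v into one c_attn projection: the full weight is
# [3 * projection_size, hidden_size], with projection_size == hidden_size.
q, k, v = (torch.empty(hidden_size, hidden_size) for _ in range(3))

# Tensor parallelism splits each of q, k, v along the output dimension;
# one rank's c_attn shard stacks the local slice of all three.
local_c_attn = torch.cat([w.chunk(tp, dim=0)[rank] for w in (q, k, v)], dim=0)

# module.projection_size still reports the full-model value (4096), but the
# shard only holds a fraction of the rows per projection. Sizing the split
# from the shard itself keeps q/k/v consistent on every rank.
local_projection_size = local_c_attn.shape[0] // 3   # 2048 here, 4096 if tp == 1

q_w, k_w, v_w = local_c_attn.chunk(3, dim=0)
assert q_w.shape == (local_projection_size, hidden_size)

With tp == 1 the shard is the full weight and shape[0] // 3 equals the old module.projection_size, so the new expression is a strict generalization of the line it replaces.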
