From bf13eee592ef6c0d232f2db90fe6aee60a057435 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= <sigbjorn.skjaeret@scala.com>
Date: Tue, 9 Sep 2025 22:03:39 +0200
Subject: [PATCH 1/2] support non-contiguous Q in build_attn_mha

---
 src/llama-graph.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 7f254b25cd451..2065edbe51316 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -1273,7 +1273,7 @@ ggml_tensor * llm_graph_context::build_attn_mha(
     // split the batch into streams if needed
     const auto n_stream = k->ne[3];
 
-    q = ggml_reshape_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream);
+    q = ggml_view_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream, q->nb[1], q->nb[2], q->nb[3], 0);
 
     q = ggml_permute(ctx0, q, 0, 2, 1, 3);
     k = ggml_permute(ctx0, k, 0, 2, 1, 3);

From 1c51edafa1e28ee3f2f5087d5dbd1bf1c2c5be95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= <sigbjorn.skjaeret@scala.com>
Date: Wed, 10 Sep 2025 14:00:02 +0200
Subject: [PATCH 2/2] Update src/llama-graph.cpp: divide Q view stream stride (nb3) by n_stream in build_attn_mha

ggml-ci

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 src/llama-graph.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 2065edbe51316..ddc772b179f7e 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -1273,7 +1273,7 @@ ggml_tensor * llm_graph_context::build_attn_mha(
     // split the batch into streams if needed
     const auto n_stream = k->ne[3];
 
-    q = ggml_view_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream, q->nb[1], q->nb[2], q->nb[3], 0);
+    q = ggml_view_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream, q->nb[1], q->nb[2], q->nb[3]/n_stream, 0);
 
     q = ggml_permute(ctx0, q, 0, 2, 1, 3);
     k = ggml_permute(ctx0, k, 0, 2, 1, 3);