From d66530cf1fb7210fee89e66d278f830dc470b9c7 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Mon, 6 Oct 2025 11:51:51 +0300
Subject: [PATCH] memory : use sequential equal splits for recurrent modules

---
 src/llama-memory-hybrid.cpp    | 4 +++-
 src/llama-memory-recurrent.cpp | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/llama-memory-hybrid.cpp b/src/llama-memory-hybrid.cpp
index cb8832a353b11..dfb8439e01bdf 100644
--- a/src/llama-memory-hybrid.cpp
+++ b/src/llama-memory-hybrid.cpp
@@ -73,7 +73,9 @@ llama_memory_context_ptr llama_memory_hybrid::init_batch(llama_batch_allocr & ba
                 // if all tokens are output, split by sequence
                 ubatch = balloc.split_seq(n_ubatch);
             } else {
-                ubatch = balloc.split_equal(n_ubatch, false);
+                // TODO: non-sequential equal split can be done if using unified KV cache
+                // for simplicity, we always use sequential equal split for now
+                ubatch = balloc.split_equal(n_ubatch, true);
             }
 
             if (ubatch.n_tokens == 0) {
diff --git a/src/llama-memory-recurrent.cpp b/src/llama-memory-recurrent.cpp
index e23e74982b278..9402d9cb8df9f 100644
--- a/src/llama-memory-recurrent.cpp
+++ b/src/llama-memory-recurrent.cpp
@@ -382,7 +382,9 @@ llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr &
                 // if all tokens are output, split by sequence
                 ubatch = balloc.split_seq(n_ubatch);
             } else {
-                ubatch = balloc.split_equal(n_ubatch, false);
+                // TODO: non-sequential equal split can be done if using unified KV cache
+                // for simplicity, we always use sequential equal split for now
+                ubatch = balloc.split_equal(n_ubatch, true);
            }
 
             if (ubatch.n_tokens == 0) {
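
Note: the sketch below is not llama.cpp code. It is a self-contained toy illustration of
the general idea of an "equal split" of a multi-sequence batch into ubatches, where each
ubatch takes roughly the same number of tokens from every still-unfinished sequence and
the tokens of a given sequence stay in their original order. All names (toy_token,
toy_split_equal) are hypothetical, and the exact policy of llama_batch_allocr::split_equal
and its sequential flag may differ from what is shown here.

    // Toy sketch (assumptions only, not the real split_equal implementation).
    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    struct toy_token {
        int seq_id; // sequence the token belongs to
        int pos;    // position within that sequence
    };

    // Split `seqs` (one vector of tokens per sequence) into ubatches of about
    // n_ubatch tokens, taking up to the same number of tokens from each
    // still-unfinished sequence in every round.
    static std::vector<std::vector<toy_token>> toy_split_equal(
            const std::vector<std::vector<toy_token>> & seqs, size_t n_ubatch) {
        std::vector<std::vector<toy_token>> ubatches;
        std::vector<size_t> off(seqs.size(), 0); // per-sequence read offset

        for (;;) {
            // count sequences that still have unread tokens
            size_t n_active = 0;
            for (size_t s = 0; s < seqs.size(); ++s) {
                if (off[s] < seqs[s].size()) {
                    n_active++;
                }
            }
            if (n_active == 0) {
                break;
            }

            // each active sequence contributes up to the same number of tokens
            const size_t per_seq = std::max<size_t>(1, n_ubatch / n_active);

            std::vector<toy_token> ub;
            for (size_t s = 0; s < seqs.size(); ++s) {
                const size_t take = std::min(per_seq, seqs[s].size() - off[s]);
                for (size_t i = 0; i < take; ++i) {
                    ub.push_back(seqs[s][off[s] + i]);
                }
                off[s] += take;
            }
            ubatches.push_back(std::move(ub));
        }

        return ubatches;
    }

    int main() {
        // two sequences of different lengths
        const std::vector<std::vector<toy_token>> seqs = {
            { {0, 0}, {0, 1}, {0, 2}, {0, 3} },
            { {1, 0}, {1, 1} },
        };

        const auto ubatches = toy_split_equal(seqs, /*n_ubatch =*/ 4);

        for (size_t u = 0; u < ubatches.size(); ++u) {
            printf("ubatch %zu:", u);
            for (const auto & t : ubatches[u]) {
                printf(" (seq %d, pos %d)", t.seq_id, t.pos);
            }
            printf("\n");
        }
        return 0;
    }

As the TODO in the patch states, a non-sequential equal split would be possible with a
unified KV cache, but the sequential variant is kept for simplicity; presumably this is
easier to reason about for recurrent modules, which track a single rolling state per
sequence.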