From 73235c8127faba926e045b0ca1fcdc05df5a191d Mon Sep 17 00:00:00 2001
From: Acly
Date: Fri, 3 Oct 2025 03:03:22 +0200
Subject: [PATCH] ggml : fix graph reallocation with multiple chunks

reallocation is needed if a single chunk grows in size,
even if total allocation size stays the same or is lower
---
 ggml/src/ggml-alloc.c | 30 ++++++++++++++++--------------
 tests/test-alloc.cpp  | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index fa46f3b491aa5..929bc4488156f 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -392,12 +392,8 @@ static void ggml_dyn_tallocr_free(struct ggml_dyn_tallocr * alloc) {
     free(alloc);
 }
 
-static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc) {
-    size_t max_size = 0;
-    for (int i = 0; i < alloc->n_chunks; i++) {
-        max_size += alloc->chunks[i]->max_size;
-    }
-    return max_size;
+static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc, int chunk) {
+    return chunk < alloc->n_chunks ? alloc->chunks[chunk]->max_size : 0;
 }
 
 
@@ -417,10 +413,8 @@ static void ggml_vbuffer_free(struct vbuffer * buf) {
     free(buf);
 }
 
-static int ggml_vbuffer_n_chunks(struct vbuffer * buf) {
-    int n = 0;
-    while (n < GGML_VBUFFER_MAX_CHUNKS && buf->chunks[n]) n++;
-    return n;
+static size_t ggml_vbuffer_chunk_size(struct vbuffer * buf, int chunk) {
+    return buf->chunks[chunk] ? ggml_backend_buffer_get_size(buf->chunks[chunk]) : 0;
 }
 
 static size_t ggml_vbuffer_size(struct vbuffer * buf) {
@@ -885,12 +879,20 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
             }
         }
 
-        size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
-        size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);
-
         // even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views
-        if (new_size > cur_size || galloc->buffers[i] == NULL) {
+        bool realloc = galloc->buffers[i] == NULL;
+        size_t new_size = 0;
+        for (int c = 0; c < galloc->buf_tallocs[i]->n_chunks; c++) {
+            size_t cur_chunk_size = galloc->buffers[i] ? ggml_vbuffer_chunk_size(galloc->buffers[i], c) : 0;
+            size_t new_chunk_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i], c);
+            new_size += new_chunk_size;
+            if (new_chunk_size > cur_chunk_size) {
+                realloc = true;
+            }
+        }
+        if (realloc) {
 #ifndef NDEBUG
+            size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
             GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
 #endif
 
diff --git a/tests/test-alloc.cpp b/tests/test-alloc.cpp
index 2eb7724731acc..95e09c97b02e3 100644
--- a/tests/test-alloc.cpp
+++ b/tests/test-alloc.cpp
@@ -548,6 +548,41 @@ static void test_buffer_size_zero() {
     GGML_ASSERT(backend_b.context->allocated_total() == 0);
 }
 
+// Test re-using gallocr for a different graph. The new graph has the same
+// total size, but one of the chunks is larger, so reallocation is required.
+static void test_reallocation() {
+    dummy_backend backend = dummy_backend_init(32, /*align*/ 4);
+    ggml_gallocr_ptr galloc;
+    {
+        auto [ctx, graph, ctx_ptr] = make_context();
+        ggml_tensor * x[4];
+        x[0] = make_input_with_size(ctx, 24);
+        x[1] = make_input_with_size(ctx, 16);
+        x[2] = ggml_view_1d(ctx, x[0], 4, 0);
+        x[3] = ggml_add(ctx, x[2], x[1]);
+        assign_names(ctx);
+
+        galloc = allocate_graph(graph, x[3], &backend.buffer_type);
+        check_all_allocated(graph);
+        GGML_ASSERT(backend.context->allocated_total() == 40);
+    }
+    {
+        auto [ctx, graph, ctx_ptr] = make_context();
+        ggml_tensor * x[3];
+        x[0] = make_input_with_size(ctx, 20);
+        x[1] = make_input_with_size(ctx, 20);
+        x[2] = ggml_add(ctx, x[0], x[1]);
+        assign_names(ctx);
+        ggml_set_output(x[2]);
+        ggml_build_forward_expand(graph, x[2]);
+
+        bool result = ggml_gallocr_alloc_graph(galloc.get(), graph);
+        GGML_ASSERT(result);
+        check_all_allocated(graph);
+        GGML_ASSERT(backend.context->allocated_total() == 40);
+    }
+}
+
 static void run(const char * name, void (*f)()) {
     printf("%s ", name);
     fflush(stdout);
@@ -568,5 +603,6 @@ int main() {
     run("test_prefer_already_allocated_memory", test_prefer_already_allocated_memory);
     run("test_multiple_buffer_types", test_multiple_buffer_types);
     run("test_buffer_size_zero", test_buffer_size_zero);
+    run("test_reallocation", test_reallocation);
     return 0;
 }