Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions ggml/src/ggml-alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -392,12 +392,8 @@ static void ggml_dyn_tallocr_free(struct ggml_dyn_tallocr * alloc) {
free(alloc);
}

static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc) {
size_t max_size = 0;
for (int i = 0; i < alloc->n_chunks; i++) {
max_size += alloc->chunks[i]->max_size;
}
return max_size;
// Returns the max_size field of chunk number `chunk` of the dynamic allocator,
// or 0 when `chunk` is not below alloc->n_chunks.
static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc, int chunk) {
    if (!(chunk < alloc->n_chunks)) {
        // index is at or past the chunk count: report size 0
        return 0;
    }
    return alloc->chunks[chunk]->max_size;
}


Expand All @@ -417,10 +413,8 @@ static void ggml_vbuffer_free(struct vbuffer * buf) {
free(buf);
}

static int ggml_vbuffer_n_chunks(struct vbuffer * buf) {
int n = 0;
while (n < GGML_VBUFFER_MAX_CHUNKS && buf->chunks[n]) n++;
return n;
// Returns the size reported by ggml_backend_buffer_get_size() for slot `chunk`
// of the virtual buffer, or 0 when that slot is empty.
// No bounds check is done on `chunk`; the caller must pass a valid slot index.
static size_t ggml_vbuffer_chunk_size(struct vbuffer * buf, int chunk) {
    if (!buf->chunks[chunk]) {
        // empty slot: nothing is allocated for this chunk
        return 0;
    }
    return ggml_backend_buffer_get_size(buf->chunks[chunk]);
}

static size_t ggml_vbuffer_size(struct vbuffer * buf) {
Expand Down Expand Up @@ -885,12 +879,20 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
}
}

size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);

// even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views
if (new_size > cur_size || galloc->buffers[i] == NULL) {
bool realloc = galloc->buffers[i] == NULL;
size_t new_size = 0;
for (int c = 0; c < galloc->buf_tallocs[i]->n_chunks; c++) {
size_t cur_chunk_size = galloc->buffers[i] ? ggml_vbuffer_chunk_size(galloc->buffers[i], c) : 0;
size_t new_chunk_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i], c);
new_size += new_chunk_size;
if (new_chunk_size > cur_chunk_size) {
realloc = true;
}
}
if (realloc) {
#ifndef NDEBUG
size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
#endif

Expand Down
36 changes: 36 additions & 0 deletions tests/test-alloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,41 @@ static void test_buffer_size_zero() {
GGML_ASSERT(backend_b.context->allocated_total() == 0);
}

// Test re-using gallocr for a different graph. The new graph has the same
// total size, but one of the chunks is larger, so reallocation is required.
//
// A check that only compares total sizes would see 40 == 40 and skip the
// reallocation; the second graph must still end up fully allocated.
// NOTE(review): the first argument of dummy_backend_init (32) is presumably the
// maximum size of a single buffer, which is what forces multiple chunks — confirm
// against the helper's definition.
static void test_reallocation() {
dummy_backend backend = dummy_backend_init(32, /*align*/ 4);
// Holds the allocator created for the first graph so the second graph can re-use it.
ggml_gallocr_ptr galloc;
{
// First graph: inputs created with sizes 24 and 16, a view of x[0]
// (arguments 4 and 0 — presumably 4 elements at offset 0), and an add of
// that view with x[1].
auto [ctx, graph, ctx_ptr] = make_context();
ggml_tensor * x[4];
x[0] = make_input_with_size(ctx, 24);
x[1] = make_input_with_size(ctx, 16);
x[2] = ggml_view_1d(ctx, x[0], 4, 0);
x[3] = ggml_add(ctx, x[2], x[1]);
assign_names(ctx);

// allocate_graph returns the allocator that is kept in galloc for the second graph.
galloc = allocate_graph(graph, x[3], &backend.buffer_type);
check_all_allocated(graph);
// 40 == 24 + 16, i.e. presumably the two inputs — confirm units against make_input_with_size.
GGML_ASSERT(backend.context->allocated_total() == 40);
}
{
// Second graph: inputs created with sizes 20 and 20 (again 40 in total),
// added together. It is allocated through the existing galloc instead of
// a fresh allocator.
auto [ctx, graph, ctx_ptr] = make_context();
ggml_tensor * x[3];
x[0] = make_input_with_size(ctx, 20);
x[1] = make_input_with_size(ctx, 20);
x[2] = ggml_add(ctx, x[0], x[1]);
assign_names(ctx);
ggml_set_output(x[2]);
ggml_build_forward_expand(graph, x[2]);

// Re-use the allocator from the first graph; allocation must succeed and
// every tensor must end up allocated.
bool result = ggml_gallocr_alloc_graph(galloc.get(), graph);
GGML_ASSERT(result);
check_all_allocated(graph);
// The backend's allocated total is expected to be 40 again after re-allocation.
GGML_ASSERT(backend.context->allocated_total() == 40);
}
}

static void run(const char * name, void (*f)()) {
printf("%s ", name);
fflush(stdout);
Expand All @@ -568,5 +603,6 @@ int main() {
run("test_prefer_already_allocated_memory", test_prefer_already_allocated_memory);
run("test_multiple_buffer_types", test_multiple_buffer_types);
run("test_buffer_size_zero", test_buffer_size_zero);
run("test_reallocation", test_reallocation);
return 0;
}
Loading