From d28be8b0d490f34e858332849d1e07e9910dcccc Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 24 Jul 2025 09:29:31 +0300 Subject: [PATCH 1/2] metal : fix fusion across different encoders ggml-ci --- ggml/src/ggml-metal/ggml-metal.m | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m index dc391a0d4d549..294cccadc6ff4 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -1955,6 +1955,7 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex static int ggml_metal_encode_node( ggml_backend_t backend, int idx, + int idx_end, id encoder, struct ggml_metal_mem_pool * mem_pool) { struct ggml_backend_metal_context * ctx = backend->context; @@ -2181,7 +2182,9 @@ static int ggml_metal_encode_node( size_t offs_fuse; id id_fuse; - for (n_fuse = 0; n_fuse <= 6; ++n_fuse) { + // note: in metal, we sometimes encode the graph in parallel so we have to avoid fusing nodes + // across splits. 
idx_end is one past the last node in the current split + for (n_fuse = 0; n_fuse <= 6 && idx + n_fuse + 1 < idx_end; ++n_fuse) { if (!ggml_can_fuse(gf, idx + n_fuse, ops + n_fuse, 2)) { break; } @@ -4288,7 +4291,7 @@ static int ggml_metal_encode_node( ops[1] = GGML_OP_MUL; ops[2] = GGML_OP_ADD; - for (n_fuse = 0; n_fuse <= 1; ++n_fuse) { + for (n_fuse = 0; n_fuse <= 1 && idx + n_fuse + 1 < idx_end; ++n_fuse) { if (!ggml_can_fuse(gf, idx + n_fuse, ops + n_fuse, 2)) { break; } @@ -6271,7 +6274,7 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) { [encoder pushDebugGroup:[NSString stringWithCString:ggml_op_desc(ggml_graph_node(ctx->gf, idx)) encoding:NSUTF8StringEncoding]]; } - const int res = ggml_metal_encode_node(backend, idx, encoder, mem_pool); + const int res = ggml_metal_encode_node(backend, idx, node_end, encoder, mem_pool); if (should_capture) { [encoder popDebugGroup]; From 42826ebba61508bfda60ad5d7879a6eae29555ed Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Thu, 24 Jul 2025 09:51:14 +0300 Subject: [PATCH 2/2] cont : add assertion ggml-ci --- ggml/src/ggml-metal/ggml-metal.m | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m index 294cccadc6ff4..1a9999325fe27 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -6275,6 +6275,10 @@ static void ggml_backend_metal_set_n_cb(ggml_backend_t backend, int n_cb) { } const int res = ggml_metal_encode_node(backend, idx, node_end, encoder, mem_pool); + if (idx + res > node_end) { + GGML_ABORT("fusion error: nodes spanning multiple encoders have been fused. this indicates a bug in the fusion logic %s", + "https://github.com/ggml-org/llama.cpp/pull/14849"); + } if (should_capture) { [encoder popDebugGroup];