From 4c1c270e5e95626af796432b589a9f54ccc44e55 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 8 Sep 2025 10:13:54 +0300 Subject: [PATCH 1/4] cuda : fix supports_op condition for get_rows when src1->ne2 > 1 ggml-ci --- ggml/src/ggml-cuda/ggml-cuda.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 0c01eb6fa8359..b71a9f17df292 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -3392,6 +3392,9 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g return op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_F32; case GGML_OP_GET_ROWS: { + if (op->src[1]->ne[2] > 1) { + return false; + } switch (op->src[0]->type) { case GGML_TYPE_F16: case GGML_TYPE_F32: From c453e5e2d17ca452cb6c7c95cafff8deb2e137f6 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 8 Sep 2025 11:17:56 +0300 Subject: [PATCH 2/4] ggml : add comment about ggml_get_rows ggml-ci --- ggml/include/ggml.h | 6 +++++- ggml/src/ggml.c | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index c01b98ac78f5a..5490173c99e9a 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -1528,7 +1528,11 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); - // supports 3D: a->ne[2] == b->ne[1] + // supports 4D a: + // a [n_embd, ne1, ne2, ne3] + // b I32 [n_rows, ne2, ne3, 1] + // + // return [n_embd, n_rows, ne2, ne3] GGML_API struct ggml_tensor * ggml_get_rows( struct ggml_context * ctx, struct ggml_tensor * a, // data diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index f35c337952ec3..50dc1aa24fff5 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -3623,6 +3623,7 @@ struct ggml_tensor * ggml_get_rows( struct ggml_tensor * a, struct ggml_tensor * b) { GGML_ASSERT(a->ne[2] == b->ne[1]); + GGML_ASSERT(a->ne[3] == b->ne[2]); GGML_ASSERT(b->ne[3] == 1); GGML_ASSERT(b->type == GGML_TYPE_I32); From 8c2a5fa603b0bbf11de2f4adbe075061fbd3ca8c Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 8 Sep 2025 11:21:11 +0300 Subject: [PATCH 3/4] cuda : add FIXME [no ci] --- ggml/src/ggml-cuda/ggml-cuda.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index b71a9f17df292..5241056ca6a11 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -3392,6 +3392,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g return op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_F32; case GGML_OP_GET_ROWS: { + // FIXME: https://github.com/ggml-org/llama.cpp/pull/15868 if (op->src[1]->ne[2] > 1) { return false; } From 2aee6206b68934791cd7b3ea523d899d826812ea Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 8 Sep 2025 11:48:32 +0300 Subject: [PATCH 4/4] cuda : update support condition ggml-ci --- ggml/src/ggml-cuda/ggml-cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 5241056ca6a11..eafcefca8db1b 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -3393,7 +3393,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g case GGML_OP_GET_ROWS: { // FIXME: https://github.com/ggml-org/llama.cpp/pull/15868 - if (op->src[1]->ne[2] > 1) { + if (op->src[1]->ne[1]*op->src[1]->ne[2] > 65535) { return false; } switch (op->src[0]->type) {