From 39f9747d4db5c1a0de6321fda05ae318d6d24990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Mon, 8 Dec 2025 21:14:02 +0100 Subject: [PATCH] allow fill node alloc inplace --- ggml/src/ggml-alloc.c | 1 + ggml/src/ggml-cuda/fill.cu | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c index 218222ece87..a5995fdc2c3 100644 --- a/ggml/src/ggml-alloc.c +++ b/ggml/src/ggml-alloc.c @@ -25,6 +25,7 @@ static bool ggml_is_view(const struct ggml_tensor * t) { // ops that return true for this function must not use restrict pointers for their backend implementations bool ggml_op_can_inplace(enum ggml_op op) { switch (op) { + case GGML_OP_FILL: case GGML_OP_SCALE: case GGML_OP_DIAG_MASK_ZERO: case GGML_OP_DIAG_MASK_INF: diff --git a/ggml/src/ggml-cuda/fill.cu b/ggml/src/ggml-cuda/fill.cu index eb8ccb7802b..739062c4057 100644 --- a/ggml/src/ggml-cuda/fill.cu +++ b/ggml/src/ggml-cuda/fill.cu @@ -4,7 +4,7 @@ #define CUDA_FILL_BLOCK_SIZE 256 template -static __global__ void fill_kernel(T * __restrict__ dst, const int64_t k, const T value) { +static __global__ void fill_kernel(T * dst, const int64_t k, const T value) { const int64_t i = (int64_t)blockDim.x * blockIdx.x + threadIdx.x; if (i >= k) { return;