From 8c53c5ddde0332ea7683d1803a24ac2566511a77 Mon Sep 17 00:00:00 2001 From: Mahekk Date: Sat, 29 Nov 2025 12:49:36 -0500 Subject: [PATCH 1/2] cuda : add error checking for cudaMemcpyAsync in argsort (#12836) --- ggml/src/ggml-cuda/argsort.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ggml/src/ggml-cuda/argsort.cu b/ggml/src/ggml-cuda/argsort.cu index 3722cf3ab26..0a8ddd44fbd 100644 --- a/ggml/src/ggml-cuda/argsort.cu +++ b/ggml/src/ggml-cuda/argsort.cu @@ -44,7 +44,7 @@ static void argsort_f32_i32_cuda_cub(ggml_cuda_pool & pool, const dim3 offset_grid((nrows + block_size - 1) / block_size); init_offsets<<<offset_grid, block_size, 0, stream>>>(d_offsets, ncols, nrows); - cudaMemcpyAsync(temp_keys, x, ncols * nrows * sizeof(float), cudaMemcpyDeviceToDevice, stream); + CUDA_CHECK(cudaMemcpyAsync(temp_keys, x, ncols * nrows * sizeof(float), cudaMemcpyDeviceToDevice, stream)); size_t temp_storage_bytes = 0; @@ -59,7 +59,6 @@ static void argsort_f32_i32_cuda_cub(ggml_cuda_pool & pool, dst, ncols * nrows, nrows, d_offsets, d_offsets + 1, 0, sizeof(float) * 8, stream); } - ggml_cuda_pool_alloc temp_storage_alloc(pool, temp_storage_bytes); void * d_temp_storage = temp_storage_alloc.get(); From 252d8eadd2d208fe52034c11284698beb0bc30d9 Mon Sep 17 00:00:00 2001 From: Mahekk Date: Sat, 29 Nov 2025 16:37:43 -0500 Subject: [PATCH 2/2] fix indentation --- ggml/src/ggml-cuda/argsort.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/argsort.cu b/ggml/src/ggml-cuda/argsort.cu index 0a8ddd44fbd..da9652c3be8 100644 --- a/ggml/src/ggml-cuda/argsort.cu +++ b/ggml/src/ggml-cuda/argsort.cu @@ -44,7 +44,7 @@ static void argsort_f32_i32_cuda_cub(ggml_cuda_pool & pool, const dim3 offset_grid((nrows + block_size - 1) / block_size); init_offsets<<<offset_grid, block_size, 0, stream>>>(d_offsets, ncols, nrows); - CUDA_CHECK(cudaMemcpyAsync(temp_keys, x, ncols * nrows * sizeof(float), cudaMemcpyDeviceToDevice, stream)); + CUDA_CHECK(cudaMemcpyAsync(temp_keys, x, ncols * nrows * sizeof(float), 
cudaMemcpyDeviceToDevice, stream)); size_t temp_storage_bytes = 0; @@ -59,6 +59,7 @@ static void argsort_f32_i32_cuda_cub(ggml_cuda_pool & pool, dst, ncols * nrows, nrows, d_offsets, d_offsets + 1, 0, sizeof(float) * 8, stream); } + ggml_cuda_pool_alloc temp_storage_alloc(pool, temp_storage_bytes); void * d_temp_storage = temp_storage_alloc.get();