From fc73924db24f055aab25121ac13c7fda28843877 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Wed, 16 Oct 2024 20:10:01 +0200 Subject: [PATCH 1/2] ggml : remove redundant set of contexts used field (ggml/978) This commit removes the setting of the `used` field of the contexts in the global state (g_state) in `ggml_init`. The motivation for this change is that I believe that this additional initialization might not be required after the changes in Commit 45fc4fed0b9fb5b1af4a8525cbebb95e11208732 ("sync : latest changes from whisper.cpp"), which changed the initialization of the contexts field from `{ 0 }` to `{ { 0 } }`: ```console g_state = (struct ggml_state) { - /*.contexts =*/ { 0 }, + /*.contexts =*/ { { 0 } }, }; ``` My understanding is that the `{0}` initialization might not have zero-initialized all the nested fields in every array element because of compiler differences, and might have been the reason for having the explicit setting of the `used` fields to false. --- ggml/src/ggml.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index b16c462fa1491..1741d333834ed 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -3852,10 +3852,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { }, }; - for (int i = 0; i < GGML_MAX_CONTEXTS; ++i) { - g_state.contexts[i].used = false; - } - const uint64_t t_end = ggml_time_us(); UNUSED(t_end); GGML_PRINT_DEBUG("%s: g_state initialized in %f ms\n", __func__, (t_end - t_start)/1000.0f); From 16c5486d1a75df0d41cbbccd28bad0792a300d28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Fri, 18 Oct 2024 09:24:44 +0200 Subject: [PATCH 2/2] CUDA: fix 1D im2col, add tests (ggml/993) --- ggml/src/ggml-cuda.cu | 1 - ggml/src/ggml-cuda/im2col.cu | 6 +++--- tests/test-backend-ops.cpp | 36 +++++++++++++++++++++++++++++++----- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu index 
1338bd45836bb..fa280b529bcb8 100644 --- a/ggml/src/ggml-cuda.cu +++ b/ggml/src/ggml-cuda.cu @@ -3141,7 +3141,6 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g case GGML_OP_ROPE: return ggml_is_contiguous(op->src[0]); case GGML_OP_IM2COL: - return op->src[0]->type == GGML_TYPE_F16; case GGML_OP_POOL_2D: case GGML_OP_SUM: case GGML_OP_SUM_ROWS: diff --git a/ggml/src/ggml-cuda/im2col.cu b/ggml/src/ggml-cuda/im2col.cu index 16463ab0fb683..86a54e42bb7e6 100644 --- a/ggml/src/ggml-cuda/im2col.cu +++ b/ggml/src/ggml-cuda/im2col.cu @@ -91,9 +91,9 @@ void ggml_cuda_op_im2col(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { const int64_t OH = is_2D ? dst->ne[2] : 1; const int64_t OW = dst->ne[1]; - const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32 - const int64_t batch = src1->ne[3]; - const size_t batch_offset = src1->nb[3] / 4; // nb is byte offset, src is type float32 + const size_t delta_offset = src1->nb[is_2D ? 2 : 1] / 4; // nb is byte offset, src is type float32 + const int64_t batch = src1->ne[is_2D ? 3 : 2]; + const size_t batch_offset = src1->nb[is_2D ? 
3 : 2] / 4; // nb is byte offset, src is type float32 if(dst->type == GGML_TYPE_F16) { im2col_cuda_f16(src1_d, (half *) dst_d, IW, IH, OW, OH, KW, KH, IC, batch, batch_offset, delta_offset, s0, s1, p0, p1, d0, d1, stream); diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index e087f7ba5be16..7e769a91a1944 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -3308,15 +3308,41 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() { } } - test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32)); - test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32)); - test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16)); - // test cases for 1D im2col + // im2col 1D test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false)); test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false)); test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {3000, 128, 1, 1}, {3, 128, 1280, 1}, 1, 0, 1, 0, 1, 0, false)); + for (int s0 : {1, 3}) { + for (int p0 : {0, 3}) { + for (int d0 : {1, 3}) { + test_cases.emplace_back(new test_im2col( + GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32, {20, 2, 2, 1}, {3, 2, 2, 1}, + s0, 0, p0, 0, d0, 0, false)); + } + } + } + + // im2col 2D + test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F32, GGML_TYPE_F32)); + test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F32)); + test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16)); + for (int s0 : {1, 3}) { + for (int s1 : {1, 3}) { + for (int p0 : {0, 3}) { + for (int p1 : {0, 3}) { + for (int d0 : {1, 3}) { + for (int d1 : {1, 3}) { + test_cases.emplace_back(new test_im2col( + GGML_TYPE_F32, GGML_TYPE_F32, 
GGML_TYPE_F32, {20, 20, 2, 2}, {3, 3, 2, 2}, + s0, s1, p0, p1, d0, d1, true)); + } + } + } + } + } + } - // test cases for 2D im2col + // extra tests for im2col 2D test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 32}, {3, 3, 1, 32}, 1, 1, 1, 1, 1, 1, true)); test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 2, 32}, {3, 3, 2, 32}, 1, 1, 1, 1, 1, 1, true)); test_cases.emplace_back(new test_im2col(GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_F16, {12, 12, 1, 1024}, {3, 3, 1, 1024}, 1, 1, 1, 1, 1, 1, true));