diff --git a/docs/ops.md b/docs/ops.md index 0047ef3fa5e53..32b8e4302c7a2 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -82,6 +82,7 @@ Legend: | ROLL | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | | ROPE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | +| ROUND | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | diff --git a/docs/ops/CPU.csv b/docs/ops/CPU.csv index 21e0d1b3c9117..49e2b04f69519 100644 --- a/docs/ops/CPU.csv +++ b/docs/ops/CPU.csv @@ -1,6 +1,8 @@ "backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name" "CPU","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" "CPU","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" +"CPU","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" +"CPU","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" "CPU","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" "CPU","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" "CPU","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" @@ -61,6 +63,8 @@ "CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU" "CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" "CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" +"CPU","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" +"CPU","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" "CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" "CPU","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" "CPU","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" diff --git a/docs/ops/SYCL.csv b/docs/ops/SYCL.csv index 5d022ee91aa7a..226bcd28b6eb1 100644 --- a/docs/ops/SYCL.csv +++ b/docs/ops/SYCL.csv @@ -1,6 +1,8 @@ "backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name" "SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" @@ -61,6 +63,8 @@ "SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 36b23dc6d0d82..0401f054a087f 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -560,6 +560,7 @@ extern "C" { enum ggml_unary_op { GGML_UNARY_OP_ABS, + GGML_UNARY_OP_ROUND, GGML_UNARY_OP_SGN, GGML_UNARY_OP_NEG, GGML_UNARY_OP_STEP, @@ -1028,6 +1029,13 @@ extern "C" { GGML_API struct ggml_tensor * ggml_abs_inplace( struct ggml_context * ctx, struct ggml_tensor * a); + GGML_API struct ggml_tensor * ggml_round( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_round_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sgn( struct ggml_context * ctx, diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index 763ab099e31a6..080ba9cf8b4f3 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -8933,6 +8933,10 @@ void ggml_compute_forward_unary( { ggml_compute_forward_abs(params, dst); } break; + case GGML_UNARY_OP_ROUND: + { + ggml_compute_forward_round(params, dst); + } break; case GGML_UNARY_OP_SGN: { ggml_compute_forward_sgn(params, dst); diff --git a/ggml/src/ggml-cpu/unary-ops.cpp b/ggml/src/ggml-cpu/unary-ops.cpp index 4fce569b3bfc8..0c82e82b3a7c7 100644 --- a/ggml/src/ggml-cpu/unary-ops.cpp +++ b/ggml/src/ggml-cpu/unary-ops.cpp @@ -4,6 +4,10 @@ static inline float op_abs(float x) { return fabsf(x); } +static inline float op_round(float x) { + return roundf(x); +} + static inline float op_sgn(float x) { return (x > 0.f) ? 1.f : ((x < 0.f) ? -1.f : 0.f); } @@ -125,6 +129,10 @@ void ggml_compute_forward_abs(const ggml_compute_params * params, ggml_tensor * unary_op(params, dst); } +void ggml_compute_forward_round(const ggml_compute_params * params, ggml_tensor * dst) { + unary_op(params, dst); +} + void ggml_compute_forward_sgn(const ggml_compute_params * params, ggml_tensor * dst) { unary_op(params, dst); } diff --git a/ggml/src/ggml-cpu/unary-ops.h b/ggml/src/ggml-cpu/unary-ops.h index b1ade2c8e341f..c65f97e4b2c52 100644 --- a/ggml/src/ggml-cpu/unary-ops.h +++ b/ggml/src/ggml-cpu/unary-ops.h @@ -7,6 +7,7 @@ extern "C" { #endif void ggml_compute_forward_abs(const struct ggml_compute_params * params, struct ggml_tensor * dst); +void ggml_compute_forward_round(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_sgn(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_neg(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_step(const struct ggml_compute_params * params, struct ggml_tensor * dst); diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index c2da2fb48ad28..cc769b0a01b4d 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -39,6 +39,11 @@ static __dpct_inline__ T op_abs(T x) { return sycl::fabs(x); } +template +static __dpct_inline__ T op_round(T x) { + return sycl::round(x); +} + template static __dpct_inline__ T op_elu(T x) { return (x > static_cast(0.f)) ? x : sycl::expm1(x); @@ -164,6 +169,13 @@ static void unary_op_abs_kernel(const T * x, T * dst, const int k, const sycl::n } } +template +static void unary_op_round_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_round(x[i]); + } +} + template static void unary_op_elu_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { SYCL_GLOBAL_ID_LOOP(k, item_ct1) { @@ -661,6 +673,19 @@ static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor }); } +static inline void ggml_sycl_op_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, 256); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256), + sycl::range<1>(256)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_round_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} + static inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { @@ -1139,6 +1164,11 @@ void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { ggml_sycl_op_abs(ctx, dst); } +void ggml_sycl_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); + ggml_sycl_op_round(ctx, dst); +} + void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); ggml_sycl_op_elu(ctx, dst); diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp index 50749e87d783e..b10122af965f0 100644 --- a/ggml/src/ggml-sycl/element_wise.hpp +++ b/ggml/src/ggml-sycl/element_wise.hpp @@ -75,6 +75,8 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst); + void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 78853eb67671c..161aaa7a3592f 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -3636,6 +3636,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg case GGML_UNARY_OP_ABS: ggml_sycl_abs(ctx, dst); break; + case GGML_UNARY_OP_ROUND: + ggml_sycl_round(ctx, dst); + break; case GGML_UNARY_OP_ELU: ggml_sycl_elu(ctx, dst); break; @@ -4192,6 +4195,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_SGN: case GGML_UNARY_OP_ABS: + case GGML_UNARY_OP_ROUND: case GGML_UNARY_OP_ELU: #if defined (GGML_SYCL_F16) return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type); diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 3584827dca7fc..6258d0a843cc1 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -1129,6 +1129,7 @@ static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = { "ABS", + "ROUND", "SGN", "NEG", "STEP", @@ -1145,7 +1146,7 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = { "GELU_ERF", }; -static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15"); +static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16"); static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = { @@ -2481,6 +2482,20 @@ struct ggml_tensor * ggml_abs_inplace( return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS); } +// ggml_round + +struct ggml_tensor * ggml_round( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND); +} + +struct ggml_tensor * ggml_round_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND); +} + // ggml_sgn struct ggml_tensor * ggml_sgn( diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 507b691dc96e2..e480b3420464e 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -3567,6 +3567,45 @@ struct test_sin : public test_case { } }; +// GGML_OP_ROUND +struct test_round : public test_case { + const ggml_type type; + const std::array ne; + + std::string vars() override { + return VARS_TO_STR2(type, ne); + } + + test_round(ggml_type type = GGML_TYPE_F32, + std::array ne = {10, 5, 4, 3}) + : type(type), ne(ne) {} + + ggml_tensor * build_graph(ggml_context * ctx) override { + ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data()); + ggml_set_param(a); + ggml_set_name(a, "a"); + + ggml_tensor * out = ggml_round(ctx, a); + ggml_set_name(out, "out"); + + return out; + } + + void initialize_tensors(ggml_context * ctx) override { + for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + init_tensor_uniform(t, -10.0f, 10.0f); + } + } + + float grad_eps() override { + return 1.0f; + } + + bool grad_precise() override { + return false; + } +}; + // GGML_OP_COS struct test_cos : public test_case { const ggml_type type; @@ -6329,6 +6368,7 @@ static std::vector> make_test_cases_eval() { } for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) { + test_cases.emplace_back(new test_sqr (type)); test_cases.emplace_back(new test_sqrt (type)); test_cases.emplace_back(new test_log (type)); @@ -6336,13 +6376,14 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_cos (type)); test_cases.emplace_back(new test_clamp (type)); test_cases.emplace_back(new test_leaky_relu(type)); + test_cases.emplace_back(new test_round (type)); test_cases.emplace_back(new test_sqr (type, {7, 1, 5, 3})); test_cases.emplace_back(new test_sqrt (type, {7, 1, 5, 3})); test_cases.emplace_back(new test_log (type, {7, 1, 5, 3})); test_cases.emplace_back(new test_sin (type, {7, 1, 5, 3})); test_cases.emplace_back(new test_cos (type, {7, 1, 5, 3})); test_cases.emplace_back(new test_clamp (type, {7, 1, 5, 3})); - test_cases.emplace_back(new test_leaky_relu(type, {7, 1, 5, 3})); + test_cases.emplace_back(new test_round (type, {7, 1, 5, 3})); } test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 1, 1}, 5));