diff --git a/Testing/Temporary/CTestCostData.txt b/Testing/Temporary/CTestCostData.txt new file mode 100644 index 0000000000000..ed97d539c095c --- /dev/null +++ b/Testing/Temporary/CTestCostData.txt @@ -0,0 +1 @@ +--- diff --git a/docs/ops.md b/docs/ops.md index 0047ef3fa5e53..63f1e2e067162 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -107,4 +107,5 @@ Legend: | SWIGLU_OAI | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | TANH | ❌ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | 🟡 | ❌ | | TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| TRUNC | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | | UPSCALE | ❌ | 🟡 | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ | diff --git a/docs/ops/CPU.csv b/docs/ops/CPU.csv index 21e0d1b3c9117..01a6ae1095d0b 100644 --- a/docs/ops/CPU.csv +++ b/docs/ops/CPU.csv @@ -1,6 +1,8 @@ "backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name" "CPU","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" "CPU","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" +"CPU","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" +"CPU","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" "CPU","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" "CPU","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" "CPU","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" @@ -61,6 +63,8 @@ "CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","CPU" "CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" "CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" +"CPU","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" +"CPU","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" "CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" "CPU","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","CPU" "CPU","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","CPU" diff --git a/docs/ops/SYCL.csv b/docs/ops/SYCL.csv index 5d022ee91aa7a..10f2061639f7c 100644 --- a/docs/ops/SYCL.csv +++ b/docs/ops/SYCL.csv @@ -1,6 +1,8 @@ "backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name" "SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" @@ -61,6 +63,8 @@ "SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" +"SYCL0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 36b23dc6d0d82..1af83873ea016 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -574,6 +574,7 @@ extern "C" { GGML_UNARY_OP_HARDSIGMOID, GGML_UNARY_OP_EXP, GGML_UNARY_OP_GELU_ERF, + GGML_UNARY_OP_TRUNC, GGML_UNARY_OP_COUNT, }; @@ -1028,6 +1029,14 @@ extern "C" { GGML_API struct ggml_tensor * ggml_abs_inplace( struct ggml_context * ctx, struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_trunc( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_trunc_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a); GGML_API struct ggml_tensor * ggml_sgn( struct ggml_context * ctx, diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index c131290849538..f0f80e4a4b9fb 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -2169,6 +2169,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { case GGML_OP_UNARY: switch (ggml_get_unary_op(node)) { case GGML_UNARY_OP_ABS: + case GGML_UNARY_OP_TRUNC: case GGML_UNARY_OP_SGN: case GGML_UNARY_OP_NEG: case GGML_UNARY_OP_STEP: diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index 763ab099e31a6..1837ef8642f78 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -8933,6 +8933,10 @@ void ggml_compute_forward_unary( { ggml_compute_forward_abs(params, dst); } break; + case GGML_UNARY_OP_TRUNC: + { + ggml_compute_forward_trunc(params, dst); + } break; case GGML_UNARY_OP_SGN: { ggml_compute_forward_sgn(params, dst); diff --git a/ggml/src/ggml-cpu/unary-ops.cpp b/ggml/src/ggml-cpu/unary-ops.cpp index 4fce569b3bfc8..fe7deedd1a783 100644 --- a/ggml/src/ggml-cpu/unary-ops.cpp +++ b/ggml/src/ggml-cpu/unary-ops.cpp @@ -4,6 +4,10 @@ static inline float op_abs(float x) { return fabsf(x); } +static inline float op_trunc(float x) { + return truncf(x); +} + static inline float op_sgn(float x) { return (x > 0.f) ? 1.f : ((x < 0.f) ? -1.f : 0.f); } @@ -125,6 +129,10 @@ void ggml_compute_forward_abs(const ggml_compute_params * params, ggml_tensor * unary_op(params, dst); } +void ggml_compute_forward_trunc(const ggml_compute_params * params, ggml_tensor * dst) { + unary_op(params, dst); +} + void ggml_compute_forward_sgn(const ggml_compute_params * params, ggml_tensor * dst) { unary_op(params, dst); } diff --git a/ggml/src/ggml-cpu/unary-ops.h b/ggml/src/ggml-cpu/unary-ops.h index b1ade2c8e341f..5241f43e4458b 100644 --- a/ggml/src/ggml-cpu/unary-ops.h +++ b/ggml/src/ggml-cpu/unary-ops.h @@ -7,6 +7,7 @@ extern "C" { #endif void ggml_compute_forward_abs(const struct ggml_compute_params * params, struct ggml_tensor * dst); +void ggml_compute_forward_trunc(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_sgn(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_neg(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_step(const struct ggml_compute_params * params, struct ggml_tensor * dst); diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index c2da2fb48ad28..2fe226b7066f1 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -39,6 +39,11 @@ static __dpct_inline__ T op_abs(T x) { return sycl::fabs(x); } +template +static __dpct_inline__ T op_trunc(T x) { + return sycl::trunc(x); +} + template static __dpct_inline__ T op_elu(T x) { return (x > static_cast(0.f)) ? x : sycl::expm1(x); @@ -164,6 +169,13 @@ static void unary_op_abs_kernel(const T * x, T * dst, const int k, const sycl::n } } +template +static void unary_op_trunc_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_trunc(x[i]); + } +} + template static void unary_op_elu_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { SYCL_GLOBAL_ID_LOOP(k, item_ct1) { @@ -661,6 +673,19 @@ static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor }); } +static inline void ggml_sycl_op_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, 256); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256), + sycl::range<1>(256)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_trunc_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} + static inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { @@ -1139,6 +1164,11 @@ void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { ggml_sycl_op_abs(ctx, dst); } +void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); + ggml_sycl_op_trunc(ctx, dst); +} + void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); ggml_sycl_op_elu(ctx, dst); diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp index 50749e87d783e..eb517e020706a 100644 --- a/ggml/src/ggml-sycl/element_wise.hpp +++ b/ggml/src/ggml-sycl/element_wise.hpp @@ -75,6 +75,8 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst); + void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 78853eb67671c..bb8bc36aeec3b 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -3636,6 +3636,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg case GGML_UNARY_OP_ABS: ggml_sycl_abs(ctx, dst); break; + case GGML_UNARY_OP_TRUNC: + ggml_sycl_trunc(ctx, dst); + break; case GGML_UNARY_OP_ELU: ggml_sycl_elu(ctx, dst); break; @@ -4192,6 +4195,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_SGN: case GGML_UNARY_OP_ABS: + case GGML_UNARY_OP_TRUNC: case GGML_UNARY_OP_ELU: #if defined (GGML_SYCL_F16) return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type); diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp index 74a4794d34f9f..58a26c43f62de 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp @@ -928,7 +928,7 @@ void write_output_files() { std::vector btypes = {"f16", "f32"}; #if defined(GGML_VULKAN_INTEGER_DOT_GLSLC_SUPPORT) - btypes.push_back("q8_1"); + btypes.push_back("q8_1"); #endif for (const std::string& btype : btypes) { diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 3584827dca7fc..72f57654b0918 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -1143,9 +1143,10 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = { "HARDSIGMOID", "EXP", "GELU_ERF", + "TRUNC", }; -static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15"); +static_assert(GGML_UNARY_OP_COUNT == 16, "GGML_UNARY_OP_COUNT != 16"); static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = { @@ -2481,6 +2482,20 @@ struct ggml_tensor * ggml_abs_inplace( return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS); } +// ggml_trunc + +struct ggml_tensor * ggml_trunc( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC); +} + +struct ggml_tensor * ggml_trunc_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC); +} + // ggml_sgn struct ggml_tensor * ggml_sgn( diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 507b691dc96e2..3412decbd86f6 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -3567,6 +3567,49 @@ struct test_sin : public test_case { } }; +// GGML_OP_TRUNC +// Note: TRUNC is not differentiable, so gradient tests are skipped. +struct test_trunc : public test_case { + const ggml_type type; + const std::array ne; + + std::string vars() override { + return VARS_TO_STR2(type, ne); + } + + test_trunc(ggml_type type = GGML_TYPE_F32, + std::array ne = {10, 2, 2, 2}) + : type(type), ne(ne) {} + + ggml_tensor * build_graph(ggml_context * ctx) override { + ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data()); + ggml_set_param(a); + ggml_set_name(a, "a"); + + ggml_tensor * out = ggml_trunc(ctx, a); + ggml_set_name(out, "out"); + + return out; + } + + void initialize_tensors(ggml_context * ctx) override { + for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + init_tensor_uniform(t, -6.5f, 6.5f); // Covers interval [-2*pi, 2*pi]. + } + } + + double max_maa_err() override { + if (type == GGML_TYPE_F16 || type == GGML_TYPE_BF16) { + return 1e-3; + } + return 1e-6; + } + + bool grad_precise() override { + return false; + } +}; + // GGML_OP_COS struct test_cos : public test_case { const ggml_type type; @@ -6329,6 +6372,7 @@ static std::vector> make_test_cases_eval() { } for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) { + test_cases.emplace_back(new test_sqr (type)); test_cases.emplace_back(new test_sqrt (type)); test_cases.emplace_back(new test_log (type)); @@ -6336,6 +6380,7 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_cos (type)); test_cases.emplace_back(new test_clamp (type)); test_cases.emplace_back(new test_leaky_relu(type)); + test_cases.emplace_back(new test_trunc (type)); test_cases.emplace_back(new test_sqr (type, {7, 1, 5, 3})); test_cases.emplace_back(new test_sqrt (type, {7, 1, 5, 3})); test_cases.emplace_back(new test_log (type, {7, 1, 5, 3})); @@ -6343,6 +6388,7 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_cos (type, {7, 1, 5, 3})); test_cases.emplace_back(new test_clamp (type, {7, 1, 5, 3})); test_cases.emplace_back(new test_leaky_relu(type, {7, 1, 5, 3})); + test_cases.emplace_back(new test_trunc (type, {7, 1, 5, 3})); } test_cases.emplace_back(new test_diag_mask_inf(GGML_TYPE_F32, {10, 10, 1, 1}, 5)); diff --git a/trunc_support.csv b/trunc_support.csv new file mode 100644 index 0000000000000..ac94446e88df5 --- /dev/null +++ b/trunc_support.csv @@ -0,0 +1 @@ +"backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name"