From be8a26f48711d9ac8c8d25bea56c1d0f84d59007 Mon Sep 17 00:00:00 2001 From: Mryange Date: Thu, 28 May 2026 15:27:48 +0800 Subject: [PATCH 1/2] upd --- be/src/core/column/column_const.h | 3 +- .../function/function_quantile_state.cpp | 2 + be/src/exprs/function/function_regexp.cpp | 20 ++++--- be/src/exprs/function/uniform.cpp | 2 + be/test/core/column/column_const_test.cpp | 13 +++++ be/test/exprs/function/function_math_test.cpp | 58 +++++++++++++++++++ .../function/function_quantile_state_test.cpp | 17 ++++++ .../exprs/function/function_string_test.cpp | 16 +++++ 8 files changed, 122 insertions(+), 9 deletions(-) diff --git a/be/src/core/column/column_const.h b/be/src/core/column/column_const.h index 7f648ece468dd1..f1f90d64d6c9ac 100644 --- a/be/src/core/column/column_const.h +++ b/be/src/core/column/column_const.h @@ -124,7 +124,8 @@ class ColumnConst final : public COWHelper { void resize(size_t new_size) override { s = new_size; } MutableColumnPtr clone_resized(size_t new_size) const override { - return ColumnConst::create(data, new_size, false, false); + auto cloned_data = data->clone_resized(data->size()); + return ColumnConst::create(std::move(cloned_data), new_size, false, false); } size_t size() const override { return s; } diff --git a/be/src/exprs/function/function_quantile_state.cpp b/be/src/exprs/function/function_quantile_state.cpp index eed78b97da9e71..f8b383ed15a6ef 100644 --- a/be/src/exprs/function/function_quantile_state.cpp +++ b/be/src/exprs/function/function_quantile_state.cpp @@ -162,6 +162,8 @@ class FunctionQuantileStatePercent : public IFunction { bool use_default_implementation_for_nulls() const override { return false; } + ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, uint32_t result, size_t input_rows_count) const override { auto res_data_column = ColumnFloat64::create(); diff --git a/be/src/exprs/function/function_regexp.cpp b/be/src/exprs/function/function_regexp.cpp index 65d8dd254477ff..f40d49e56d868e 100644 --- a/be/src/exprs/function/function_regexp.cpp +++ b/be/src/exprs/function/function_regexp.cpp @@ -33,6 +33,7 @@ #include "core/block/column_numbers.h" #include "core/block/column_with_type_and_name.h" #include "core/column/column.h" +#include "core/column/column_execute_util.h" #include "core/column/column_const.h" #include "core/column/column_nullable.h" #include "core/column/column_string.h" @@ -188,23 +189,26 @@ struct RegexpExtractEngine { }; struct RegexpCountImpl { + using StringColumnView = ColumnView; + static void execute_impl(FunctionContext* context, ColumnPtr argument_columns[], size_t input_rows_count, ColumnInt32::Container& result_data) { - const auto* str_col = check_and_get_column(argument_columns[0].get()); - const auto* pattern_col = check_and_get_column(argument_columns[1].get()); - for (int i = 0; i < input_rows_count; ++i) { + auto str_col = StringColumnView::create(argument_columns[0]); + auto pattern_col = StringColumnView::create(argument_columns[1]); + for (size_t i = 0; i < input_rows_count; ++i) { + DCHECK(!str_col.is_null_at(i)); + DCHECK(!pattern_col.is_null_at(i)); result_data[i] = _execute_inner_loop(context, str_col, pattern_col, i); } } - static int _execute_inner_loop(FunctionContext* context, const ColumnString* str_col, - const ColumnString* pattern_col, const size_t index_now) { + static int _execute_inner_loop(FunctionContext* context, const StringColumnView& str_col, + const StringColumnView& pattern_col, const size_t index_now) { re2::RE2* re = reinterpret_cast( context->get_function_state(FunctionContext::THREAD_LOCAL)); std::unique_ptr scoped_re; if (re == nullptr) { std::string error_str; - DCHECK(pattern_col); - const auto& pattern = pattern_col->get_data_at(index_check_const(index_now, false)); + const auto pattern = pattern_col.value_at(index_now); bool st = StringFunctions::compile_regex(pattern, &error_str, StringRef(), StringRef(), scoped_re); if (!st) { @@ -215,7 +219,7 @@ struct RegexpCountImpl { re = scoped_re.get(); } - const auto& str = str_col->get_data_at(index_now); + const auto str = str_col.value_at(index_now); int count = 0; size_t pos = 0; while (pos < str.size) { diff --git a/be/src/exprs/function/uniform.cpp b/be/src/exprs/function/uniform.cpp index 713d0f5c3ac511..9f1dd3ad0738ab 100644 --- a/be/src/exprs/function/uniform.cpp +++ b/be/src/exprs/function/uniform.cpp @@ -147,6 +147,8 @@ class FunctionUniform : public IFunction { static FunctionPtr create() { return std::make_shared>(); } String get_name() const override { return name; } + bool use_default_implementation_for_constants() const override { return false; } + size_t get_number_of_arguments() const override { return get_variadic_argument_types_impl().size(); } diff --git a/be/test/core/column/column_const_test.cpp b/be/test/core/column/column_const_test.cpp index f6f81ec3aaba4f..e9f57df213bce3 100644 --- a/be/test/core/column/column_const_test.cpp +++ b/be/test/core/column/column_const_test.cpp @@ -41,6 +41,19 @@ TEST(ColumnConstTest, TestCreate) { EXPECT_TRUE(!is_column_const(column_const2->get_data_column())); } +TEST(ColumnConstTest, clone_resized_clones_nested_data) { + auto column_data = ColumnHelper::create_column({7}); + auto column_const = ColumnConst::create(column_data, 3); + + auto cloned = column_const->clone_resized(5); + const auto& cloned_const = assert_cast(*cloned); + + EXPECT_EQ(cloned_const.size(), 5); + EXPECT_EQ(cloned_const.get_data_column_ptr()->size(), 1); + EXPECT_EQ(cloned_const.get_data_column().get_int(0), 7); + EXPECT_NE(column_const->get_data_column_ptr().get(), cloned_const.get_data_column_ptr().get()); +} + TEST(ColumnConstTest, TestFilter) { { auto column_data = ColumnHelper::create_column({7}); diff --git a/be/test/exprs/function/function_math_test.cpp b/be/test/exprs/function/function_math_test.cpp index 4e51a5dc3e700b..cf1b3a442ea686 100644 --- a/be/test/exprs/function/function_math_test.cpp +++ b/be/test/exprs/function/function_math_test.cpp @@ -18,14 +18,17 @@ #include #include #include +#include #include +#include "core/column/column_const.h" #include "core/data_type/data_type_decimal.h" #include "core/data_type/data_type_number.h" #include "core/data_type/data_type_string.h" #include "core/types.h" #include "exprs/function/function_test_util.h" #include "testutil/any_type.h" +#include "testutil/column_helper.h" namespace doris { @@ -532,6 +535,11 @@ TEST(MathFunctionTest, hex_test) { } TEST(MathFunctionTest, random_test) { +#ifndef NDEBUG + GTEST_SKIP() << "random(seed) exact-value assertions are release-only; debug builds run " + "mock_const_execute before the real call."; +#endif + std::string func_name = "random"; // random(x) InputTypeSet input_types = {Consted {PrimitiveType::TYPE_BIGINT}}; DataSet data_set = {{{Null()}, Null()}, @@ -547,6 +555,56 @@ TEST(MathFunctionTest, random_test) { } } +TEST(MathFunctionTest, uniform_mixed_const_probe_test) { + auto input_type = std::make_shared(); + auto return_type = std::make_shared(); + + Block block; + auto min_data = ColumnHelper::create_column({1}); + auto max_data = ColumnHelper::create_column({10}); + auto seed_column = ColumnHelper::create_column({101, 202, 303}); + + block.insert({ColumnConst::create(min_data, 3), input_type, "min"}); + block.insert({ColumnConst::create(max_data, 3), input_type, "max"}); + block.insert({seed_column, input_type, "seed"}); + + FunctionBasePtr function = SimpleFunctionFactory::instance().get_function( + "uniform", block.get_columns_with_type_and_name(), return_type); + ASSERT_TRUE(function != nullptr); + + block.insert({nullptr, return_type, "result"}); + + FunctionUtils fn_utils(return_type, {input_type, input_type, input_type}, false); + auto* fn_ctx = fn_utils.get_fn_ctx(); + std::vector> constant_cols { + std::make_shared(block.get_by_position(0).column), + std::make_shared(block.get_by_position(1).column), + nullptr, + }; + fn_ctx->set_constant_cols(constant_cols); + + ASSERT_TRUE(function->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL).ok()); + ASSERT_TRUE(function->open(fn_ctx, FunctionContext::THREAD_LOCAL).ok()); + + auto exec_status = function->execute(fn_ctx, block, {0, 1, 2}, 3, 3); + + static_cast(function->close(fn_ctx, FunctionContext::THREAD_LOCAL)); + static_cast(function->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); + + ASSERT_TRUE(exec_status.ok()) << exec_status.to_string(); + + const auto& result_column = assert_cast(*block.get_by_position(3).column); + auto expected_uniform = [](int64_t seed) { + std::mt19937_64 generator(seed); + std::uniform_int_distribution distribution(1, 10); + return distribution(generator); + }; + + EXPECT_EQ(result_column.get_element(0), expected_uniform(101)); + EXPECT_EQ(result_column.get_element(1), expected_uniform(202)); + EXPECT_EQ(result_column.get_element(2), expected_uniform(303)); +} + TEST(MathFunctionTest, conv_test) { std::string func_name = "conv"; diff --git a/be/test/exprs/function/function_quantile_state_test.cpp b/be/test/exprs/function/function_quantile_state_test.cpp index 1cb1ced1dae561..e8f2fca702895f 100644 --- a/be/test/exprs/function/function_quantile_state_test.cpp +++ b/be/test/exprs/function/function_quantile_state_test.cpp @@ -213,4 +213,21 @@ TEST(function_quantile_state_test, function_quantile_state_roundtrip) { 0.01); } +TEST(function_quantile_state_test, function_quantile_percent_mixed_const_test) { + std::string func_name = "quantile_percent"; + InputTypeSet input_types = {PrimitiveType::TYPE_QUANTILE_STATE, + ConstedNotnull {PrimitiveType::TYPE_FLOAT}}; + + QuantileState quantile_state; + quantile_state.add_value(1.0); + quantile_state.add_value(2.0); + quantile_state.add_value(3.0); + quantile_state.add_value(4.0); + quantile_state.add_value(5.0); + + DataSet data_set = {{{&quantile_state, 0.5F}, 3.0}}; + + static_cast(check_function(func_name, input_types, data_set)); +} + } // namespace doris diff --git a/be/test/exprs/function/function_string_test.cpp b/be/test/exprs/function/function_string_test.cpp index 53df245904c93a..2e1aaa839c464e 100644 --- a/be/test/exprs/function/function_string_test.cpp +++ b/be/test/exprs/function/function_string_test.cpp @@ -4003,4 +4003,20 @@ TEST(function_string_test, function_unicode_normalize_invalid_mode) { EXPECT_NE(Status::OK(), st); } +TEST(function_string_test, function_regexp_count_mixed_const_test) { + std::string func_name = "regexp_count"; + + InputTypeSet input_types = {PrimitiveType::TYPE_VARCHAR, PrimitiveType::TYPE_VARCHAR}; + DataSet data_set = { + {{std::string("a.b:c;d"), std::string("[.:;]")}, std::int32_t(3)}, + {{std::string("a1b2346c3d"), std::string("\\d+")}, std::int32_t(3)}, + {{std::string("abcd"), std::string("")}, std::int32_t(0)}, + {{std::string("book keeper"), std::string("oo|ee")}, std::int32_t(2)}, + {{Null(), std::string("\\d+")}, Null()}, + {{std::string("abcd"), Null()}, Null()}, + }; + + check_function_all_arg_comb(func_name, input_types, data_set); +} + } // namespace doris From 5dc83dfa6c37886b59b8842f06672bd8af907788 Mon Sep 17 00:00:00 2001 From: Mryange Date: Thu, 28 May 2026 15:33:50 +0800 Subject: [PATCH 2/2] format --- be/src/exprs/function/function_regexp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/exprs/function/function_regexp.cpp b/be/src/exprs/function/function_regexp.cpp index f40d49e56d868e..0476336f7ca8f2 100644 --- a/be/src/exprs/function/function_regexp.cpp +++ b/be/src/exprs/function/function_regexp.cpp @@ -33,8 +33,8 @@ #include "core/block/column_numbers.h" #include "core/block/column_with_type_and_name.h" #include "core/column/column.h" -#include "core/column/column_execute_util.h" #include "core/column/column_const.h" +#include "core/column/column_execute_util.h" #include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_vector.h"