diff --git a/be/src/core/column/column_string.h b/be/src/core/column/column_string.h index 5a1537ff2c04eb..89d748e42fec56 100644 --- a/be/src/core/column/column_string.h +++ b/be/src/core/column/column_string.h @@ -298,7 +298,10 @@ class ColumnStr final : public COWHelper> { running_offset += lengths[i]; offsets_ptr[i] = static_cast(running_offset); } - chars.resize(offsets[old_rows + num - 1]); + // OFFSET_ONLY columns carry valid offsets but no real string payload. Use non-zero + // placeholders so char-padding shrink logic cannot recompute these offsets as zero-length + // strings when this column is nested under a struct that also contains CHAR fields. + chars.resize_fill(offsets[old_rows + num - 1], 1); } void insert_many_strings(const StringRef* strings, size_t num) override { diff --git a/be/src/exprs/function/function_struct_element.cpp b/be/src/exprs/function/function_struct_element.cpp index 770698259ee1be..bf38a09268d8bf 100644 --- a/be/src/exprs/function/function_struct_element.cpp +++ b/be/src/exprs/function/function_struct_element.cpp @@ -101,9 +101,9 @@ class FunctionStructElement : public IFunction { size_t index; RETURN_IF_ERROR(get_element_index(*struct_type, index_column, index_type, &index)); ColumnPtr res_column = struct_col->get_column_ptr(index); - ColumnPtr ele_column = res_column->clone_resized(res_column->size()); - //This function must return a ColumnNullable column, so it is necessary to convert the result column into ColumnNullable. - block.replace_by_position(result, make_nullable(ele_column)); + // This function must return a ColumnNullable column, so it is necessary to convert the + // result column into ColumnNullable. + block.replace_by_position(result, make_nullable(res_column)); return Status::OK(); } diff --git a/regression-test/data/nereids_rules_p0/column_pruning/string_length_column_pruning.out b/regression-test/data/nereids_rules_p0/column_pruning/string_length_column_pruning.out index 36b77d17167af3..0778c2a7afc352 100644 --- a/regression-test/data/nereids_rules_p0/column_pruning/string_length_column_pruning.out +++ b/regression-test/data/nereids_rules_p0/column_pruning/string_length_column_pruning.out @@ -1,4 +1,22 @@ -- This file is automatically generated. You should know what you did if you want to edit this +-- !struct_offset_group_min_by -- +\N v6 +1 v0 +2 v1 +3 v2 +4 v3 +5 v4 +6 v5 + +-- !struct_offset_group_count -- +\N 1 +1 1 +2 2 +3 2 +4 2 +5 1 +6 1 + -- !array_full_access_strips_offset -- 1 3 [1, 2, 3] diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy index 2a4d56b570b40e..660461864601b6 100644 --- a/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy +++ b/regression-test/suites/nereids_rules_p0/column_pruning/string_length_column_pruning.groovy @@ -90,6 +90,53 @@ suite("string_length_column_pruning") { notContains "type=bigint" } sql "select length(struct_element(struct_col, 'f3')) from slcp_str_tbl" + + sql """ DROP TABLE IF EXISTS slcp_struct_offset_group_tbl """ + sql """ + CREATE TABLE slcp_struct_offset_group_tbl ( + id INT, + val STRING, + s STRUCT + ) ENGINE = OLAP + DUPLICATE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 3 + PROPERTIES ("replication_allocation" = "tag.location.default: 1") + """ + sql """ + INSERT INTO slcp_struct_offset_group_tbl VALUES + (0, 'v0', named_struct('c', 'x', 'b', 'a')), + (1, 'v1', named_struct('c', 'x', 'b', 'bb')), + (2, 'v2', named_struct('c', 'x', 'b', 'ccc')), + (3, 'v3', named_struct('c', 'x', 'b', 'dddd')), + (4, 'v4', named_struct('c', 'x', 'b', 'eeeee')), + (5, 'v5', named_struct('c', 'x', 'b', 'ffffff')), + (6, 'v6', named_struct('c', 'x', 'b', NULL)), + (7, 'v7', named_struct('c', 'x', 'b', 'gg')), + (8, 'v8', named_struct('c', 'x', 'b', 'hhhh')), + (9, 'v9', named_struct('c', 'x', 'b', 'iii')) + """ + explain { + sql """ + select length(struct_element(s, 'b')), min_by(val, id) + from slcp_struct_offset_group_tbl + group by 1 + """ + contains "nested columns" + contains "s.b.OFFSET" + } + order_qt_struct_offset_group_min_by """ + select length(struct_element(s, 'b')), min_by(val, id) + from slcp_struct_offset_group_tbl + group by 1 + order by 1, 2 + """ + order_qt_struct_offset_group_count """ + select length(struct_element(s, 'b')), count(*) + from slcp_struct_offset_group_tbl + group by 1 + order by 1 + """ + // length() in both SELECT and WHERE: predicate must remain length(str_col) > 1, // never be rewritten to CAST(str_col AS int) > 1. Slot type must stay varchar. explain {