-
Notifications
You must be signed in to change notification settings - Fork 3.8k
[refine](column) replace field-based nested default with column-level operation in ColumnVariant #62547
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[refine](column) replace field-based nested default with column-level operation in ColumnVariant #62547
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2488,64 +2488,33 @@ bool ColumnVariant::try_insert_many_defaults_from_nested(const Subcolumns::NodeP | |
| return true; | ||
| } | ||
|
|
||
| /// Visitor that keeps @num_dimensions_to_keep dimensions in arrays | ||
| /// and replaces all scalars or nested arrays to @replacement at that level. | ||
| class FieldVisitorReplaceScalars : public StaticVisitor<Field> { | ||
| public: | ||
| FieldVisitorReplaceScalars(const Field& replacement_, size_t num_dimensions_to_keep_) | ||
| : replacement(replacement_), num_dimensions_to_keep(num_dimensions_to_keep_) {} | ||
|
|
||
| template <PrimitiveType T> | ||
| Field apply(const typename PrimitiveTypeTraits<T>::CppType& x) const { | ||
| if constexpr (T == TYPE_ARRAY) { | ||
| if (num_dimensions_to_keep == 0) { | ||
| return replacement; | ||
| } | ||
|
|
||
| const size_t size = x.size(); | ||
| Array res(size); | ||
| for (size_t i = 0; i < size; ++i) { | ||
| res[i] = apply_visitor( | ||
| FieldVisitorReplaceScalars(replacement, num_dimensions_to_keep - 1), x[i]); | ||
| } | ||
| return Field::create_field<TYPE_ARRAY>(res); | ||
| } else { | ||
| return replacement; | ||
| } | ||
| } | ||
|
|
||
| private: | ||
| const Field& replacement; | ||
| size_t num_dimensions_to_keep; | ||
| }; | ||
|
|
||
| bool ColumnVariant::try_insert_default_from_nested(const Subcolumns::NodePtr& entry) const { | ||
| const auto* leaf = get_leaf_of_the_same_nested(entry); | ||
| if (!leaf) { | ||
| return false; | ||
| } | ||
|
|
||
| auto last_field = leaf->data.get_last_field(); | ||
| if (last_field.is_null()) { | ||
| size_t leaf_size = leaf->data.size(); | ||
| if (leaf_size == 0) { | ||
| return false; | ||
| } | ||
|
|
||
| size_t leaf_num_dimensions = leaf->data.get_dimensions(); | ||
| size_t entry_num_dimensions = entry->data.get_dimensions(); | ||
|
|
||
| if (entry_num_dimensions > leaf_num_dimensions) { | ||
| throw doris::Exception( | ||
| ErrorCode::INVALID_ARGUMENT, | ||
| "entry_num_dimensions > leaf_num_dimensions, entry_num_dimensions={}, " | ||
| "leaf_num_dimensions={}", | ||
| entry_num_dimensions, leaf_num_dimensions); | ||
| // Preserve the original null guard: if the donor leaf's last row is NULL, | ||
| // fall back to insert_default() so that a missing sibling stays NULL too. | ||
| if (leaf->data.is_null_at(leaf_size - 1)) { | ||
| return false; | ||
| } | ||
|
|
||
| auto default_scalar = entry->data.get_least_common_type()->get_default(); | ||
| FieldInfo field_info = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This changes the single-row path from inserting a typed default |
||
| .scalar_type_id = entry->data.least_common_type.get_base_type_id(), | ||
| .num_dimensions = entry->data.get_dimensions(), | ||
| }; | ||
|
|
||
| auto default_field = apply_visitor( | ||
| FieldVisitorReplaceScalars(default_scalar, leaf_num_dimensions), last_field); | ||
| entry->data.insert(std::move(default_field)); | ||
| // Reuse the same column-level approach as try_insert_many_defaults_from_nested: | ||
| // cut the last row from leaf, replace scalar values with defaults, keep array structure. | ||
| auto new_subcolumn = leaf->data.cut(leaf_size - 1, 1).clone_with_default_values(field_info); | ||
| entry->data.insert_range_from(new_subcolumn, 0, 1); | ||
| ENABLE_CHECK_CONSISTENCY(this); | ||
| return true; | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3082,30 +3082,6 @@ TEST_F(ColumnVariantTest, get_field_info_all_types) { | |
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The patch removes the only direct test around the deleted transformation logic, but it does not add any replacement coverage for the behavior that changed in |
||
| } | ||
|
|
||
| TEST_F(ColumnVariantTest, field_visitor) { | ||
| // Test replacing scalar values in a flat array | ||
| { | ||
| Array array; | ||
| array.push_back(Field::create_field<TYPE_BIGINT>(Int64(1))); | ||
| array.push_back(Field::create_field<TYPE_BIGINT>(Int64(2))); | ||
| array.push_back(Field::create_field<TYPE_BIGINT>(Int64(3))); | ||
|
|
||
| Field field = Field::create_field<TYPE_ARRAY>(std::move(array)); | ||
| Field replacement = Field::create_field<TYPE_BIGINT>(Int64(42)); | ||
| Field result = apply_visitor(FieldVisitorReplaceScalars(replacement, 0), field); | ||
|
|
||
| EXPECT_EQ(result.get<TYPE_BIGINT>(), 42); | ||
|
|
||
| Field replacement1 = Field::create_field<TYPE_BIGINT>(Int64(42)); | ||
| Field result1 = apply_visitor(FieldVisitorReplaceScalars(replacement, 1), field); | ||
|
|
||
| EXPECT_EQ(result1.get<TYPE_ARRAY>().size(), 3); | ||
| EXPECT_EQ(result1.get<TYPE_ARRAY>()[0].get<TYPE_BIGINT>(), 42); | ||
| EXPECT_EQ(result1.get<TYPE_ARRAY>()[1].get<TYPE_BIGINT>(), 42); | ||
| EXPECT_EQ(result1.get<TYPE_ARRAY>()[2].get<TYPE_BIGINT>(), 42); | ||
| } | ||
| } | ||
|
|
||
| TEST_F(ColumnVariantTest, subcolumn_operations_coverage) { | ||
| // Test insert_range_from | ||
| { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This changes behavior for nullable nested siblings. Before this patch,
try_insert_default_from_nested()calledget_last_field()and returnedfalsewhen the donor leaf row wasNULL, so the caller fell back toentry->data.insert_default(). After the refactor we always cut the last donor row and rebuild a default-shaped subcolumn from it, which means a row likeitems.tags = nullcan now synthesize a nested sibling value for a missingitems.idinstead of keeping that siblingNULLtoo. That is a user-visible semantic change in the JSON/variant parse path, not just a refactor. Please preserve the old null guard here or add an explicit test showing the new behavior is intended.