From ebd4720ffe8e7f0de148fae793e096fbd4534572 Mon Sep 17 00:00:00 2001 From: jacktengg Date: Mon, 25 May 2026 00:31:16 +0800 Subject: [PATCH] branch-4.0: [fix](be) Handle legacy DecimalV2 segments with missing precision/frac (#63569) Pick: #63569 Problem Summary: After upgrading from 3.1.4 to 4.0.4, queries on DecimalV2 columns fail with: [INTERNAL_ERROR]meet invalid precision: real_precision=0, max_decimal_precision=27, min_decimal_precision=1 Root cause: Segments written by Doris < 2.1.0 (before #26572) do not persist precision/frac in ColumnMetaPB; they default to 0 when read back. Since #35222, DataTypeFactory passes those raw values as original_precision/original_scale into DataTypeDecimalV2, which calls check_type_precision(0) and throws. 3.1.4 was unaffected because the old code hardcoded (27, 9). Fix: In _create_primitive_data_type for OLAP_FIELD_TYPE_DECIMAL, when precision is not positive (legacy segment), pass UINT32_MAX to DataTypeDecimalV2 to signal that the original precision/scale are unknown. DecimalScaleInfo already treats UINT32_MAX as 'unknown' and falls back to the in-memory (27, 9) representation in get_original_precision()/get_original_scale(). Fix "meet invalid precision: real_precision=0" when querying DecimalV2 columns on segments written by Doris versions older than 2.1.0. - Test: Unit Test - Added DataTypeDecimalTest.create_decimalv2_from_legacy_tablet_column covering: legacy TabletColumn (unset precision/frac), modern TabletColumn with decimal(26,6), and legacy segment_v2::ColumnMetaPB with default-0 precision/frac. - Behavior changed: No - Does this need documentation: No Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- be/src/vec/data_types/data_type_factory.cpp | 19 +- .../vec/data_types/data_type_decimal_test.cpp | 176 ++++++++++++++++++ 2 files changed, 193 insertions(+), 2 deletions(-) diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 5849781a2e5846..e6bfadd32a4a07 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -385,8 +385,23 @@ DataTypePtr DataTypeFactory::create_data_type(const segment_v2::ColumnMetaPB& pc nested = std::make_shared(dataTypes, names); } else { // TODO add precision and frac - nested = _create_primitive_data_type(static_cast(pcolumn.type()), - pcolumn.precision(), pcolumn.frac(), -1); + auto meta_precision = pcolumn.precision(); + auto meta_scale = pcolumn.frac(); + if (pcolumn.type() == static_cast(FieldType::OLAP_FIELD_TYPE_DECIMAL)) { + // Segments written by Doris < 2.1.0 (before #26572) do not persist + // precision/frac in ColumnMetaPB, so they default to 0 when read back. + // Pass UINT32_MAX to DataTypeDecimalV2 to signal that the original + // precision/scale are unknown; otherwise check_type_precision(0) throws + // "meet invalid precision: real_precision=0". + UInt32 orig_precision = + meta_precision > 0 ? static_cast(meta_precision) : UINT32_MAX; + UInt32 orig_scale = meta_precision > 0 ? static_cast(meta_scale) : UINT32_MAX; + nested = _create_primitive_data_type(static_cast(pcolumn.type()), + orig_precision, orig_scale, -1); + } else { + nested = _create_primitive_data_type(static_cast(pcolumn.type()), + meta_precision, meta_scale, -1); + } } if (pcolumn.is_nullable() && nested) { diff --git a/be/test/vec/data_types/data_type_decimal_test.cpp b/be/test/vec/data_types/data_type_decimal_test.cpp index 4d96c3a22604a2..c462d0414e3b8f 100644 --- a/be/test/vec/data_types/data_type_decimal_test.cpp +++ b/be/test/vec/data_types/data_type_decimal_test.cpp @@ -29,6 +29,7 @@ #include #include "common/exception.h" +#include "gen_cpp/segment_v2.pb.h" #include "testutil/test_util.h" #include "vec/columns/column.h" #include "vec/common/assert_cast.h" @@ -37,8 +38,12 @@ #include "vec/data_types/common_data_type_serder_test.h" #include "vec/data_types/common_data_type_test.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_agg_state.h" +#include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_factory.hpp" +#include "vec/data_types/data_type_map.h" #include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_struct.h" namespace doris::vectorized { static std::string test_data_dir; @@ -474,6 +479,177 @@ TEST_F(DataTypeDecimalTest, ser_deser) { test_func(dt_decimal256_3, *column_decimal256_3, USE_CONST_SERDE); test_func(dt_decimal256_3, *column_decimal256_3, AGGREGATION_2_1_VERSION); } +// Regression for legacy DecimalV2 segments written by Doris < 2.1.0 (before +// #26572): ColumnMetaPB.precision/frac are absent and default to 0 when read +// back. DataTypeFactory must not pass 0 as the original precision/scale to +// DataTypeDecimalV2, otherwise check_type_precision throws +// "meet invalid precision: real_precision=0". +TEST_F(DataTypeDecimalTest, create_decimalv2_from_legacy_tablet_column) { + // Case 1: legacy segment — precision/frac missing (default 0) + { + TabletColumn col; + col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL); + // intentionally do not set precision/frac to mimic old segments + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col, false)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(dt.get()); + EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE); + // When original precision/scale are unknown, get_original_* should fall + // back to the in-memory (27, 9) representation. + EXPECT_EQ(decv2->get_original_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE); + } + // Case 2: new segment — precision/frac are persisted (e.g. decimal(26,6)) + { + TabletColumn col; + col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL); + col.set_precision(26); + col.set_frac(6); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col, false)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(dt.get()); + EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE); + EXPECT_EQ(decv2->get_original_precision(), 26U); + EXPECT_EQ(decv2->get_original_scale(), 6U); + } + // Case 3: same regression via the segment-read path that consumes + // ColumnMetaPB directly (this is the exact code path that broke after + // PR #35222 when reading old DecimalV2 segments). + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_DECIMAL)); + // precision/frac intentionally unset -> default 0 + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(dt.get()); + EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE); + EXPECT_EQ(decv2->get_original_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE); + } +} + +// Regression for complex types wrapping legacy DecimalV2 children whose +// segment ColumnMetaPB.precision/frac default to 0. The fix in +// DataTypeFactory::create_data_type(segment_v2::ColumnMetaPB&) must +// propagate through Array / Map / Struct / AggState recursively. +TEST_F(DataTypeDecimalTest, create_complex_types_with_legacy_decimalv2) { + auto make_legacy_decv2_meta = []() { + segment_v2::ColumnMetaPB child; + child.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_DECIMAL)); + // precision/frac intentionally unset -> default 0 + return child; + }; + auto unwrap_nullable = [](const DataTypePtr& dt) -> DataTypePtr { + if (dt && dt->is_nullable()) { + return assert_cast(dt.get())->get_nested_type(); + } + return dt; + }; + + // Array + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_ARRAY)); + *meta.add_children_columns() = make_legacy_decv2_meta(); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY); + const auto* arr = assert_cast(dt.get()); + auto elem = unwrap_nullable(arr->get_nested_type()); + EXPECT_EQ(elem->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(elem.get()); + EXPECT_EQ(decv2->get_original_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE); + } + + // Map + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_MAP)); + auto* k = meta.add_children_columns(); + k->set_type(static_cast(FieldType::OLAP_FIELD_TYPE_INT)); + *meta.add_children_columns() = make_legacy_decv2_meta(); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_MAP); + const auto* m = assert_cast(dt.get()); + EXPECT_EQ(unwrap_nullable(m->get_key_type())->get_primitive_type(), TYPE_INT); + auto v = unwrap_nullable(m->get_value_type()); + EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(v.get()); + EXPECT_EQ(decv2->get_original_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + } + + // Struct + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_STRUCT)); + *meta.add_children_columns() = make_legacy_decv2_meta(); + auto* second = meta.add_children_columns(); + second->set_type(static_cast(FieldType::OLAP_FIELD_TYPE_INT)); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_STRUCT); + const auto* s = assert_cast(dt.get()); + ASSERT_EQ(s->get_elements().size(), 2u); + EXPECT_EQ(unwrap_nullable(s->get_element(0))->get_primitive_type(), TYPE_DECIMALV2); + EXPECT_EQ(unwrap_nullable(s->get_element(1))->get_primitive_type(), TYPE_INT); + } + + // Nested: Array> + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_ARRAY)); + auto* map_child = meta.add_children_columns(); + map_child->set_type(static_cast(FieldType::OLAP_FIELD_TYPE_MAP)); + auto* k = map_child->add_children_columns(); + k->set_type(static_cast(FieldType::OLAP_FIELD_TYPE_INT)); + *map_child->add_children_columns() = make_legacy_decv2_meta(); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY); + auto inner = + unwrap_nullable(assert_cast(dt.get())->get_nested_type()); + EXPECT_EQ(inner->get_primitive_type(), TYPE_MAP); + auto v = unwrap_nullable(assert_cast(inner.get())->get_value_type()); + EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2); + } + + // AggState (count) with legacy DecimalV2 sub-type + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_AGG_STATE)); + meta.set_function_name("count"); + meta.set_result_is_nullable(false); + meta.set_be_exec_version(BeExecVersionManager::get_newest_version()); + *meta.add_children_columns() = make_legacy_decv2_meta(); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_AGG_STATE); + const auto* agg = assert_cast(dt.get()); + ASSERT_EQ(agg->get_sub_types().size(), 1u); + auto sub = unwrap_nullable(agg->get_sub_types()[0]); + EXPECT_EQ(sub->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(sub.get()); + EXPECT_EQ(decv2->get_original_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE); + } +} + TEST_F(DataTypeDecimalTest, to_pb_column_meta) { auto test_func = [](auto dt, PGenericType_TypeId expected_type) { auto col_meta = std::make_shared();