diff --git a/be/src/core/data_type/data_type_factory.cpp b/be/src/core/data_type/data_type_factory.cpp index 256493cba04e34..ab6551d23ad5dd 100644 --- a/be/src/core/data_type/data_type_factory.cpp +++ b/be/src/core/data_type/data_type_factory.cpp @@ -379,8 +379,23 @@ DataTypePtr DataTypeFactory::create_data_type(const segment_v2::ColumnMetaPB& pc nested = std::make_shared(dataTypes, names); } else { // TODO add precision and frac - nested = _create_primitive_data_type(static_cast(pcolumn.type()), - pcolumn.precision(), pcolumn.frac(), -1); + auto meta_precision = pcolumn.precision(); + auto meta_scale = pcolumn.frac(); + if (pcolumn.type() == static_cast(FieldType::OLAP_FIELD_TYPE_DECIMAL)) { + // Segments written by Doris < 2.1.0 (before #26572) do not persist + // precision/frac in ColumnMetaPB, so they default to 0 when read back. + // Pass UINT32_MAX to DataTypeDecimalV2 to signal that the original + // precision/scale are unknown; otherwise check_type_precision(0) throws + // "meet invalid precision: real_precision=0". + UInt32 orig_precision = + meta_precision > 0 ? static_cast(meta_precision) : UINT32_MAX; + UInt32 orig_scale = meta_precision > 0 ? static_cast(meta_scale) : UINT32_MAX; + nested = _create_primitive_data_type(static_cast(pcolumn.type()), + orig_precision, orig_scale, -1); + } else { + nested = _create_primitive_data_type(static_cast(pcolumn.type()), + meta_precision, meta_scale, -1); + } } if (pcolumn.is_nullable() && nested) { diff --git a/be/test/core/data_type/data_type_decimal_test.cpp b/be/test/core/data_type/data_type_decimal_test.cpp index 312924b5cee2a6..952f8acb3662a8 100644 --- a/be/test/core/data_type/data_type_decimal_test.cpp +++ b/be/test/core/data_type/data_type_decimal_test.cpp @@ -28,16 +28,24 @@ #include #include +#include "agent/be_exec_version_manager.h" #include "common/exception.h" #include "core/assert_cast.h" #include "core/column/column.h" #include "core/data_type/common_data_type_serder_test.h" #include "core/data_type/common_data_type_test.h" #include "core/data_type/data_type.h" +#include "core/data_type/data_type_agg_state.h" +#include "core/data_type/data_type_array.h" #include "core/data_type/data_type_factory.hpp" +#include "core/data_type/data_type_map.h" +#include "core/data_type/data_type_nullable.h" #include "core/data_type/data_type_number.h" +#include "core/data_type/data_type_struct.h" #include "core/field.h" #include "core/types.h" +#include "gen_cpp/segment_v2.pb.h" +#include "storage/tablet/tablet_schema.h" #include "testutil/test_util.h" namespace doris { @@ -446,6 +454,177 @@ TEST_F(DataTypeDecimalTest, ser_deser) { test_func(dt_decimal256_2, *column_decimal256_2, BeExecVersionManager::max_be_exec_version); test_func(dt_decimal256_3, *column_decimal256_3, BeExecVersionManager::max_be_exec_version); } +// Regression for legacy DecimalV2 segments written by Doris < 2.1.0 (before +// #26572): ColumnMetaPB.precision/frac are absent and default to 0 when read +// back. DataTypeFactory must not pass 0 as the original precision/scale to +// DataTypeDecimalV2, otherwise check_type_precision throws +// "meet invalid precision: real_precision=0". +TEST_F(DataTypeDecimalTest, create_decimalv2_from_legacy_tablet_column) { + // Case 1: legacy segment — precision/frac missing (default 0) + { + TabletColumn col; + col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL); + // intentionally do not set precision/frac to mimic old segments + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col, false)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(dt.get()); + EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE); + // When original precision/scale are unknown, get_original_* should fall + // back to the in-memory (27, 9) representation. + EXPECT_EQ(decv2->get_original_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE); + } + // Case 2: new segment — precision/frac are persisted (e.g. decimal(26,6)) + { + TabletColumn col; + col.set_type(FieldType::OLAP_FIELD_TYPE_DECIMAL); + col.set_precision(26); + col.set_frac(6); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(col, false)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(dt.get()); + EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE); + EXPECT_EQ(decv2->get_original_precision(), 26U); + EXPECT_EQ(decv2->get_original_scale(), 6U); + } + // Case 3: same regression via the segment-read path that consumes + // ColumnMetaPB directly (this is the exact code path that broke after + // PR #35222 when reading old DecimalV2 segments). + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_DECIMAL)); + // precision/frac intentionally unset -> default 0 + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(dt.get()); + EXPECT_EQ(decv2->get_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_scale(), BeConsts::MAX_DECIMALV2_SCALE); + EXPECT_EQ(decv2->get_original_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE); + } +} + +// Regression for complex types wrapping legacy DecimalV2 children whose +// segment ColumnMetaPB.precision/frac default to 0. The fix in +// DataTypeFactory::create_data_type(segment_v2::ColumnMetaPB&) must +// propagate through Array / Map / Struct / AggState recursively. +TEST_F(DataTypeDecimalTest, create_complex_types_with_legacy_decimalv2) { + auto make_legacy_decv2_meta = []() { + segment_v2::ColumnMetaPB child; + child.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_DECIMAL)); + // precision/frac intentionally unset -> default 0 + return child; + }; + auto unwrap_nullable = [](const DataTypePtr& dt) -> DataTypePtr { + if (dt && dt->is_nullable()) { + return assert_cast(dt.get())->get_nested_type(); + } + return dt; + }; + + // Array + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_ARRAY)); + *meta.add_children_columns() = make_legacy_decv2_meta(); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY); + const auto* arr = assert_cast(dt.get()); + auto elem = unwrap_nullable(arr->get_nested_type()); + EXPECT_EQ(elem->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(elem.get()); + EXPECT_EQ(decv2->get_original_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE); + } + + // Map + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_MAP)); + auto* k = meta.add_children_columns(); + k->set_type(static_cast(FieldType::OLAP_FIELD_TYPE_INT)); + *meta.add_children_columns() = make_legacy_decv2_meta(); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_MAP); + const auto* m = assert_cast(dt.get()); + EXPECT_EQ(unwrap_nullable(m->get_key_type())->get_primitive_type(), TYPE_INT); + auto v = unwrap_nullable(m->get_value_type()); + EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(v.get()); + EXPECT_EQ(decv2->get_original_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + } + + // Struct + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_STRUCT)); + *meta.add_children_columns() = make_legacy_decv2_meta(); + auto* second = meta.add_children_columns(); + second->set_type(static_cast(FieldType::OLAP_FIELD_TYPE_INT)); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_STRUCT); + const auto* s = assert_cast(dt.get()); + ASSERT_EQ(s->get_elements().size(), 2u); + EXPECT_EQ(unwrap_nullable(s->get_element(0))->get_primitive_type(), TYPE_DECIMALV2); + EXPECT_EQ(unwrap_nullable(s->get_element(1))->get_primitive_type(), TYPE_INT); + } + + // Nested: Array> + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_ARRAY)); + auto* map_child = meta.add_children_columns(); + map_child->set_type(static_cast(FieldType::OLAP_FIELD_TYPE_MAP)); + auto* k = map_child->add_children_columns(); + k->set_type(static_cast(FieldType::OLAP_FIELD_TYPE_INT)); + *map_child->add_children_columns() = make_legacy_decv2_meta(); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_ARRAY); + auto inner = + unwrap_nullable(assert_cast(dt.get())->get_nested_type()); + EXPECT_EQ(inner->get_primitive_type(), TYPE_MAP); + auto v = unwrap_nullable(assert_cast(inner.get())->get_value_type()); + EXPECT_EQ(v->get_primitive_type(), TYPE_DECIMALV2); + } + + // AggState (count) with legacy DecimalV2 sub-type + { + segment_v2::ColumnMetaPB meta; + meta.set_type(static_cast(FieldType::OLAP_FIELD_TYPE_AGG_STATE)); + meta.set_function_name("count"); + meta.set_result_is_nullable(false); + meta.set_be_exec_version(BeExecVersionManager::get_newest_version()); + *meta.add_children_columns() = make_legacy_decv2_meta(); + DataTypePtr dt; + EXPECT_NO_THROW(dt = DataTypeFactory::instance().create_data_type(meta)); + ASSERT_NE(dt, nullptr); + EXPECT_EQ(dt->get_primitive_type(), TYPE_AGG_STATE); + const auto* agg = assert_cast(dt.get()); + ASSERT_EQ(agg->get_sub_types().size(), 1u); + auto sub = unwrap_nullable(agg->get_sub_types()[0]); + EXPECT_EQ(sub->get_primitive_type(), TYPE_DECIMALV2); + const auto* decv2 = assert_cast(sub.get()); + EXPECT_EQ(decv2->get_original_precision(), BeConsts::MAX_DECIMALV2_PRECISION); + EXPECT_EQ(decv2->get_original_scale(), BeConsts::MAX_DECIMALV2_SCALE); + } +} + TEST_F(DataTypeDecimalTest, to_pb_column_meta) { auto test_func = [](auto dt, PGenericType_TypeId expected_type) { auto col_meta = std::make_shared();