From 451cf0914c3fd28bc18e7fc979eb8de5a70eb429 Mon Sep 17 00:00:00 2001 From: csun5285 Date: Tue, 28 Apr 2026 19:56:40 +0800 Subject: [PATCH 01/10] [refactor](inverted-index) replace void* query_value with typed param interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Production query path no longer carries a const void* + reinterpret_cast through InvertedIndexReader::query / try_query. Three classes with distinct responsibilities replace the old conflated InvertedIndexQueryParamFactory: * InvertedIndexQueryParam — abstract root of the value interface; readers pull the value via the typed virtuals declared on its two intermediates, StringQueryParam (get_string) and NumericQueryParam (encode_ascending / encode_min_ascending / encode_max_ascending). * TypedInvertedIndexQueryParam — concrete typed value; numeric/date/decimal/IP specialisation implements the encode_* virtuals using type_limit<>; string specialisation implements get_string only. * InvertedIndexQueryParamFactory — static-only namespace class that maps FE values onto the correct TypedInvertedIndexQueryParam; no instances, no inheritance. BkdIndexReader::construct_bkd_query_value drops the std::vector tmp scratch buffer and the _type_info->set_to_min/max calls used to synthesize +/-infinity sentinels for half-bounded range queries. The sentinel is now produced directly by the typed query value (encode_min_ascending / encode_max_ascending), so only inverted-index supported types ever need to know how to emit a min/max. With BKD, previously the only consumer, no longer calling it, the entire TypeInfo::set_to_min/max API surface is removed: TypeInfo virtuals, ScalarTypeInfo storage, List/Map/Struct DCHECK-fail overrides, every FieldTypeTraits<...> specialization, the OLAP_FIELD_TYPE_CHAR static function pointer in types.cpp, StorageField::set_to_min/max wrappers, and the CharField/VarcharField/StringField set_to_max overrides. Corresponding storage_types_test cases are removed. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- be/src/core/type_limit.h | 20 + .../function/array/function_array_index.h | 5 +- .../function/array/function_arrays_overlap.h | 4 +- be/src/exprs/function/function_ip.h | 7 +- .../exprs/function/function_multi_match.cpp | 8 +- be/src/exprs/function/functions_comparison.h | 5 +- be/src/exprs/function/in.h | 5 +- be/src/exprs/function/match.cpp | 5 +- be/src/storage/field.h | 21 - .../inverted/inverted_index_iterator.cpp | 15 +- .../index/inverted/inverted_index_iterator.h | 6 +- .../inverted/inverted_index_query_param.h | 194 +++++ .../index/inverted/inverted_index_reader.cpp | 65 +- .../index/inverted/inverted_index_reader.h | 160 +--- .../storage/predicate/comparison_predicate.h | 5 +- be/src/storage/predicate/in_list_predicate.h | 12 +- be/src/storage/types.cpp | 2 - be/src/storage/types.h | 172 +---- ...omparison_evaluate_inverted_index_test.cpp | 6 +- be/test/exprs/function/function_ip_test.cpp | 6 +- .../util/index_compaction_utils.cpp | 8 +- .../segment/index_reader_helper_test.cpp | 6 +- .../segment/inverted_index_iterator_test.cpp | 4 +- .../inverted_index_query_param_test.cpp | 219 +++--- .../segment/inverted_index_reader_test.cpp | 709 ++++++++++++++++-- .../segment/inverted_index_writer_test.cpp | 25 +- be/test/storage/storage_types_test.cpp | 30 - 27 files changed, 1136 insertions(+), 588 deletions(-) create mode 100644 be/src/storage/index/inverted/inverted_index_query_param.h diff --git a/be/src/core/type_limit.h b/be/src/core/type_limit.h index 824433a03191dc..218f24d3961b47 100644 --- a/be/src/core/type_limit.h +++ b/be/src/core/type_limit.h @@ -17,8 +17,10 @@ #pragma once +#include "core/decimal12.h" #include "core/extended_types.h" #include "core/string_ref.h" +#include "core/uint24.h" #include "core/value/decimalv2_value.h" #include "core/value/timestamptz_value.h" @@ -54,6 +56,24 @@ struct type_limit { static DecimalV2Value max() { return DecimalV2Value::get_max_decimal(); } }; +// 
std::numeric_limits is not specialised for these custom storage types, so +// the generic type_limit<T> would return T() = zero for both min and max, +// silently breaking BKD half-bounded range queries. + +// DECIMALV2 storage. Largest representable DecimalV2 value (18 digits . 9 digits). +template <> +struct type_limit<decimal12_t> { + static decimal12_t min() { return decimal12_t {-999999999999999999LL, -999999999}; } + static decimal12_t max() { return decimal12_t {+999999999999999999LL, +999999999}; } +}; + +// DATE storage. Packed as `year<<9 | month<<5 | day`: 33=0000-01-01, 5119903=9999-12-31. +template <> +struct type_limit<uint24_t> { + static uint24_t min() { return uint24_t(33); } + static uint24_t max() { return uint24_t(5119903); } +}; + template <> struct type_limit<Decimal32> { static Decimal32 max() { return 999999999; } diff --git a/be/src/exprs/function/array/function_array_index.h b/be/src/exprs/function/array/function_array_index.h index 5ab6ca68c3c3f5..2654e46f76a24f 100644 --- a/be/src/exprs/function/array/function_array_index.h +++ b/be/src/exprs/function/array/function_array_index.h @@ -45,6 +45,7 @@ #include "core/types.h" #include "exprs/function/function.h" #include "storage/index/index_reader_helper.h" +#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_query_type.h" #include "storage/index/inverted/inverted_index_reader.h" #include "storage/predicate/column_predicate.h" @@ -164,13 +165,13 @@ class FunctionArrayIndex : public IFunction { RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); null_bitmap = null_bitmap_cache_handle.get_bitmap(); } - std::unique_ptr query_param = nullptr; + std::unique_ptr query_param = nullptr; RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type, &param_value, query_param)); InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = query_param->get_value(); + 
param.query_value = std::move(query_param); param.query_type = segment_v2::InvertedIndexQueryType::EQUAL_QUERY; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/exprs/function/array/function_arrays_overlap.h b/be/src/exprs/function/array/function_arrays_overlap.h index c1acded2e3170b..f788c04e852c6d 100644 --- a/be/src/exprs/function/array/function_arrays_overlap.h +++ b/be/src/exprs/function/array/function_arrays_overlap.h @@ -246,7 +246,7 @@ class FunctionArraysOverlap : public IFunction { RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); null_bitmap = null_bitmap_cache_handle.get_bitmap(); } - std::unique_ptr query_param = nullptr; + std::unique_ptr query_param = nullptr; const Array& query_val = param_value.get(); InvertedIndexParam param; @@ -262,7 +262,7 @@ class FunctionArraysOverlap : public IFunction { } RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( nested_param_type, &nested_query_val, query_param)); - param.query_value = query_param->get_value(); + param.query_value = std::move(query_param); param.roaring = std::make_shared(); param.analyzer_ctx = analyzer_ctx; RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {&param})); diff --git a/be/src/exprs/function/function_ip.h b/be/src/exprs/function/function_ip.h index 48906f440ec87d..ec6a172e021ec8 100644 --- a/be/src/exprs/function/function_ip.h +++ b/be/src/exprs/function/function_ip.h @@ -48,6 +48,7 @@ #include "exprs/function/function.h" #include "exprs/function/function_helpers.h" #include "storage/index/index_reader_helper.h" +#include "storage/index/inverted/inverted_index_query_param.h" namespace doris { @@ -708,7 +709,7 @@ class FunctionIsIPAddressInRange : public IFunction { std::shared_ptr null_bitmap = std::make_shared(); auto param_type = data_type_with_name.second->get_primitive_type(); - std::unique_ptr query_param = nullptr; + std::unique_ptr query_param = nullptr; // >= min ip 
RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( @@ -717,7 +718,7 @@ class FunctionIsIPAddressInRange : public IFunction { min_param.column_name = data_type_with_name.first; min_param.column_type = data_type_with_name.second; min_param.query_type = segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY; - min_param.query_value = query_param->get_value(); + min_param.query_value = std::move(query_param); min_param.num_rows = num_rows; min_param.roaring = std::make_shared(); RETURN_IF_ERROR(iter->read_from_index(&min_param)); @@ -729,7 +730,7 @@ class FunctionIsIPAddressInRange : public IFunction { max_param.column_name = data_type_with_name.first; max_param.column_type = data_type_with_name.second; max_param.query_type = segment_v2::InvertedIndexQueryType::LESS_EQUAL_QUERY; - max_param.query_value = query_param->get_value(); + max_param.query_value = std::move(query_param); max_param.num_rows = num_rows; max_param.roaring = std::make_shared(); RETURN_IF_ERROR(iter->read_from_index(&max_param)); diff --git a/be/src/exprs/function/function_multi_match.cpp b/be/src/exprs/function/function_multi_match.cpp index 2ba2a42b08d04b..68dea3f731e4ab 100644 --- a/be/src/exprs/function/function_multi_match.cpp +++ b/be/src/exprs/function/function_multi_match.cpp @@ -28,6 +28,7 @@ #include "exprs/function/simple_function_factory.h" #include "exprs/vslot_ref.h" #include "io/fs/file_reader.h" +#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/query/phrase_prefix_query.h" #include "storage/segment/segment_iterator.h" @@ -79,13 +80,12 @@ Status FunctionMultiMatch::evaluate_inverted_index( return Status::Error( "arguments for multi_match must be string"); } - // Must convert StringRef to std::string because downstream readers - // (e.g. FullTextIndexReader::query) reinterpret_cast query_value as std::string*. 
- std::string query_str(query_str_ref.data, query_str_ref.size); + auto query_param = segment_v2::TypedInvertedIndexQueryParam::create_unique(); + query_param->set_value(&query_str_ref); // search InvertedIndexParam param; - param.query_value = &query_str; + param.query_value = std::move(query_param); param.query_type = query_type; param.num_rows = num_rows; for (size_t i = 0; i < data_type_with_names.size(); i++) { diff --git a/be/src/exprs/function/functions_comparison.h b/be/src/exprs/function/functions_comparison.h index 99ad8e87b0b5ba..e5228bd8889285 100644 --- a/be/src/exprs/function/functions_comparison.h +++ b/be/src/exprs/function/functions_comparison.h @@ -40,6 +40,7 @@ #include "exprs/function/function_helpers.h" #include "exprs/function/functions_logical.h" #include "storage/index/index_reader_helper.h" +#include "storage/index/inverted/inverted_index_query_param.h" namespace doris { @@ -487,14 +488,14 @@ class FunctionComparison : public IFunction { return Status::OK(); } auto param_type = arguments[0].type->get_primitive_type(); - std::unique_ptr query_param = nullptr; + std::unique_ptr query_param = nullptr; RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( param_type, &param_value, query_param)); segment_v2::InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = query_param->get_value(); + param.query_value = std::move(query_param); param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/exprs/function/in.h b/be/src/exprs/function/in.h index edcf069e6313da..ae69e05d6af19b 100644 --- a/be/src/exprs/function/in.h +++ b/be/src/exprs/function/in.h @@ -46,6 +46,7 @@ #include "exprs/function_context.h" #include "exprs/hybrid_set.h" #include "storage/index/index_reader_helper.h" +#include "storage/index/inverted/inverted_index_query_param.h" namespace doris { @@ -170,14 +171,14 @@ class 
FunctionIn : public IFunction { *roaring |= *null_bitmap; continue; } - std::unique_ptr query_param = nullptr; + std::unique_ptr query_param = nullptr; RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( param_type, &param_value, query_param)); InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; segment_v2::InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = query_param->get_value(); + param.query_value = std::move(query_param); param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/exprs/function/match.cpp b/be/src/exprs/function/match.cpp index d2814d1140aac4..92757b8b8ecd54 100644 --- a/be/src/exprs/function/match.cpp +++ b/be/src/exprs/function/match.cpp @@ -23,6 +23,7 @@ #include "runtime/runtime_state.h" #include "storage/index/index_reader_helper.h" #include "storage/index/inverted/analyzer/analyzer.h" +#include "storage/index/inverted/inverted_index_query_param.h" #include "util/debug_points.h" namespace doris { @@ -79,14 +80,14 @@ Status FunctionMatchBase::evaluate_inverted_index( return Status::Error( "arguments for match must be string"); } - std::unique_ptr query_param = nullptr; + std::unique_ptr query_param = nullptr; RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type, &param_value, query_param)); InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = query_param->get_value(); + param.query_value = std::move(query_param); param.query_type = get_query_type_from_fn_name(); param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/storage/field.h b/be/src/storage/field.h index 1658248effd33a..164dd382fc718f 100644 --- a/be/src/storage/field.h +++ b/be/src/storage/field.h @@ -64,10 +64,6 @@ class StorageField { const std::string& name() const 
{ return _name; } const PathInDataPtr& path() const { return _path; } - virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); } - - virtual void set_to_min(char* buf) const { return _type_info->set_to_min(buf); } - virtual StorageField* clone() const { auto* local = new StorageField(_desc); this->clone(local); @@ -172,12 +168,6 @@ class CharField : public StorageField { StorageField::clone(local); return local; } - - void set_to_max(char* ch) const override { - auto slice = reinterpret_cast(ch); - slice->size = _length; - memset(slice->data, 0xFF, slice->size); - } }; class VarcharField : public StorageField { @@ -189,12 +179,6 @@ class VarcharField : public StorageField { StorageField::clone(local); return local; } - - void set_to_max(char* ch) const override { - auto slice = reinterpret_cast(ch); - slice->size = _length - OLAP_VARCHAR_MAX_BYTES; - memset(slice->data, 0xFF, slice->size); - } }; class StringField : public StorageField { public: @@ -205,11 +189,6 @@ class StringField : public StorageField { StorageField::clone(local); return local; } - - void set_to_max(char* ch) const override { - auto slice = reinterpret_cast(ch); - memset(slice->data, 0xFF, slice->size); - } }; class BitmapAggField : public StorageField { diff --git a/be/src/storage/index/inverted/inverted_index_iterator.cpp b/be/src/storage/index/inverted/inverted_index_iterator.cpp index fbe06c054e51fb..c72bf00fffe64e 100644 --- a/be/src/storage/index/inverted/inverted_index_iterator.cpp +++ b/be/src/storage/index/inverted/inverted_index_iterator.cpp @@ -88,8 +88,8 @@ Status InvertedIndexIterator::read_from_index(const IndexParam& param) { runtime_state->query_options().inverted_index_skip_threshold; size_t hit_count = 0; RETURN_IF_ERROR(try_read_from_inverted_index(reader, i_param->column_name, - i_param->query_value, i_param->query_type, - &hit_count)); + i_param->query_value.get(), + i_param->query_type, &hit_count)); if (hit_count > i_param->num_rows * 
query_bkd_limit_percent / 100) { return Status::Error( "hit count: {}, bkd inverted reached limit {}% , segment num " @@ -101,7 +101,7 @@ Status InvertedIndexIterator::read_from_index(const IndexParam& param) { // Note: analyzer_ctx is now passed via i_param->analyzer_ctx auto execute_query = [&]() { - return reader->query(_context, i_param->column_name, i_param->query_value, + return reader->query(_context, i_param->column_name, i_param->query_value.get(), i_param->query_type, i_param->roaring, i_param->analyzer_ctx); }; @@ -133,11 +133,10 @@ Result InvertedIndexIterator::has_null() { return reader->has_null(); } -Status InvertedIndexIterator::try_read_from_inverted_index(const InvertedIndexReaderPtr& reader, - const std::string& column_name, - const void* query_value, - InvertedIndexQueryType query_type, - size_t* count) { +Status InvertedIndexIterator::try_read_from_inverted_index( + const InvertedIndexReaderPtr& reader, const std::string& column_name, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, + size_t* count) { // NOTE: only bkd index support try read now. if (query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY || query_type == InvertedIndexQueryType::GREATER_THAN_QUERY || diff --git a/be/src/storage/index/inverted/inverted_index_iterator.h b/be/src/storage/index/inverted/inverted_index_iterator.h index e7418535d3cd48..7ecef754045fc7 100644 --- a/be/src/storage/index/inverted/inverted_index_iterator.h +++ b/be/src/storage/index/inverted/inverted_index_iterator.h @@ -29,7 +29,8 @@ namespace doris::segment_v2 { struct InvertedIndexParam { std::string column_name; DataTypePtr column_type; - const void* query_value; + // Owns the typed query value; reader receives query_value.get(). 
+ std::unique_ptr query_value; InvertedIndexQueryType query_type; uint32_t num_rows; std::shared_ptr roaring; @@ -73,7 +74,8 @@ class InvertedIndexIterator : public IndexIterator { ENABLE_FACTORY_CREATOR(InvertedIndexIterator); Status try_read_from_inverted_index(const InvertedIndexReaderPtr& reader, - const std::string& column_name, const void* query_value, + const std::string& column_name, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, size_t* count); // Normalize analyzer_key to lowercase. diff --git a/be/src/storage/index/inverted/inverted_index_query_param.h b/be/src/storage/index/inverted/inverted_index_query_param.h new file mode 100644 index 00000000000000..618b838ae4d6b4 --- /dev/null +++ b/be/src/storage/index/inverted/inverted_index_query_param.h @@ -0,0 +1,194 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include +#include + +#include "common/factory_creator.h" +#include "common/status.h" +#include "core/data_type/primitive_type.h" +#include "core/string_ref.h" +#include "core/type_limit.h" +#include "storage/index/inverted/inverted_index_reader.h" +#include "storage/key_coder.h" + +namespace doris { +class Field; + +namespace segment_v2 { + +// Typed query value passed from FE to InvertedIndexReader. +// Two disjoint shapes under a common root for unified ownership: +// StringQueryParam — get_string() (FullText / String readers) +// NumericQueryParam — encode_ascending() + encode_min/max_* (BkdIndexReader) +// TypedInvertedIndexQueryParam picks its base via PT (string vs numeric). + +class InvertedIndexQueryParam { +public: + virtual ~InvertedIndexQueryParam() = default; +}; + +class StringQueryParam : public InvertedIndexQueryParam { +public: + virtual void get_string(std::string* out) const = 0; +}; + +class NumericQueryParam : public InvertedIndexQueryParam { +public: + // Encode _value through KeyCoder ascending. encode_min/max encode the type's + // sentinel for the open side of BKD half-bounded ranges. + virtual void encode_ascending(const KeyCoder* coder, std::string* out) const = 0; + virtual void encode_min_ascending(const KeyCoder* coder, std::string* out) const = 0; + virtual void encode_max_ascending(const KeyCoder* coder, std::string* out) const = 0; +}; + +template +class TypedInvertedIndexQueryParam : public NumericQueryParam { + ENABLE_FACTORY_CREATOR(TypedInvertedIndexQueryParam); + +public: + // Storage type aligned with KeyCoder's view. Override per-PT when + // PrimitiveTypeTraits disagrees with KeyCoder on signedness — otherwise + // type_limit::min/max produces broken sentinels. + // TYPE_DATETIME: PrimitiveTypeTraits=uint64_t but KeyCoder=int64_t. With + // uint64_t, type_limit::max() = UINT64_MAX is read as -1 and encodes + // smaller than any real datetime — broken +inf. 
+ using storage_val = std::conditional_t::StorageFieldType>; + + void set_value(const storage_val* value) { _value = *value; } + const storage_val& value() const { return _value; } + + void encode_ascending(const KeyCoder* coder, std::string* out) const override { + coder->full_encode_ascending(&_value, out); + } + void encode_min_ascending(const KeyCoder* coder, std::string* out) const override { + storage_val v = type_limit::min(); + coder->full_encode_ascending(&v, out); + } + void encode_max_ascending(const KeyCoder* coder, std::string* out) const override { + storage_val v = type_limit::max(); + coder->full_encode_ascending(&v, out); + } + +private: + storage_val _value; +}; + +template + requires(is_string_type(PT)) +class TypedInvertedIndexQueryParam : public StringQueryParam { + ENABLE_FACTORY_CREATOR(TypedInvertedIndexQueryParam); + +public: + void set_value(const std::string& value) { _value = value; } + void set_value(const StringRef* value) { _value.assign(value->data, value->size); } + + const std::string& value() const { return _value; } + + void get_string(std::string* out) const override { *out = _value; } + +private: + std::string _value; +}; + +// Static-only: maps FE values (Field / scalars / StringRef) to the right +// TypedInvertedIndexQueryParam. 
+class InvertedIndexQueryParamFactory { +public: + InvertedIndexQueryParamFactory() = delete; + + template + static Status create_query_value(const ValueType* value, + std::unique_ptr& result_param) { + static_assert(!std::is_same_v, + "ValueType cannot be void, as it is unsupported and dangerous."); + + using CPP_TYPE = typename PrimitiveTypeTraits::CppType; + std::unique_ptr> param = + TypedInvertedIndexQueryParam::create_unique(); + + if constexpr (is_string_type(PT)) { + if constexpr (std::is_same_v) { + const auto& str = value->template get(); + param->set_value(str); + } else if constexpr (std::is_same_v) { + param->set_value(value); + } else { + static_assert(std::is_convertible_v, + "ValueType must be convertible to std::string for string types"); + param->set_value(std::string(*value)); + } + } else { + CPP_TYPE cpp_val; + if constexpr (std::is_same_v) { + auto field_val = value->template get(); + cpp_val = static_cast(field_val); + } else { + cpp_val = static_cast(*value); + } + + typename TypedInvertedIndexQueryParam::storage_val storage_val_v = + PrimitiveTypeConvertor::to_storage_field_type(cpp_val); + param->set_value(&storage_val_v); + } + result_param = std::move(param); + return Status::OK(); + } + + static Status create_query_value(const PrimitiveType& primitiveType, const doris::Field* value, + std::unique_ptr& result_param) { + switch (primitiveType) { +#define M(TYPE) \ + case TYPE: { \ + return create_query_value(value, result_param); \ + } + M(PrimitiveType::TYPE_BOOLEAN) + M(PrimitiveType::TYPE_TINYINT) + M(PrimitiveType::TYPE_SMALLINT) + M(PrimitiveType::TYPE_INT) + M(PrimitiveType::TYPE_BIGINT) + M(PrimitiveType::TYPE_LARGEINT) + M(PrimitiveType::TYPE_FLOAT) + M(PrimitiveType::TYPE_DOUBLE) + M(PrimitiveType::TYPE_DECIMALV2) + M(PrimitiveType::TYPE_DECIMAL32) + M(PrimitiveType::TYPE_DECIMAL64) + M(PrimitiveType::TYPE_DECIMAL128I) + M(PrimitiveType::TYPE_DECIMAL256) + M(PrimitiveType::TYPE_DATE) + M(PrimitiveType::TYPE_DATETIME) + 
M(PrimitiveType::TYPE_CHAR) + M(PrimitiveType::TYPE_VARCHAR) + M(PrimitiveType::TYPE_STRING) + M(PrimitiveType::TYPE_DATEV2) + M(PrimitiveType::TYPE_DATETIMEV2) + M(PrimitiveType::TYPE_IPV4) + M(PrimitiveType::TYPE_IPV6) +#undef M + default: + return Status::NotSupported("Unsupported primitive type {} for inverted index reader", + primitiveType); + } + } +}; + +} // namespace segment_v2 +} // namespace doris diff --git a/be/src/storage/index/inverted/inverted_index_reader.cpp b/be/src/storage/index/inverted/inverted_index_reader.cpp index f7b1b76dc6db0d..c05e2068d9bf61 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.cpp +++ b/be/src/storage/index/inverted/inverted_index_reader.cpp @@ -49,6 +49,7 @@ #include "storage/index/inverted/inverted_index_fs_directory.h" #include "storage/index/inverted/inverted_index_iterator.h" #include "storage/index/inverted/inverted_index_parser.h" +#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_query_type.h" #include "storage/index/inverted/inverted_index_searcher.h" #include "storage/index/inverted/query/phrase_query.h" @@ -289,13 +290,19 @@ Status FullTextIndexReader::new_iterator(std::unique_ptr* iterato } Status FullTextIndexReader::query(const IndexQueryContextPtr& context, - const std::string& column_name, const void* query_value, + const std::string& column_name, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx) { SCOPED_RAW_TIMER(&context->stats->inverted_index_query_timer); - std::string search_str = *reinterpret_cast(query_value); + const auto* str_param = dynamic_cast(query_value); + if (str_param == nullptr) { + return Status::InternalError("FullTextIndexReader expected StringQueryParam"); + } + std::string search_str; + str_param->get_string(&search_str); VLOG_DEBUG << column_name << " begin to search the fulltext index from clucene, 
query_str [" << search_str << "]"; @@ -408,13 +415,19 @@ Status StringTypeInvertedIndexReader::new_iterator(std::unique_ptr& bit_map, const InvertedIndexAnalyzerCtx* /*analyzer_ctx*/) { SCOPED_RAW_TIMER(&context->stats->inverted_index_query_timer); - std::string search_str = *reinterpret_cast(query_value); + const auto* str_param = dynamic_cast(query_value); + if (str_param == nullptr) { + return Status::InternalError("StringTypeInvertedIndexReader expected StringQueryParam"); + } + std::string search_str; + str_param->get_string(&search_str); // If the written value exceeds ignore_above, it will be written as null. // The queried value exceeds ignore_above means the written value cannot be found. @@ -541,23 +554,20 @@ Status BkdIndexReader::new_iterator(std::unique_ptr* iterator) { } template -Status BkdIndexReader::construct_bkd_query_value(const void* query_value, +Status BkdIndexReader::construct_bkd_query_value(const NumericQueryParam* query_value, std::shared_ptr r, InvertedIndexVisitor* visitor) { - std::vector tmp(r->bytes_per_dim_); if constexpr (QT == InvertedIndexQueryType::EQUAL_QUERY) { - _value_key_coder->full_encode_ascending(query_value, &visitor->query_max); - _value_key_coder->full_encode_ascending(query_value, &visitor->query_min); + query_value->encode_ascending(_value_key_coder, &visitor->query_max); + query_value->encode_ascending(_value_key_coder, &visitor->query_min); } else if constexpr (QT == InvertedIndexQueryType::LESS_THAN_QUERY || QT == InvertedIndexQueryType::LESS_EQUAL_QUERY) { - _value_key_coder->full_encode_ascending(query_value, &visitor->query_max); - _type_info->set_to_min(tmp.data()); - _value_key_coder->full_encode_ascending(tmp.data(), &visitor->query_min); + query_value->encode_ascending(_value_key_coder, &visitor->query_max); + query_value->encode_min_ascending(_value_key_coder, &visitor->query_min); } else if constexpr (QT == InvertedIndexQueryType::GREATER_THAN_QUERY || QT == InvertedIndexQueryType::GREATER_EQUAL_QUERY) { 
- _value_key_coder->full_encode_ascending(query_value, &visitor->query_min); - _type_info->set_to_max(tmp.data()); - _value_key_coder->full_encode_ascending(tmp.data(), &visitor->query_max); + query_value->encode_ascending(_value_key_coder, &visitor->query_min); + query_value->encode_max_ascending(_value_key_coder, &visitor->query_max); } else { return Status::Error( "invalid query type when query bkd index"); @@ -566,7 +576,7 @@ Status BkdIndexReader::construct_bkd_query_value(const void* query_value, } Status BkdIndexReader::invoke_bkd_try_query(const IndexQueryContextPtr& context, - const void* query_value, + const NumericQueryParam* query_value, InvertedIndexQueryType query_type, std::shared_ptr r, size_t* count) { @@ -617,7 +627,8 @@ Status BkdIndexReader::invoke_bkd_try_query(const IndexQueryContextPtr& context, } Status BkdIndexReader::invoke_bkd_query(const IndexQueryContextPtr& context, - const void* query_value, InvertedIndexQueryType query_type, + const NumericQueryParam* query_value, + InvertedIndexQueryType query_type, std::shared_ptr r, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&context->stats->inverted_index_searcher_search_timer); @@ -668,8 +679,13 @@ Status BkdIndexReader::invoke_bkd_query(const IndexQueryContextPtr& context, } Status BkdIndexReader::try_query(const IndexQueryContextPtr& context, - const std::string& column_name, const void* query_value, + const std::string& column_name, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, size_t* count) { + const auto* num_param = dynamic_cast(query_value); + if (num_param == nullptr) { + return Status::InternalError("BkdIndexReader::try_query expected NumericQueryParam"); + } try { std::shared_ptr r; auto st = get_bkd_reader(context, r); @@ -680,7 +696,7 @@ Status BkdIndexReader::try_query(const IndexQueryContextPtr& context, return st; } std::string query_str; - _value_key_coder->full_encode_ascending(query_value, &query_str); + 
num_param->encode_ascending(_value_key_coder, &query_str); auto index_file_key = _index_file_reader->get_index_file_cache_key(&_index_meta); InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type, @@ -693,7 +709,7 @@ Status BkdIndexReader::try_query(const IndexQueryContextPtr& context, return Status::OK(); } - return invoke_bkd_try_query(context, query_value, query_type, r, count); + return invoke_bkd_try_query(context, num_param, query_type, r, count); } catch (const CLuceneError& e) { return Status::Error( "BKD Query CLuceneError Occurred, error msg: {}", e.what()); @@ -704,11 +720,16 @@ Status BkdIndexReader::try_query(const IndexQueryContextPtr& context, } Status BkdIndexReader::query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, + InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* /*analyzer_ctx*/) { SCOPED_RAW_TIMER(&context->stats->inverted_index_query_timer); + const auto* num_param = dynamic_cast(query_value); + if (num_param == nullptr) { + return Status::InternalError("BkdIndexReader::query expected NumericQueryParam"); + } try { std::shared_ptr r; auto st = get_bkd_reader(context, r); @@ -719,7 +740,7 @@ Status BkdIndexReader::query(const IndexQueryContextPtr& context, const std::str return st; } std::string query_str; - _value_key_coder->full_encode_ascending(query_value, &query_str); + num_param->encode_ascending(_value_key_coder, &query_str); auto index_file_key = _index_file_reader->get_index_file_cache_key(&_index_meta); InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type, @@ -730,7 +751,7 @@ Status BkdIndexReader::query(const IndexQueryContextPtr& context, const std::str return Status::OK(); } - RETURN_IF_ERROR(invoke_bkd_query(context, query_value, query_type, r, bit_map)); + 
RETURN_IF_ERROR(invoke_bkd_query(context, num_param, query_type, r, bit_map)); bit_map->runOptimize(); cache->insert(cache_key, bit_map, &cache_handler); diff --git a/be/src/storage/index/inverted/inverted_index_reader.h b/be/src/storage/index/inverted/inverted_index_reader.h index 906c1f512a15f1..d39fa2c60dec00 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.h +++ b/be/src/storage/index/inverted/inverted_index_reader.h @@ -76,6 +76,9 @@ class InvertedIndexQueryCacheHandle; class IndexFileReader; class InvertedIndexQueryInfo; class IndexIterator; +class InvertedIndexQueryParam; +class StringQueryParam; +class NumericQueryParam; class InvertedIndexResultBitmap { private: @@ -222,13 +225,17 @@ class InvertedIndexReader : public IndexReader { IndexType index_type() override { return IndexType::INVERTED; } + // Callers pass a TypedInvertedIndexQueryParam produced by + // InvertedIndexQueryParamFactory. Each reader downcasts to the + // appropriate intermediate (StringQueryParam / NumericQueryParam) at entry. 
virtual Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, + InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) = 0; virtual Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, - size_t* count) = 0; + const InvertedIndexQueryParam* query_value, + InvertedIndexQueryType query_type, size_t* count) = 0; Status read_null_bitmap(const IndexQueryContextPtr& context, InvertedIndexQueryCacheHandle* cache_handle, @@ -285,11 +292,11 @@ class FullTextIndexReader : public InvertedIndexReader { Status new_iterator(std::unique_ptr* iterator) override; Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override; Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, size_t* count) override { return Status::Error( "FullTextIndexReader not support try_query"); @@ -310,11 +317,11 @@ class StringTypeInvertedIndexReader : public InvertedIndexReader { Status new_iterator(std::unique_ptr* iterator) override; Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override; Status try_query(const IndexQueryContextPtr& 
context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, size_t* count) override { return Status::Error( "StringTypeInvertedIndexReader not support try_query"); @@ -370,21 +377,22 @@ class BkdIndexReader : public InvertedIndexReader { Status new_iterator(std::unique_ptr* iterator) override; Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override; Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, size_t* count) override; - Status invoke_bkd_try_query(const IndexQueryContextPtr& context, const void* query_value, + Status invoke_bkd_try_query(const IndexQueryContextPtr& context, + const NumericQueryParam* query_value, InvertedIndexQueryType query_type, std::shared_ptr r, size_t* count); - Status invoke_bkd_query(const IndexQueryContextPtr& context, const void* query_value, - InvertedIndexQueryType query_type, + Status invoke_bkd_query(const IndexQueryContextPtr& context, + const NumericQueryParam* query_value, InvertedIndexQueryType query_type, std::shared_ptr r, std::shared_ptr& bit_map); template - Status construct_bkd_query_value(const void* query_value, + Status construct_bkd_query_value(const NumericQueryParam* query_value, std::shared_ptr r, InvertedIndexVisitor* visitor); @@ -396,131 +404,5 @@ class BkdIndexReader : public InvertedIndexReader { const KeyCoder* _value_key_coder {}; }; -template -class InvertedIndexQueryParam; - -/** - * @brief InvertedIndexQueryParamFactory is a factory class to create 
QueryValue object. - * we need a template function to make predict class like in_list_predict template class to use. - * also need a function with primitive type parameter to create inverted index query value. like some function expr: function_array_index - * Now we just mapping field value in query engine to storage field value - */ -class InvertedIndexQueryParamFactory { - ENABLE_FACTORY_CREATOR(InvertedIndexQueryParamFactory); - -public: - virtual ~InvertedIndexQueryParamFactory() = default; - - template - static Status create_query_value( - const ValueType* value, std::unique_ptr& result_param) { - static_assert(!std::is_same_v, - "ValueType cannot be void, as it is unsupported and dangerous."); - - using CPP_TYPE = typename PrimitiveTypeTraits::CppType; - std::unique_ptr> param = - InvertedIndexQueryParam::create_unique(); - - if constexpr (is_string_type(PT)) { - if constexpr (std::is_same_v) { - const auto& str = value->template get(); - param->set_value(str); - } else if constexpr (std::is_same_v) { - param->set_value(value); - } else { - static_assert(std::is_convertible_v, - "ValueType must be convertible to std::string for string types"); - param->set_value(std::string(*value)); - } - } else { - CPP_TYPE cpp_val; - if constexpr (std::is_same_v) { - auto field_val = value->template get(); - cpp_val = static_cast(field_val); - } else { - cpp_val = static_cast(*value); - } - - auto storage_val = PrimitiveTypeConvertor::to_storage_field_type(cpp_val); - param->set_value(&storage_val); - } - result_param = std::move(param); - return Status::OK(); - } - - static Status create_query_value( - const PrimitiveType& primitiveType, const doris::Field* value, - std::unique_ptr& result_param) { - switch (primitiveType) { -#define M(TYPE) \ - case TYPE: { \ - return create_query_value(value, result_param); \ - } - M(PrimitiveType::TYPE_BOOLEAN) - M(PrimitiveType::TYPE_TINYINT) - M(PrimitiveType::TYPE_SMALLINT) - M(PrimitiveType::TYPE_INT) - 
M(PrimitiveType::TYPE_BIGINT) - M(PrimitiveType::TYPE_LARGEINT) - M(PrimitiveType::TYPE_FLOAT) - M(PrimitiveType::TYPE_DOUBLE) - M(PrimitiveType::TYPE_DECIMALV2) - M(PrimitiveType::TYPE_DECIMAL32) - M(PrimitiveType::TYPE_DECIMAL64) - M(PrimitiveType::TYPE_DECIMAL128I) - M(PrimitiveType::TYPE_DECIMAL256) - M(PrimitiveType::TYPE_DATE) - M(PrimitiveType::TYPE_DATETIME) - M(PrimitiveType::TYPE_CHAR) - M(PrimitiveType::TYPE_VARCHAR) - M(PrimitiveType::TYPE_STRING) - M(PrimitiveType::TYPE_DATEV2) - M(PrimitiveType::TYPE_DATETIMEV2) - M(PrimitiveType::TYPE_IPV4) - M(PrimitiveType::TYPE_IPV6) -#undef M - default: - return Status::NotSupported("Unsupported primitive type {} for inverted index reader", - primitiveType); - } - }; - - virtual const void* get_value() const { - LOG_FATAL( - "Execution reached an undefined behavior code path in " - "InvertedIndexQueryParamFactory"); - __builtin_unreachable(); - }; -}; - -template -class InvertedIndexQueryParam : public InvertedIndexQueryParamFactory { - ENABLE_FACTORY_CREATOR(InvertedIndexQueryParam); - using storage_val = typename PrimitiveTypeTraits::StorageFieldType; - -public: - void set_value(const storage_val* value) { _value = *value; } - - const void* get_value() const override { return &_value; } - -private: - storage_val _value; -}; - -template - requires(is_string_type(PT)) -class InvertedIndexQueryParam : public InvertedIndexQueryParamFactory { - ENABLE_FACTORY_CREATOR(InvertedIndexQueryParam); - -public: - void set_value(const std::string& value) { _value = value; } - void set_value(const StringRef* value) { _value.assign(value->data, value->size); } - - const void* get_value() const override { return &_value; } - -private: - std::string _value; -}; - } // namespace segment_v2 } // namespace doris diff --git a/be/src/storage/predicate/comparison_predicate.h b/be/src/storage/predicate/comparison_predicate.h index c195b15fefacda..a4e2f84d937ac3 100644 --- a/be/src/storage/predicate/comparison_predicate.h +++ 
b/be/src/storage/predicate/comparison_predicate.h @@ -24,6 +24,7 @@ #include "core/column/column_dictionary.h" #include "storage/index/bloom_filter/bloom_filter.h" #include "storage/index/inverted/inverted_index_cache.h" // IWYU pragma: keep +#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_reader.h" #include "storage/predicate/column_predicate.h" @@ -92,14 +93,14 @@ class ComparisonPredicateBase final : public ColumnPredicate { return Status::InvalidArgument("invalid comparison predicate type {}", PT); } - std::unique_ptr query_param = nullptr; + std::unique_ptr query_param = nullptr; RETURN_IF_ERROR( InvertedIndexQueryParamFactory::create_query_value(&_value, query_param)); InvertedIndexParam param; param.column_name = name_with_type.first; param.column_type = name_with_type.second; - param.query_value = query_param->get_value(); + param.query_value = std::move(query_param); param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/storage/predicate/in_list_predicate.h b/be/src/storage/predicate/in_list_predicate.h index 56463879185ee0..eded9322da2011 100644 --- a/be/src/storage/predicate/in_list_predicate.h +++ b/be/src/storage/predicate/in_list_predicate.h @@ -34,6 +34,7 @@ #include "exprs/hybrid_set.h" #include "storage/index/bloom_filter/bloom_filter.h" #include "storage/index/inverted/inverted_index_cache.h" // IWYU pragma: keep +#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_reader.h" #include "storage/olap_common.h" #include "storage/predicate/column_predicate.h" @@ -161,13 +162,12 @@ class InListPredicateBase final : public ColumnPredicate { roaring::Roaring indices; HybridSetBase::IteratorBase* iter = _values->begin(); while (iter->has_next()) { - std::unique_ptr query_param = nullptr; + std::unique_ptr query_param = nullptr; if constexpr (is_string_type(Type)) { - // 
get_value() returns StringRef*, not std::string* + // HybridSet's iter->get_value() yields StringRef*, not std::string*. const auto* ref = (const StringRef*)(iter->get_value()); - T str(ref->data, ref->size); - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( - &str, query_param)); + RETURN_IF_ERROR( + InvertedIndexQueryParamFactory::create_query_value(ref, query_param)); } else { const T* value = (const T*)(iter->get_value()); RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( @@ -177,7 +177,7 @@ class InListPredicateBase final : public ColumnPredicate { InvertedIndexParam param; param.column_name = name_with_type.first; param.column_type = name_with_type.second; - param.query_value = query_param->get_value(); + param.query_value = std::move(query_param); param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/storage/types.cpp b/be/src/storage/types.cpp index c5f12ee781fad6..e2137efc3825c3 100644 --- a/be/src/storage/types.cpp +++ b/be/src/storage/types.cpp @@ -26,8 +26,6 @@ namespace doris { -void (*FieldTypeTraits::set_to_max)(void*) = nullptr; - static TypeInfoPtr create_type_info_ptr(const TypeInfo* type_info, bool should_reclaim_memory); bool is_scalar_type(FieldType field_type) { diff --git a/be/src/storage/types.h b/be/src/storage/types.h index 81fec759089b8f..e33c81751b9086 100644 --- a/be/src/storage/types.h +++ b/be/src/storage/types.h @@ -69,9 +69,6 @@ class TypeInfo { virtual ~TypeInfo() = default; virtual int cmp(const void* left, const void* right) const = 0; - virtual void set_to_max(void* buf) const = 0; - virtual void set_to_min(void* buf) const = 0; - virtual size_t size() const = 0; virtual FieldType type() const = 0; @@ -81,8 +78,6 @@ class ScalarTypeInfo : public TypeInfo { public: int cmp(const void* left, const void* right) const override { return _cmp(left, right); } - void set_to_max(void* buf) const override { _set_to_max(buf); } - void 
set_to_min(void* buf) const override { _set_to_min(buf); } size_t size() const override { return _size; } FieldType type() const override { return _field_type; } @@ -90,17 +85,12 @@ class ScalarTypeInfo : public TypeInfo { template ScalarTypeInfo(TypeTraitsClass t) : _cmp(TypeTraitsClass::cmp), - _set_to_max(TypeTraitsClass::set_to_max), - _set_to_min(TypeTraitsClass::set_to_min), _size(TypeTraitsClass::size), _field_type(TypeTraitsClass::type) {} private: int (*_cmp)(const void* left, const void* right); - void (*_set_to_max)(void* buf); - void (*_set_to_min)(void* buf); - const size_t _size; const FieldType _field_type; @@ -158,14 +148,6 @@ class ArrayTypeInfo : public TypeInfo { } } - void set_to_max(void* buf) const override { - DCHECK(false) << "set_to_max of list is not implemented."; - } - - void set_to_min(void* buf) const override { - DCHECK(false) << "set_to_min of list is not implemented."; - } - size_t size() const override { return sizeof(CollectionValue); } FieldType type() const override { return FieldType::OLAP_FIELD_TYPE_ARRAY; } @@ -209,14 +191,6 @@ class MapTypeInfo : public TypeInfo { } } - void set_to_max(void* buf) const override { - DCHECK(false) << "set_to_max of list is not implemented."; - } - - void set_to_min(void* buf) const override { - DCHECK(false) << "set_to_min of list is not implemented."; - } - size_t size() const override { return sizeof(MapValue); } FieldType type() const override { return FieldType::OLAP_FIELD_TYPE_MAP; } @@ -282,14 +256,6 @@ class StructTypeInfo : public TypeInfo { } } - void set_to_max(void* buf) const override { - DCHECK(false) << "set_to_max of list is not implemented."; - } - - void set_to_min(void* buf) const override { - DCHECK(false) << "set_to_min of list is not implemented."; - } - size_t size() const override { return sizeof(StructValue); } FieldType type() const override { return FieldType::OLAP_FIELD_TYPE_STRUCT; } @@ -509,14 +475,6 @@ struct BaseFieldTypeTraits : public CppTypeTraits { return 0; 
} } - - static inline void set_to_max(void* buf) { - set_cpp_type_value(buf, type_limit::max()); - } - - static inline void set_to_min(void* buf) { - set_cpp_type_value(buf, type_limit::min()); - } }; // Using NumericFieldtypeTraits to Derived code for FieldType::OLAP_FIELD_TYPE_XXXINT, FieldType::OLAP_FIELD_TYPE_FLOAT, @@ -538,122 +496,43 @@ struct FieldTypeTraits template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { (*(uint8_t*)buf) = 1; } - static void set_to_min(void* buf) { (*(uint8_t*)buf) = 0; } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public NumericFieldTypeTraits { - static void set_to_max(void* buf) { - *reinterpret_cast(buf) = ~((int128_t)(1) << 127); - } - static void set_to_min(void* buf) { - *reinterpret_cast(buf) = (int128_t)(1) << 127; - } -}; + : public NumericFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - *reinterpret_cast(buf) = 0xFFFFFFFF; // 255.255.255.255 - } - - static void set_to_min(void* buf) { - *reinterpret_cast(buf) = 0; // 0.0.0.0 - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - *reinterpret_cast(buf) = -1; // ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff - } - - static void set_to_min(void* buf) { - *reinterpret_cast(buf) = 0; // :: - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - CppType* data = reinterpret_cast(buf); - data->integer = 999999999999999999L; - data->fraction = 999999999; - } - static void set_to_min(void* buf) { - CppType* data = reinterpret_cast(buf); - data->integer = -999999999999999999; - data->fraction = -999999999; - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void 
set_to_max(void* buf) { - // max is 9999 * 16 * 32 + 12 * 32 + 31; - *reinterpret_cast(buf) = 5119903; - } - static void set_to_min(void* buf) { - // min is 0 * 16 * 32 + 1 * 32 + 1; - *reinterpret_cast(buf) = 33; - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - // max is 9999 * 16 * 32 + 12 * 32 + 31; - *reinterpret_cast(buf) = MAX_DATE_V2; - } - static void set_to_min(void* buf) { - // min is 0 * 16 * 32 + 1 * 32 + 1; - *reinterpret_cast(buf) = MIN_DATE_V2; - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - // max is 9999 * 16 * 32 + 12 * 32 + 31; - *reinterpret_cast(buf) = MAX_DATETIME_V2; - } - static void set_to_min(void* buf) { - // min is 0 * 16 * 32 + 1 * 32 + 1; - *reinterpret_cast(buf) = MIN_DATETIME_V2; - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - // 9999-12-31 23:59:59 - *reinterpret_cast(buf) = 99991231235959L; - } - static void set_to_min(void* buf) { *reinterpret_cast(buf) = 101000000; } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - // max is 9999 * 16 * 32 + 12 * 32 + 31; - *reinterpret_cast(buf) = MAX_DATETIME_V2; - } - static void set_to_min(void* buf) { - // min is 0 * 16 * 32 + 1 * 32 + 1; - *reinterpret_cast(buf) = MIN_DATETIME_V2; - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits @@ -663,32 +542,15 @@ struct FieldTypeTraits auto r_slice = reinterpret_cast(right); return l_slice->compare(*r_slice); } - // Using field.set_to_max to set varchar/char,not here. 
- static void (*set_to_max)(void*); - - static void set_to_min(void* buf) { - auto slice = reinterpret_cast(buf); - memset(slice->data, 0, slice->size); - } }; template <> struct FieldTypeTraits - : public FieldTypeTraits { - static void set_to_min(void* buf) { - auto slice = reinterpret_cast(buf); - slice->size = 0; - } -}; + : public FieldTypeTraits {}; template <> struct FieldTypeTraits - : public FieldTypeTraits { - static void set_to_min(void* buf) { - auto slice = reinterpret_cast(buf); - slice->size = 0; - } -}; + : public FieldTypeTraits {}; template <> struct FieldTypeTraits @@ -697,16 +559,6 @@ struct FieldTypeTraits LOG(WARNING) << "can not compare JSONB values"; return -1; // always update ? } - - static void set_to_min(void* buf) { - auto slice = reinterpret_cast(buf); - slice->size = 0; - } - - static void set_to_max(void* buf) { - auto slice = reinterpret_cast(buf); - slice->size = 0; - } }; template <> diff --git a/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp b/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp index dc50d0d980385a..96b4be0d76bac4 100644 --- a/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp +++ b/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp @@ -38,14 +38,16 @@ class MockInvertedIndexReader : public segment_v2::InvertedIndexReader { } Status query(const segment_v2::IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, segment_v2::InvertedIndexQueryType query_type, + const segment_v2::InvertedIndexQueryParam* query_value, + segment_v2::InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } Status try_query(const segment_v2::IndexQueryContextPtr& context, - const std::string& column_name, const void* query_value, + const std::string& column_name, + const segment_v2::InvertedIndexQueryParam* 
query_value, segment_v2::InvertedIndexQueryType query_type, size_t* count) override { return Status::OK(); } diff --git a/be/test/exprs/function/function_ip_test.cpp b/be/test/exprs/function/function_ip_test.cpp index 1b36d18cfa1b09..206a5e6cba2a39 100644 --- a/be/test/exprs/function/function_ip_test.cpp +++ b/be/test/exprs/function/function_ip_test.cpp @@ -172,13 +172,15 @@ class MockIndexReader : public segment_v2::InvertedIndexReader { return segment_v2::InvertedIndexReaderType::BKD; } Status query(const segment_v2::IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, segment_v2::InvertedIndexQueryType query_type, + const segment_v2::InvertedIndexQueryParam* query_value, + segment_v2::InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } Status try_query(const segment_v2::IndexQueryContextPtr& context, - const std::string& column_name, const void* query_value, + const std::string& column_name, + const segment_v2::InvertedIndexQueryParam* query_value, segment_v2::InvertedIndexQueryType query_type, size_t* count) override { return Status::OK(); } diff --git a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp index 93670029927b6e..f9f97cf26bb143 100644 --- a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp +++ b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp @@ -31,6 +31,7 @@ #include "json2pb/pb_to_json.h" #include "storage/compaction/base_compaction.h" #include "storage/index/index_file_reader.h" +#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/query/query_factory.h" #include "storage/rowset/beta_rowset.h" #include "storage/rowset/beta_rowset_writer.h" @@ -163,7 +164,7 @@ class IndexCompactionUtils { for (int i = 0; i < 
query_data.size(); i++) { Field param_value = Field::create_field(int32_t(query_data[i])); - std::unique_ptr query_param = nullptr; + std::unique_ptr query_param = nullptr; EXPECT_TRUE(segment_v2::InvertedIndexQueryParamFactory::create_query_value( PrimitiveType::TYPE_INT, ¶m_value, query_param) .ok()); @@ -173,8 +174,11 @@ class IndexCompactionUtils { IndexQueryContextPtr context = std::make_shared(); context->stats = &stats; + const auto* num_param = + dynamic_cast(query_param.get()); + EXPECT_NE(num_param, nullptr); EXPECT_TRUE(idx_reader - ->invoke_bkd_query(context, query_param->get_value(), + ->invoke_bkd_query(context, num_param, InvertedIndexQueryType::EQUAL_QUERY, *bkd_searcher, result) .ok()); diff --git a/be/test/storage/segment/index_reader_helper_test.cpp b/be/test/storage/segment/index_reader_helper_test.cpp index d96c4efeb13f6a..7d3c47b00b8730 100644 --- a/be/test/storage/segment/index_reader_helper_test.cpp +++ b/be/test/storage/segment/index_reader_helper_test.cpp @@ -63,14 +63,16 @@ class MockInvertedIndexReader : public InvertedIndexReader { } MOCK_FUNCTION Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, + InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } MOCK_FUNCTION Status try_query(const IndexQueryContextPtr& context, - const std::string& column_name, const void* query_value, + const std::string& column_name, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, size_t* count) override { return Status::OK(); } diff --git a/be/test/storage/segment/inverted_index_iterator_test.cpp b/be/test/storage/segment/inverted_index_iterator_test.cpp index 7deffc0fdbdc69..bf40089fc3e964 100644 --- a/be/test/storage/segment/inverted_index_iterator_test.cpp +++ 
b/be/test/storage/segment/inverted_index_iterator_test.cpp @@ -54,14 +54,14 @@ class MockInvertedIndexReader : public InvertedIndexReader { } Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, std::shared_ptr& roaring, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, size_t* count) override { *count = 0; return Status::OK(); diff --git a/be/test/storage/segment/inverted_index_query_param_test.cpp b/be/test/storage/segment/inverted_index_query_param_test.cpp index 4f00d625e9b63d..e28e917c6b82a6 100644 --- a/be/test/storage/segment/inverted_index_query_param_test.cpp +++ b/be/test/storage/segment/inverted_index_query_param_test.cpp @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. 
+#include "storage/index/inverted/inverted_index_query_param.h" + #include #include "common/status.h" @@ -34,133 +36,144 @@ class InvertedIndexQueryParamTest : public testing::Test { TEST_F(InvertedIndexQueryParamTest, TestBooleanWithField) { auto field = Field::create_field(static_cast(1)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_BOOLEAN, &field, param); ASSERT_TRUE(status.ok()); ASSERT_NE(param, nullptr); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, true); } TEST_F(InvertedIndexQueryParamTest, TestBooleanWithFieldFalse) { auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_BOOLEAN, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, false); } TEST_F(InvertedIndexQueryParamTest, TestBooleanTemplateWithNativeValue) { bool input_value = true; - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( &input_value, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, true); } TEST_F(InvertedIndexQueryParamTest, TestTinyIntWithField) { auto field = Field::create_field(static_cast(42)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_TINYINT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, 42); } TEST_F(InvertedIndexQueryParamTest, 
TestTinyIntTemplateWithNativeValue) { int8_t input_value = -100; - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( &input_value, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, -100); } TEST_F(InvertedIndexQueryParamTest, TestSmallIntWithField) { auto field = Field::create_field(static_cast(1234)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_SMALLINT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, 1234); } TEST_F(InvertedIndexQueryParamTest, TestSmallIntTemplateWithNativeValue) { int16_t input_value = -32000; - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( &input_value, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, -32000); } TEST_F(InvertedIndexQueryParamTest, TestIntWithField) { auto field = Field::create_field(static_cast(123456)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_INT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = &static_cast*>(param.get())->value(); EXPECT_EQ(*value, 123456); } TEST_F(InvertedIndexQueryParamTest, TestIntTemplateWithNativeValue) { int32_t input_value = -2147483647; - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( &input_value, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + 
const auto* value = &static_cast*>(param.get())->value(); EXPECT_EQ(*value, -2147483647); } TEST_F(InvertedIndexQueryParamTest, TestBigIntWithField) { auto field = Field::create_field(static_cast(9223372036854775807LL)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_BIGINT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, 9223372036854775807LL); } TEST_F(InvertedIndexQueryParamTest, TestBigIntTemplateWithNativeValue) { int64_t input_value = -9223372036854775807LL; - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( &input_value, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, -9223372036854775807LL); } TEST_F(InvertedIndexQueryParamTest, TestLargeIntWithField) { Int128 large_value = 12345678901234567890ULL; auto field = Field::create_field(large_value); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_LARGEINT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, static_cast<__int128_t>(large_value)); } TEST_F(InvertedIndexQueryParamTest, TestLargeIntTemplateWithNativeValue) { __int128_t input_value = 12345678901234567890ULL; - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( &input_value, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, input_value); } @@ -168,41 +181,45 @@ 
TEST_F(InvertedIndexQueryParamTest, TestLargeIntTemplateWithNativeValue) { TEST_F(InvertedIndexQueryParamTest, TestFloatWithField) { auto field = Field::create_field(static_cast(3.14f)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_FLOAT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_FLOAT_EQ(*value, 3.14f); } TEST_F(InvertedIndexQueryParamTest, TestFloatTemplateWithNativeValue) { float input_value = -1.23456f; - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( &input_value, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_FLOAT_EQ(*value, -1.23456f); } TEST_F(InvertedIndexQueryParamTest, TestDoubleWithField) { auto field = Field::create_field(static_cast(3.14159265358979)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_DOUBLE_EQ(*value, 3.14159265358979); } TEST_F(InvertedIndexQueryParamTest, TestDoubleTemplateWithNativeValue) { double input_value = -9.87654321e10; - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( &input_value, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_DOUBLE_EQ(*value, -9.87654321e10); } @@ -211,88 +228,96 @@ TEST_F(InvertedIndexQueryParamTest, TestDoubleTemplateWithNativeValue) { TEST_F(InvertedIndexQueryParamTest, 
TestCharWithField) { String str = "hello"; auto field = Field::create_field(str); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_CHAR, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, "hello"); } TEST_F(InvertedIndexQueryParamTest, TestVarcharWithField) { String str = "world"; auto field = Field::create_field(str); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_VARCHAR, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, "world"); } TEST_F(InvertedIndexQueryParamTest, TestStringWithField) { String str = "test string content"; auto field = Field::create_field(str); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, "test string content"); } TEST_F(InvertedIndexQueryParamTest, TestStringTemplateWithStringRef) { std::string str_data = "string ref test"; StringRef str_ref(str_data.data(), str_data.size()); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( &str_ref, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, "string ref test"); } TEST_F(InvertedIndexQueryParamTest, TestVarcharTemplateWithStringRef) { std::string str_data = "varchar ref test"; StringRef str_ref(str_data.data(), str_data.size()); - 
std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( &str_ref, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, "varchar ref test"); } TEST_F(InvertedIndexQueryParamTest, TestCharTemplateWithStringRef) { std::string str_data = "char ref test"; StringRef str_ref(str_data.data(), str_data.size()); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(&str_ref, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, "char ref test"); } TEST_F(InvertedIndexQueryParamTest, TestStringWithEmptyValue) { String str = ""; auto field = Field::create_field(str); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, ""); } TEST_F(InvertedIndexQueryParamTest, TestStringWithSpecialCharacters) { String str = "hello\nworld\t!@#$%^&*()"; auto field = Field::create_field(str); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, "hello\nworld\t!@#$%^&*()"); } @@ -302,7 +327,7 @@ TEST_F(InvertedIndexQueryParamTest, TestDecimalV2WithField) { // DecimalV2 uses Int128 as underlying storage Int128 dec_value = 123456789; auto field = Field::create_field(DecimalV2Value(dec_value)); - std::unique_ptr param; + 
std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMALV2, &field, param); ASSERT_TRUE(status.ok()); @@ -313,7 +338,7 @@ TEST_F(InvertedIndexQueryParamTest, TestDecimal32WithField) { // Decimal32 uses Int64 for Field storage Int64 dec_value = 12345; auto field = Field::create_field(dec_value); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMAL32, &field, param); ASSERT_TRUE(status.ok()); @@ -324,7 +349,7 @@ TEST_F(InvertedIndexQueryParamTest, TestDecimal64WithField) { // Decimal64 uses Int64 for Field storage Int64 dec_value = 123456789012; auto field = Field::create_field(dec_value); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMAL64, &field, param); ASSERT_TRUE(status.ok()); @@ -335,7 +360,7 @@ TEST_F(InvertedIndexQueryParamTest, TestDecimal128IWithField) { // Decimal128I uses Int128 for Field storage Int128 dec_value = 123456789012345LL; auto field = Field::create_field(dec_value); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( PrimitiveType::TYPE_DECIMAL128I, &field, param); ASSERT_TRUE(status.ok()); @@ -346,7 +371,7 @@ TEST_F(InvertedIndexQueryParamTest, TestDecimal256WithField) { // Decimal256 uses Int128 for Field storage Int128 dec_value = 123456789012345LL; auto field = Field::create_field(Decimal(dec_value)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMAL256, &field, param); ASSERT_TRUE(status.ok()); @@ -359,7 +384,7 @@ TEST_F(InvertedIndexQueryParamTest, TestDateWithField) { VecDateTimeValue tmp; tmp.from_date_int64(20231205); auto field = Field::create_field(tmp); - std::unique_ptr param; + std::unique_ptr param; auto status = 
InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATE, &field, param); ASSERT_TRUE(status.ok()); @@ -370,7 +395,7 @@ TEST_F(InvertedIndexQueryParamTest, TestDateTimeWithField) { VecDateTimeValue tmp; tmp.create_from_olap_datetime(20231205120000LL); auto field = Field::create_field(tmp); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATETIME, &field, param); ASSERT_TRUE(status.ok()); @@ -382,7 +407,7 @@ TEST_F(InvertedIndexQueryParamTest, TestDateV2WithField) { typename PrimitiveTypeTraits::CppType tmp; tmp.from_date_int64(v); auto field = Field::create_field(tmp); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATEV2, &field, param); ASSERT_TRUE(status.ok()); @@ -393,7 +418,7 @@ TEST_F(InvertedIndexQueryParamTest, TestDateTimeV2WithField) { UInt64 v = 20231205120000LL; auto field = Field::create_field( *(typename PrimitiveTypeTraits::CppType*)&v); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATETIMEV2, &field, param); ASSERT_TRUE(status.ok()); @@ -404,42 +429,46 @@ TEST_F(InvertedIndexQueryParamTest, TestDateTimeV2WithField) { TEST_F(InvertedIndexQueryParamTest, TestIPv4WithField) { auto field = Field::create_field(IPv4(3232235521)); // 192.168.0.1 - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_IPV4, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, IPv4(3232235521)); } TEST_F(InvertedIndexQueryParamTest, TestIPv4TemplateWithNativeValue) { IPv4 input_value(2130706433); // 127.0.0.1 - std::unique_ptr param; + std::unique_ptr param; auto status = 
InvertedIndexQueryParamFactory::create_query_value(&input_value, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, IPv4(2130706433)); } TEST_F(InvertedIndexQueryParamTest, TestIPv6WithField) { IPv6 ipv6_value = 1; auto field = Field::create_field(ipv6_value); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_IPV6, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, ipv6_value); } TEST_F(InvertedIndexQueryParamTest, TestIPv6TemplateWithNativeValue) { IPv6 input_value = 12345678901234567890ULL; - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(&input_value, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, input_value); } @@ -447,7 +476,7 @@ TEST_F(InvertedIndexQueryParamTest, TestIPv6TemplateWithNativeValue) { TEST_F(InvertedIndexQueryParamTest, TestUnsupportedType) { auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_ARRAY, &field, param); ASSERT_FALSE(status.ok()); @@ -456,7 +485,7 @@ TEST_F(InvertedIndexQueryParamTest, TestUnsupportedType) { TEST_F(InvertedIndexQueryParamTest, TestUnsupportedTypeMap) { auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_MAP, &field, param); ASSERT_FALSE(status.ok()); @@ -464,7 +493,7 @@ TEST_F(InvertedIndexQueryParamTest, TestUnsupportedTypeMap) { 
TEST_F(InvertedIndexQueryParamTest, TestUnsupportedTypeStruct) { auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRUCT, &field, param); ASSERT_FALSE(status.ok()); @@ -476,20 +505,22 @@ TEST_F(InvertedIndexQueryParamTest, TestIntegerBoundaryMin) { // Test minimum values { auto field = Field::create_field(static_cast(-128)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( PrimitiveType::TYPE_TINYINT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, -128); } { auto field = Field::create_field(static_cast(-32768)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( PrimitiveType::TYPE_SMALLINT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, -32768); } } @@ -498,20 +529,22 @@ TEST_F(InvertedIndexQueryParamTest, TestIntegerBoundaryMax) { // Test maximum values { auto field = Field::create_field(static_cast(127)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( PrimitiveType::TYPE_TINYINT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, 127); } { auto field = Field::create_field(static_cast(32767)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value( PrimitiveType::TYPE_SMALLINT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = 
+ &static_cast*>(param.get())->value(); EXPECT_EQ(*value, 32767); } } @@ -520,20 +553,22 @@ TEST_F(InvertedIndexQueryParamTest, TestZeroValues) { // Test zero values for different types { auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_INT, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, 0); } { auto field = Field::create_field(static_cast(0.0)); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_DOUBLE_EQ(*value, 0.0); } } @@ -543,22 +578,24 @@ TEST_F(InvertedIndexQueryParamTest, TestFloatSpecialValues) { { auto field = Field::create_field( static_cast(std::numeric_limits::infinity())); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_TRUE(std::isinf(*value)); } // Test negative infinity { auto field = Field::create_field( static_cast(-std::numeric_limits::infinity())); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_TRUE(std::isinf(*value)); EXPECT_LT(*value, 0); } @@ -567,11 +604,12 @@ TEST_F(InvertedIndexQueryParamTest, 
TestFloatSpecialValues) { TEST_F(InvertedIndexQueryParamTest, TestStringWithUnicodeCharacters) { String str = "你好世界 🌍 日本語"; auto field = Field::create_field(str); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(*value, "你好世界 🌍 日本語"); } @@ -579,11 +617,12 @@ TEST_F(InvertedIndexQueryParamTest, TestLongString) { std::string long_str(10000, 'x'); String str(long_str); auto field = Field::create_field(str); - std::unique_ptr param; + std::unique_ptr param; auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, &field, param); ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); + const auto* value = + &static_cast*>(param.get())->value(); EXPECT_EQ(value->size(), 10000); EXPECT_EQ(*value, long_str); } diff --git a/be/test/storage/segment/inverted_index_reader_test.cpp b/be/test/storage/segment/inverted_index_reader_test.cpp index 3cbe01865304fb..3f575b6039254c 100644 --- a/be/test/storage/segment/inverted_index_reader_test.cpp +++ b/be/test/storage/segment/inverted_index_reader_test.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -34,7 +35,7 @@ #include "storage/index/index_file_writer.h" #include "storage/index/inverted/inverted_index_desc.h" #include "storage/index/inverted/inverted_index_iterator.h" -#include "storage/index/inverted/inverted_index_reader.h" +#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_writer.h" #include "storage/tablet/tablet_schema.h" #include "storage/tablet/tablet_schema_helper.h" @@ -332,7 +333,9 @@ class InvertedIndexReaderTest : public testing::Test { context->io_ctx = &io_ctx; context->stats = &stats; 
context->runtime_state = &runtime_state; - auto query_status = str_reader->query(context, field_name, &str_ref, + auto qp_335 = TypedInvertedIndexQueryParam::create_unique(); + qp_335->set_value(&str_ref); + auto query_status = str_reader->query(context, field_name, qp_335.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -345,7 +348,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string not_exist = "orange"; StringRef not_exist_ref(not_exist.c_str(), not_exist.length()); - query_status = str_reader->query(context, field_name, ¬_exist_ref, + auto qp_348 = TypedInvertedIndexQueryParam::create_unique(); + qp_348->set_value(¬_exist_ref); + query_status = str_reader->query(context, field_name, qp_348.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -438,7 +443,9 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = bkd_reader->query(context, field_name, &query_value, + auto qp_441 = TypedInvertedIndexQueryParam::create_unique(); + qp_441->set_value(&query_value); + auto query_status = bkd_reader->query(context, field_name, qp_441.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -450,7 +457,9 @@ class InvertedIndexReaderTest : public testing::Test { bitmap = std::make_shared(); int32_t less_than_value = 100; - query_status = bkd_reader->query(context, field_name, &less_than_value, + auto qp_453 = TypedInvertedIndexQueryParam::create_unique(); + qp_453->set_value(&less_than_value); + query_status = bkd_reader->query(context, field_name, qp_453.get(), InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -462,7 +471,9 @@ class InvertedIndexReaderTest : public testing::Test { bitmap = std::make_shared(); int32_t greater_than_value = 100; - query_status = 
bkd_reader->query(context, field_name, &greater_than_value, + auto qp_465 = TypedInvertedIndexQueryParam::create_unique(); + qp_465->set_value(&greater_than_value); + query_status = bkd_reader->query(context, field_name, qp_465.get(), InvertedIndexQueryType::GREATER_THAN_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -521,7 +532,9 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = str_reader->query(context, field_name, &str_ref, + auto qp_524 = TypedInvertedIndexQueryParam::create_unique(); + qp_524->set_value(&str_ref); + auto query_status = str_reader->query(context, field_name, qp_524.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap1); EXPECT_TRUE(query_status.ok()) << query_status; @@ -531,7 +544,9 @@ class InvertedIndexReaderTest : public testing::Test { // Second query with same value, should be cache hit std::shared_ptr bitmap2 = std::make_shared(); - query_status = str_reader->query(context, field_name, &str_ref, + auto qp_534 = TypedInvertedIndexQueryParam::create_unique(); + qp_534->set_value(&str_ref); + query_status = str_reader->query(context, field_name, qp_534.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap2); EXPECT_TRUE(query_status.ok()) << query_status; @@ -589,7 +604,9 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = str_reader->query(context, field_name, &str_ref, + auto qp_592 = TypedInvertedIndexQueryParam::create_unique(); + qp_592->set_value(&str_ref); + auto query_status = str_reader->query(context, field_name, qp_592.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap1); EXPECT_TRUE(query_status.ok()) << query_status; @@ -600,7 +617,9 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap2 = std::make_shared(); StringRef str_ref2(values[1].data, values[1].size); // "banana" - query_status = 
str_reader->query(context, field_name, &str_ref2, + auto qp_603 = TypedInvertedIndexQueryParam::create_unique(); + qp_603->set_value(&str_ref2); + query_status = str_reader->query(context, field_name, qp_603.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap2); EXPECT_TRUE(query_status.ok()) << query_status; @@ -657,7 +676,9 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap = std::make_shared(); StringRef term_ref(term.data(), term.size()); - auto status = str_reader->query(context, field_name, &term_ref, + auto qp_660 = TypedInvertedIndexQueryParam::create_unique(); + qp_660->set_value(&term_ref); + auto status = str_reader->query(context, field_name, qp_660.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()) << status; EXPECT_EQ(1, bitmap->cardinality()); @@ -788,7 +809,9 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = str_reader->query(context, field_name, &str_ref, + auto qp_791 = TypedInvertedIndexQueryParam::create_unique(); + qp_791->set_value(&str_ref); + auto query_status = str_reader->query(context, field_name, qp_791.get(), InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -803,7 +826,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "apple"; StringRef str_ref_a(query_term.c_str(), query_term.length()); - query_status = str_reader->query(context, field_name, &str_ref_a, + auto qp_806 = TypedInvertedIndexQueryParam::create_unique(); + qp_806->set_value(&str_ref_a); + query_status = str_reader->query(context, field_name, qp_806.get(), InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -883,8 +908,10 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; + auto qp_887 = 
TypedInvertedIndexQueryParam::create_unique(); + qp_887->set_value(&str_ref); auto query_status = - str_reader->query(context, field_name, &str_ref, + str_reader->query(context, field_name, qp_887.get(), InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -900,7 +927,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "term_a"; StringRef str_ref_a(query_term.c_str(), query_term.length()); - query_status = str_reader->query(context, field_name, &str_ref_a, + auto qp_903 = TypedInvertedIndexQueryParam::create_unique(); + qp_903->set_value(&str_ref_a); + query_status = str_reader->query(context, field_name, qp_903.get(), InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -916,7 +945,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "noexist"; StringRef str_ref_no_term(query_term.c_str(), query_term.length()); - query_status = str_reader->query(context, field_name, &str_ref_no_term, + auto qp_919 = TypedInvertedIndexQueryParam::create_unique(); + qp_919->set_value(&str_ref_no_term); + query_status = str_reader->query(context, field_name, qp_919.get(), InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; EXPECT_EQ(bitmap->cardinality(), 0) << "V3: Should find 0 documents matching 'noexist'"; @@ -965,7 +996,9 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = str_reader->query(context, field_name, &str_ref, + auto qp_968 = TypedInvertedIndexQueryParam::create_unique(); + qp_968->set_value(&str_ref); + auto query_status = str_reader->query(context, field_name, qp_968.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -981,7 +1014,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "term_a"; StringRef 
str_ref_a(query_term.c_str(), query_term.length()); - query_status = str_reader->query(context, field_name, &str_ref_a, + auto qp_984 = TypedInvertedIndexQueryParam::create_unique(); + qp_984->set_value(&str_ref_a); + query_status = str_reader->query(context, field_name, qp_984.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -997,7 +1032,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "noexist"; StringRef str_ref_no_term(query_term.c_str(), query_term.length()); - query_status = str_reader->query(context, field_name, &str_ref_no_term, + auto qp_1000 = TypedInvertedIndexQueryParam::create_unique(); + qp_1000->set_value(&str_ref_no_term); + query_status = str_reader->query(context, field_name, qp_1000.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; EXPECT_EQ(bitmap->cardinality(), 0) << "V3: Should find 0 documents matching 'noexist'"; @@ -1064,7 +1101,9 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = index_reader->query(context, field_name, &str_ref, + auto qp_1067 = TypedInvertedIndexQueryParam::create_unique(); + qp_1067->set_value(&str_ref); + auto query_status = index_reader->query(context, field_name, qp_1067.get(), InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); ASSERT_TRUE(query_status.ok()) << "Query failed for term '" << query_term << "' in file " @@ -2086,7 +2125,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string field_name = "1"; // c2 unique_id StringRef query_val(values[0].data, values[0].size); - Status st = mock_reader->query(context, field_name, &query_val, + auto qp_2089 = TypedInvertedIndexQueryParam::create_unique(); + qp_2089->set_value(&query_val); + Status st = mock_reader->query(context, field_name, qp_2089.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_FALSE(st.ok()); @@ 
-2148,7 +2189,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query_term = "world"; StringRef query_val(query_term.data(), query_term.size()); - Status st = mock_reader->query(context, field_name, &query_val, + auto qp_2151 = TypedInvertedIndexQueryParam::create_unique(); + qp_2151->set_value(&query_val); + Status st = mock_reader->query(context, field_name, qp_2151.get(), InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); EXPECT_FALSE(st.ok()); @@ -2158,7 +2201,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string phrase_query = "Apache Doris"; StringRef phrase_query_val(phrase_query.data(), phrase_query.size()); - st = mock_reader->query(context, field_name, &phrase_query_val, + auto qp_2161 = TypedInvertedIndexQueryParam::create_unique(); + qp_2161->set_value(&phrase_query_val); + st = mock_reader->query(context, field_name, qp_2161.get(), InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_FALSE(st.ok()); @@ -2271,7 +2316,9 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap = std::make_shared(); std::string query_lower = "apple"; // lowercase StringRef str_ref(query_lower.c_str(), query_lower.length()); - auto status = str_reader->query(context, "c2", &str_ref, + auto qp_2274 = TypedInvertedIndexQueryParam::create_unique(); + qp_2274->set_value(&str_ref); + auto status = str_reader->query(context, "c2", qp_2274.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_GT(bitmap->cardinality(), 0) << "Should find 'Apple' with lowercase query"; @@ -2311,7 +2358,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string long_query = "this_is_a_very_long_string_that_exceeds_ignore_above_limit"; StringRef str_ref(long_query.c_str(), long_query.length()); - auto status = str_reader->query(context, "c2", &str_ref, + auto qp_2314 = TypedInvertedIndexQueryParam::create_unique(); + qp_2314->set_value(&str_ref); + auto status = str_reader->query(context, 
"c2", qp_2314.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_FALSE(status.ok()); EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED); @@ -2373,7 +2422,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "quick database"; StringRef query_ref(query.c_str(), query.length()); - auto status = fulltext_reader->query(context, "c2", &query_ref, + auto qp_2376 = TypedInvertedIndexQueryParam::create_unique(); + qp_2376->set_value(&query_ref); + auto status = fulltext_reader->query(context, "c2", qp_2376.get(), InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_GT(bitmap->cardinality(), 0) @@ -2386,7 +2437,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "search fast"; StringRef query_ref(query.c_str(), query.length()); - auto status = fulltext_reader->query(context, "c2", &query_ref, + auto qp_2389 = TypedInvertedIndexQueryParam::create_unique(); + qp_2389->set_value(&query_ref); + auto status = fulltext_reader->query(context, "c2", qp_2389.get(), InvertedIndexQueryType::MATCH_ALL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -2397,8 +2450,10 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "quick brown"; StringRef query_ref(query.c_str(), query.length()); + auto qp = TypedInvertedIndexQueryParam::create_unique(); + qp->set_value(&query_ref); auto status = fulltext_reader->query( - context, "c2", &query_ref, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); + context, "c2", qp.get(), InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -2408,7 +2463,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "sear"; StringRef query_ref(query.c_str(), query.length()); - auto status = fulltext_reader->query(context, "c2", &query_ref, + auto qp_2411 = TypedInvertedIndexQueryParam::create_unique(); + qp_2411->set_value(&query_ref); + auto status = 
fulltext_reader->query(context, "c2", qp_2411.get(), InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY, bitmap); EXPECT_TRUE(status.ok()); @@ -2420,8 +2477,10 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "qu.*k"; StringRef query_ref(query.c_str(), query.length()); + auto qp = TypedInvertedIndexQueryParam::create_unique(); + qp->set_value(&query_ref); auto status = fulltext_reader->query( - context, "c2", &query_ref, InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); + context, "c2", qp.get(), InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); EXPECT_TRUE(status.ok()); } } @@ -2479,9 +2538,11 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap = std::make_shared(); StringRef str_ref("test1", 5); + auto param_qp = TypedInvertedIndexQueryParam::create_unique(); + param_qp->set_value(&str_ref); InvertedIndexParam param; param.column_name = "c2"; - param.query_value = &str_ref; + param.query_value = std::move(param_qp); param.query_type = InvertedIndexQueryType::EQUAL_QUERY; param.num_rows = 3; param.roaring = bitmap; @@ -2494,9 +2555,11 @@ class InvertedIndexReaderTest : public testing::Test { size_t count = 0; auto* inverted_index_iterator = static_cast(iterator.get()); inverted_index_iterator->set_context(context); + auto try_qp = TypedInvertedIndexQueryParam::create_unique(); + try_qp->set_value(&str_ref); status = inverted_index_iterator->try_read_from_inverted_index( std::static_pointer_cast(inverted_index_reader), "c2", - &str_ref, InvertedIndexQueryType::EQUAL_QUERY, &count); + try_qp.get(), InvertedIndexQueryType::EQUAL_QUERY, &count); EXPECT_TRUE(status.ok()); } @@ -2568,7 +2631,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "500"; StringRef str_ref(query.c_str(), query.length()); - auto status = str_reader->query(context, "c2", &str_ref, + auto qp_2571 = TypedInvertedIndexQueryParam::create_unique(); + qp_2571->set_value(&str_ref); + auto status = 
str_reader->query(context, "c2", qp_2571.get(), InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); // This might succeed or fail depending on the implementation limits // The important thing is we handle the potential TooManyClauses error gracefully @@ -2605,7 +2670,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string empty_query = ""; StringRef str_ref(empty_query.c_str(), empty_query.length()); - auto status = fulltext_reader->query(context, "c2", &str_ref, + auto qp_2608 = TypedInvertedIndexQueryParam::create_unique(); + qp_2608->set_value(&str_ref); + auto status = fulltext_reader->query(context, "c2", qp_2608.get(), InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); // Should either succeed with empty result or fail gracefully } @@ -2747,7 +2814,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "500"; StringRef str_ref(query.c_str(), query.length()); - auto status = str_reader->query(context, "c2", &str_ref, + auto qp_2750 = TypedInvertedIndexQueryParam::create_unique(); + qp_2750->set_value(&str_ref); + auto status = str_reader->query(context, "c2", qp_2750.get(), InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); // This might succeed or fail depending on the implementation limits // The important thing is we handle the potential TooManyClauses error gracefully @@ -2784,7 +2853,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string empty_query; StringRef str_ref(empty_query.c_str(), empty_query.length()); - auto status = fulltext_reader->query(context, "c2", &str_ref, + auto qp_2787 = TypedInvertedIndexQueryParam::create_unique(); + qp_2787->set_value(&str_ref); + auto status = fulltext_reader->query(context, "c2", qp_2787.get(), InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); // Should either succeed with empty result or fail gracefully } @@ -2835,8 +2906,10 @@ class InvertedIndexReaderTest : public testing::Test { std::string regexp_query = "test.*"; StringRef query_ref(regexp_query.c_str(), 
regexp_query.length()); + auto qp = TypedInvertedIndexQueryParam::create_unique(); + qp->set_value(&query_ref); auto status = fulltext_reader->query( - context, "c2", &query_ref, InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); + context, "c2", qp.get(), InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -2871,48 +2944,357 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "cherry"; StringRef str_ref(query.c_str(), query.length()); - auto status = str_reader->query(context, "c2", &str_ref, + auto qp_2874 = TypedInvertedIndexQueryParam::create_unique(); + qp_2874->set_value(&str_ref); + auto status = str_reader->query(context, "c2", qp_2874.get(), InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test LESS_EQUAL_QUERY bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + auto qp_2880 = TypedInvertedIndexQueryParam::create_unique(); + qp_2880->set_value(&str_ref); + status = str_reader->query(context, "c2", qp_2880.get(), InvertedIndexQueryType::LESS_EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test GREATER_THAN_QUERY bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + auto qp_2886 = TypedInvertedIndexQueryParam::create_unique(); + qp_2886->set_value(&str_ref); + status = str_reader->query(context, "c2", qp_2886.get(), InvertedIndexQueryType::GREATER_THAN_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test GREATER_EQUAL_QUERY bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + auto qp_2892 = TypedInvertedIndexQueryParam::create_unique(); + qp_2892->set_value(&str_ref); + status = str_reader->query(context, "c2", qp_2892.get(), InvertedIndexQueryType::GREATER_EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test MATCH_PHRASE_QUERY for StringType bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + auto qp_2898 = 
TypedInvertedIndexQueryParam::create_unique(); + qp_2898->set_value(&str_ref); + status = str_reader->query(context, "c2", qp_2898.get(), InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test MATCH_PHRASE_PREFIX_QUERY for StringType bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + auto qp_2904 = TypedInvertedIndexQueryParam::create_unique(); + qp_2904->set_value(&str_ref); + status = str_reader->query(context, "c2", qp_2904.get(), InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test MATCH_REGEXP_QUERY for StringType bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + auto qp_2910 = TypedInvertedIndexQueryParam::create_unique(); + qp_2910->set_value(&str_ref); + status = str_reader->query(context, "c2", qp_2910.get(), InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); EXPECT_TRUE(status.ok()); } } + // Sanity probe: confirm type_limit is specialised and not + // falling through to the zero-init default of an unspecialised + // numeric_limits. TypedInvertedIndexQueryParam::encode_min/ + // max_ascending depend on this specialisation for valid +/-infinity + // sentinels — without it both bounds collapse to encoded({0,0}) and BKD + // half-bounded range queries on DecimalV2 columns silently miss negative + // (for LESS_THAN) or positive (for GREATER_THAN) values. + void test_type_limit_decimal12_specialisation() { + auto lo = type_limit::min(); + auto hi = type_limit::max(); + EXPECT_EQ(lo.integer, -999999999999999999LL); + EXPECT_EQ(lo.fraction, -999999999); + EXPECT_EQ(hi.integer, +999999999999999999LL); + EXPECT_EQ(hi.fraction, +999999999); + } + + // Same regression as decimal12_t but for uint24_t (TYPE_DATE storage). + // Values match OLAP DATE packing: 0001-01-01 / 9999-12-31. 
+ void test_type_limit_uint24_specialisation() { + EXPECT_EQ(static_cast(type_limit::min()), 33u); + EXPECT_EQ(static_cast(type_limit::max()), 5119903u); + } + + // Generic BKD range-query verifier. Writes `values` into the BKD index + // for `column_name`, then runs EQUAL / LESS_THAN / LESS_EQUAL / + // GREATER_THAN / GREATER_EQUAL queries against `threshold`. Expected + // cardinalities are derived from the input `values` + `threshold` via + // std::count_if, so the caller doesn't have to keep them in sync. + // + // Locks in: + // * the typed-param interface (TypedInvertedIndexQueryParam) + // * the +/-infinity sentinels from type_limit + // * BKD's writer/reader/visitor agreement on KeyCoder-encoded bytes + template + void verify_bkd_range_queries(int col_id, std::string_view rowset_id, + const std::string& column_name, std::vector values, + T threshold) { + OlapReaderStatistics stats; + RuntimeState runtime_state; + io::IOContext io_ctx; + + IndexQueryContextPtr context = std::make_shared(); + context->io_ctx = &io_ctx; + context->stats = &stats; + context->runtime_state = &runtime_state; + + TabletIndex idx_meta; + std::string index_path_prefix; + prepare_bkd_index_typed(rowset_id, /*seg_id=*/0, col_id, values, &idx_meta, + &index_path_prefix); + + auto reader = std::make_shared( + io::global_local_filesystem(), index_path_prefix, InvertedIndexStorageFormatPB::V2); + EXPECT_TRUE(reader->init().ok()); + + auto bkd_reader = BkdIndexReader::create_shared(&idx_meta, reader); + EXPECT_NE(bkd_reader, nullptr); + + auto run_query = [&](InvertedIndexQueryType qt, T thr) { + auto qp = TypedInvertedIndexQueryParam::create_unique(); + qp->set_value(&thr); + auto bitmap = std::make_shared(); + auto status = bkd_reader->query(context, column_name, qp.get(), qt, bitmap); + EXPECT_TRUE(status.ok()) << column_name << ": " << status; + return bitmap->cardinality(); + }; + + const auto expect_eq = std::count_if(values.begin(), values.end(), + [&](const T& v) { return v == 
threshold; }); + const auto expect_lt = std::count_if(values.begin(), values.end(), + [&](const T& v) { return v < threshold; }); + const auto expect_le = std::count_if(values.begin(), values.end(), + [&](const T& v) { return v <= threshold; }); + const auto expect_gt = std::count_if(values.begin(), values.end(), + [&](const T& v) { return v > threshold; }); + const auto expect_ge = std::count_if(values.begin(), values.end(), + [&](const T& v) { return v >= threshold; }); + + EXPECT_EQ(run_query(InvertedIndexQueryType::EQUAL_QUERY, threshold), expect_eq) + << column_name << " EQUAL"; + EXPECT_EQ(run_query(InvertedIndexQueryType::LESS_THAN_QUERY, threshold), expect_lt) + << column_name << " LESS_THAN (relies on encode_min sentinel)"; + EXPECT_EQ(run_query(InvertedIndexQueryType::LESS_EQUAL_QUERY, threshold), expect_le) + << column_name << " LESS_EQUAL (relies on encode_min sentinel)"; + EXPECT_EQ(run_query(InvertedIndexQueryType::GREATER_THAN_QUERY, threshold), expect_gt) + << column_name << " GREATER_THAN (relies on encode_max sentinel)"; + EXPECT_EQ(run_query(InvertedIndexQueryType::GREATER_EQUAL_QUERY, threshold), expect_ge) + << column_name << " GREATER_EQUAL (relies on encode_max sentinel)"; + } + + // Per-type wrappers. col_id values match create_comprehensive_schema() + // (commented-out c_double/c_float shift later indices, hence date=4, + // datetime=5, decimal=6, bool=7, tinyint=8, smallint=9, largeint=10, + // datev2=12, datetimev2=13, timestamptz=14). + void test_bkd_range_int() { + // INT32 row counts: -1000 (loss), 0 (balance), 42, 100, 200, 300. + verify_bkd_range_queries( + /*col_id=*/0, "bkd_range_int", "c_int", {-1000, 0, 42, 100, 200, 300}, + /*threshold=*/100); + } + void test_bkd_range_bigint() { + // INT64 nanosecond timestamps (relative epoch deltas). 
+ verify_bkd_range_queries( + /*col_id=*/1, "bkd_range_bigint", "c_bigint", + {-1'000'000LL, 0LL, 1'500LL, 1'000'000LL, 1'000'000'000LL, 100'000'000'000LL}, + /*threshold=*/1'000'000LL); + } + void test_bkd_range_smallint() { + // INT16 range: -32768..32767, e.g. signed short port deltas. + verify_bkd_range_queries( + /*col_id=*/9, "bkd_range_smallint", "c_smallint", + {int16_t(-32768), int16_t(-1024), int16_t(-1), int16_t(0), int16_t(8080), + int16_t(32767)}, + /*threshold=*/int16_t(0)); + } + void test_bkd_range_tinyint() { + // INT8 range: -128..127, e.g. log-level / tinyint flags. + verify_bkd_range_queries( + /*col_id=*/8, "bkd_range_tinyint", "c_tinyint", + {int8_t(-128), int8_t(-10), int8_t(-1), int8_t(0), int8_t(50), int8_t(127)}, + /*threshold=*/int8_t(0)); + } + void test_bkd_range_largeint() { + // INT128. Spans negative through ~10^12 to exercise the high half. + verify_bkd_range_queries( + /*col_id=*/10, "bkd_range_largeint", "c_largeint", + {static_cast<__int128_t>(-1'000'000), static_cast<__int128_t>(-1), + static_cast<__int128_t>(0), static_cast<__int128_t>(1), + static_cast<__int128_t>(1'000'000), static_cast<__int128_t>(1'000'000'000'000LL)}, + /*threshold=*/static_cast<__int128_t>(0)); + } + void test_bkd_range_decimalv2() { + // Real DecimalV2 (DECIMAL(27,9)) literals: -100.0, 0.0, 42.5, 100.0, 200.0, 300.0 + // (decimal12_t.fraction is scaled by 10^9). 
+ verify_bkd_range_queries( + /*col_id=*/6, "bkd_range_decimalv2", "c_decimal", + {decimal12_t {-100, 0}, decimal12_t {0, 0}, decimal12_t {42, 500'000'000}, + decimal12_t {100, 0}, decimal12_t {200, 0}, decimal12_t {300, 0}}, + /*threshold=*/decimal12_t {100, 0}); + } + void test_bkd_range_date() { + // DATE in OLAP packed format `(year << 9) | (month << 5) | day`: + // 2020-01-01, 2021-06-15, 2023-03-10, 2024-12-31, 2026-08-08, 2030-01-01 + auto pack_date = [](int y, int m, int d) -> uint24_t { + return uint24_t(static_cast((y << 9) | (m << 5) | d)); + }; + verify_bkd_range_queries( + /*col_id=*/4, "bkd_range_date", "c_date", + {pack_date(2020, 1, 1), pack_date(2021, 6, 15), pack_date(2023, 3, 10), + pack_date(2024, 12, 31), pack_date(2026, 8, 8), pack_date(2030, 1, 1)}, + /*threshold=*/pack_date(2024, 12, 31)); + } + void test_bkd_range_datetime() { + // OLAP DATETIME packs as decimal YYYYMMDDhhmmss (see VecDateTimeValue:: + // to_olap_datetime). TypedInvertedIndexQueryParam::storage_val + // is int64_t (conditional_t override) to line up with KeyCoder's + // signed view. + auto dt = [](int y, int mo, int d, int h, int mi, int s) -> int64_t { + return static_cast((static_cast(y) * 10000 + mo * 100 + d) * + 1000000ULL + + static_cast(h) * 10000 + mi * 100 + s); + }; + verify_bkd_range_queries( + /*col_id=*/5, "bkd_range_datetime", "c_datetime", + {dt(2020, 1, 1, 12, 0, 0), dt(2021, 1, 1, 12, 0, 0), dt(2022, 6, 15, 15, 0, 0), + dt(2024, 3, 10, 9, 30, 0), dt(2025, 12, 25, 0, 0, 0), dt(2030, 1, 1, 12, 0, 0)}, + /*threshold=*/dt(2024, 3, 10, 9, 30, 0)); + } + void test_bkd_range_datev2() { + // DateV2 packed format: bits [0..4]=day, [5..8]=month, [9..23]=year. 
+ auto pack_datev2 = [](int y, int m, int d) -> uint32_t { + return static_cast((y << 9) | (m << 5) | d); + }; + verify_bkd_range_queries( + /*col_id=*/12, "bkd_range_datev2", "c_datev2", + {pack_datev2(2020, 1, 1), pack_datev2(2021, 6, 15), pack_datev2(2023, 3, 10), + pack_datev2(2024, 12, 31), pack_datev2(2026, 8, 8), pack_datev2(2030, 1, 1)}, + /*threshold=*/pack_datev2(2024, 12, 31)); + } + // DateTimeV2 / TimestampTz packing per vdatetime_value.h: + // [date_v2 << 37] | [hour << 32] | [minute << 26] | [second << 20] | microsecond + // date_v2 = (year << 9) | (month << 5) | day + static uint64_t pack_dtv2(int y, int mo, int d, int h, int mi, int s, int us = 0) { + uint64_t date = (static_cast(y) << 9) | (static_cast(mo) << 5) | d; + return (date << 37) | (static_cast(h) << 32) | (static_cast(mi) << 26) | + (static_cast(s) << 20) | us; + } + void test_bkd_range_datetimev2() { + verify_bkd_range_queries( + /*col_id=*/13, "bkd_range_datetimev2", "c_datetimev2", + {pack_dtv2(2020, 1, 1, 12, 0, 0), pack_dtv2(2021, 6, 15, 15, 0, 0), + pack_dtv2(2023, 3, 10, 9, 30, 0), pack_dtv2(2024, 12, 31, 23, 59, 59), + pack_dtv2(2026, 8, 8, 8, 8, 8), pack_dtv2(2030, 1, 1, 12, 0, 0)}, + /*threshold=*/pack_dtv2(2024, 12, 31, 23, 59, 59)); + } + void test_bkd_range_timestamptz() { + // TimestampTzValue storage = uint64_t with the same DateTimeV2 packing + // (the TZ offset lives outside the BKD-indexed key). + verify_bkd_range_queries( + /*col_id=*/14, "bkd_range_timestamptz", "c_timestamptz", + {pack_dtv2(2020, 1, 1, 12, 0, 0), pack_dtv2(2021, 6, 15, 15, 0, 0), + pack_dtv2(2023, 3, 10, 9, 30, 0), pack_dtv2(2024, 12, 31, 23, 59, 59), + pack_dtv2(2026, 8, 8, 8, 8, 8), pack_dtv2(2030, 1, 1, 12, 0, 0)}, + /*threshold=*/pack_dtv2(2024, 12, 31, 23, 59, 59)); + } + void test_bkd_range_bool() { + // Storage = uint8_t. With duplicates {false,false,false,true,true,true} + // threshold=false means LT=0 / LE=3 / GT=3 / GE=6. 
+ verify_bkd_range_queries( + /*col_id=*/7, "bkd_range_bool", "c_bool", + {uint8_t(0), uint8_t(0), uint8_t(0), uint8_t(1), uint8_t(1), uint8_t(1)}, + /*threshold=*/uint8_t(0)); + } + void test_bkd_range_float() { + // FLOAT real values: ~-100.5 (negative offset), -1.25, 0.0, π + // approximated, 100.25, 1234.5 (mid-positive). + verify_bkd_range_queries( + /*col_id=*/15, "bkd_range_float", "c_float", + {-100.5f, -1.25f, 0.0f, 3.14159f, 100.25f, 1234.5f}, + /*threshold=*/3.14159f); + } + void test_bkd_range_double() { + // DOUBLE real values across magnitudes from -1e10 to +1e10, including + // negative scientific, π, and large positive. + verify_bkd_range_queries( + /*col_id=*/16, "bkd_range_double", "c_double", + {-9.87654321e10, -1.5, 0.0, 3.14159265358979, 1.0e6, 1.0e10}, + /*threshold=*/3.14159265358979); + } + void test_bkd_range_decimal32() { + // DECIMAL(9, 2). Storage = real_value × 10^2. + auto d = [](double v) { return static_cast(std::llround(v * 100)); }; + verify_bkd_range_queries( + /*col_id=*/17, "bkd_range_decimal32", "c_decimal32", + {d(-1.00), d(-0.01), d(0.00), d(1.23), d(9999.99), d(999999.99)}, + /*threshold=*/d(1.23)); + } + void test_bkd_range_decimal64() { + // DECIMAL(18, 4). Storage = real_value × 10^4. + auto d = [](double v) { return static_cast(std::llround(v * 10000)); }; + verify_bkd_range_queries( + /*col_id=*/18, "bkd_range_decimal64", "c_decimal64", + {d(-100.0), d(0.0), d(0.0123), d(12345.6789), d(99999999.9999), d(9999999999.9999)}, + /*threshold=*/d(12345.6789)); + } + void test_bkd_range_decimal128i() { + // DECIMAL(38, 10) stored as Int128. Values: + // -100.0000000000, -0.0000000001, 0, 1.2345678900, + // 12345.6789012345, 1e30 (ledger-scale). 
+ verify_bkd_range_queries( + /*col_id=*/19, "bkd_range_decimal128i", "c_decimal128i", + {static_cast<__int128_t>(-1'000'000'000'000LL), static_cast<__int128_t>(-1), + static_cast<__int128_t>(0), static_cast<__int128_t>(12'345'678'900LL), + static_cast<__int128_t>(123'456'789'012'345LL), + static_cast<__int128_t>(1'000'000'000'000'000LL) * + static_cast<__int128_t>(1'000'000'000'000'000LL)}, + /*threshold=*/static_cast<__int128_t>(12'345'678'900LL)); + } + void test_bkd_range_decimal256() { + // DECIMAL(76, 20) stored as wide::Int256. Use scaled integers spanning + // a representative range from -1e6 up to 10^18. + verify_bkd_range_queries( + /*col_id=*/20, "bkd_range_decimal256", "c_decimal256", + {wide::Int256(-1'000'000), wide::Int256(-1), wide::Int256(0), + wide::Int256(123'456'789), wide::Int256(123'456'789'012'345LL), + wide::Int256(1'000'000'000'000'000'000LL)}, + /*threshold=*/wide::Int256(123'456'789)); + } + void test_bkd_range_ipv4() { + // Real IPv4 addresses. uint32_t encoding = (a<<24)|(b<<16)|(c<<8)|d: + // 0.0.0.1, 10.0.0.1, 127.0.0.1, 192.168.0.1, 192.168.0.254, 255.255.255.254 + verify_bkd_range_queries( + /*col_id=*/21, "bkd_range_ipv4", "c_ipv4", + {0x00000001U, 0x0A000001U, 0x7F000001U, 0xC0A80001U, 0xC0A800FEU, 0xFFFFFFFEU}, + /*threshold=*/0xC0A80001U); // 192.168.0.1 + } + void test_bkd_range_ipv6() { + // Real IPv6 addresses (uint128_t = 16-byte big-endian view): + // ::1 (loopback) + // ::ffff:7f00:0001 (IPv4-mapped 127.0.0.1) + // 2001:db8::1 (documentation prefix) + // 2001:db8:1::1 + // fe80::1 (link-local) + // ffff:ffff:: (last valid) + auto ipv6 = [](uint64_t hi, uint64_t lo) -> uint128_t { + return (static_cast(hi) << 64) | lo; + }; + verify_bkd_range_queries( + /*col_id=*/22, "bkd_range_ipv6", "c_ipv6", + {ipv6(0, 1), ipv6(0, 0x0000FFFF7F000001ULL), ipv6(0x20010DB800000000ULL, 1), + ipv6(0x20010DB800010000ULL, 1), ipv6(0xFE80000000000000ULL, 1), + ipv6(0xFFFFFFFF00000000ULL, 0)}, + /*threshold=*/ipv6(0x20010DB800000000ULL, 1)); // 
2001:db8::1 + } + // Test BKD specific uncovered paths void test_bkd_uncovered_paths() { std::string_view rowset_id = "test_bkd_uncovered"; @@ -2953,13 +3335,17 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { // Test try_query path size_t count = 0; - auto status = bkd_reader->try_query(context, "c1", &test_case.second, test_case.first, - &count); + auto qp_2956 = TypedInvertedIndexQueryParam::create_unique(); + qp_2956->set_value(&test_case.second); + auto status = + bkd_reader->try_query(context, "c1", qp_2956.get(), test_case.first, &count); EXPECT_TRUE(status.ok()) << "Try query type: " << static_cast(test_case.first); // Test actual query path std::shared_ptr bitmap = std::make_shared(); - status = bkd_reader->query(context, "c1", &test_case.second, test_case.first, bitmap); + auto qp_2962 = TypedInvertedIndexQueryParam::create_unique(); + qp_2962->set_value(&test_case.second); + status = bkd_reader->query(context, "c1", qp_2962.get(), test_case.first, bitmap); EXPECT_TRUE(status.ok()) << "Query type: " << static_cast(test_case.first); } @@ -2968,12 +3354,16 @@ class InvertedIndexReaderTest : public testing::Test { int32_t max_value = 100; // Greater than maximum in data std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c1", &min_value, + auto qp_2971 = TypedInvertedIndexQueryParam::create_unique(); + qp_2971->set_value(&min_value); + auto status = bkd_reader->query(context, "c1", qp_2971.get(), InvertedIndexQueryType::GREATER_THAN_QUERY, bitmap); EXPECT_TRUE(status.ok()); bitmap = std::make_shared(); - status = bkd_reader->query(context, "c1", &max_value, + auto qp_2976 = TypedInvertedIndexQueryParam::create_unique(); + qp_2976->set_value(&max_value); + status = bkd_reader->query(context, "c1", qp_2976.get(), InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3019,11 +3409,16 @@ class InvertedIndexReaderTest : public testing::Test { // Test the 
bypass path in read_from_inverted_index std::shared_ptr bitmap = std::make_shared(); int32_t query_value = 3; + auto make_int_qp = [&query_value]() { + auto qp = TypedInvertedIndexQueryParam::create_unique(); + qp->set_value(&query_value); + return qp; + }; // This should trigger the bypass logic due to low threshold InvertedIndexParam param; param.column_name = "c1"; - param.query_value = &query_value; + param.query_value = make_int_qp(); param.query_type = InvertedIndexQueryType::LESS_THAN_QUERY; param.num_rows = 5; param.roaring = bitmap; @@ -3037,7 +3432,7 @@ class InvertedIndexReaderTest : public testing::Test { bitmap = std::make_shared(); InvertedIndexParam param1; param1.column_name = "c1"; - param1.query_value = &query_value; + param1.query_value = make_int_qp(); param1.query_type = InvertedIndexQueryType::EQUAL_QUERY; param1.num_rows = 5; param1.roaring = bitmap; @@ -3047,10 +3442,11 @@ class InvertedIndexReaderTest : public testing::Test { // Test try_read_from_inverted_index with non-BKD compatible query size_t count = 0; + auto try_qp = make_int_qp(); status = inverted_index_iterator->try_read_from_inverted_index( std::static_pointer_cast( iterator->get_reader(InvertedIndexReaderType::STRING_TYPE)), - "c1", &query_value, InvertedIndexQueryType::MATCH_ANY_QUERY, &count); + "c1", try_qp.get(), InvertedIndexQueryType::MATCH_ANY_QUERY, &count); EXPECT_TRUE(status.ok()); // Should succeed but not do anything for non-BKD queries } @@ -3080,6 +3476,16 @@ class InvertedIndexReaderTest : public testing::Test { {"c_datev2", FieldType::OLAP_FIELD_TYPE_DATEV2, 4, false}, {"c_datetimev2", FieldType::OLAP_FIELD_TYPE_DATETIMEV2, 8, false}, {"c_timestamptz", FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ, 8, false}, + // Appended (col_id 15..) — keep new entries here so existing + // col_id references in older tests remain stable. 
+ {"c_float", FieldType::OLAP_FIELD_TYPE_FLOAT, 4, false}, // 15 + {"c_double", FieldType::OLAP_FIELD_TYPE_DOUBLE, 8, false}, // 16 + {"c_decimal32", FieldType::OLAP_FIELD_TYPE_DECIMAL32, 4, false}, // 17 + {"c_decimal64", FieldType::OLAP_FIELD_TYPE_DECIMAL64, 8, false}, // 18 + {"c_decimal128i", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 16, false}, // 19 + {"c_decimal256", FieldType::OLAP_FIELD_TYPE_DECIMAL256, 32, false}, // 20 + {"c_ipv4", FieldType::OLAP_FIELD_TYPE_IPV4, 4, false}, // 21 + {"c_ipv6", FieldType::OLAP_FIELD_TYPE_IPV6, 16, false}, // 22 }; for (size_t i = 0; i < columns.size(); ++i) { @@ -3189,8 +3595,10 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_int", &test_case.second, - test_case.first, bitmap); + auto qp_3192 = TypedInvertedIndexQueryParam::create_unique(); + qp_3192->set_value(&test_case.second); + auto status = + bkd_reader->query(context, "c_int", qp_3192.get(), test_case.first, bitmap); EXPECT_TRUE(status.ok()) << "Query type: " << static_cast(test_case.first); if (test_case.first == InvertedIndexQueryType::EQUAL_QUERY) { @@ -3201,7 +3609,9 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { size_t count = 0; - auto status = bkd_reader->try_query(context, "c_int", &test_case.second, + auto qp_3204 = TypedInvertedIndexQueryParam::create_unique(); + qp_3204->set_value(&test_case.second); + auto status = bkd_reader->try_query(context, "c_int", qp_3204.get(), test_case.first, &count); EXPECT_TRUE(status.ok()) << "Try query type: " << static_cast(test_case.first); } @@ -3226,7 +3636,9 @@ class InvertedIndexReaderTest : public testing::Test { int64_t query_value = 1000000LL; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_bigint", &query_value, + auto qp_3229 = TypedInvertedIndexQueryParam::create_unique(); + 
qp_3229->set_value(&query_value); + auto status = bkd_reader->query(context, "c_bigint", qp_3229.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_EQ(bitmap->cardinality(), 1); @@ -3259,7 +3671,9 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_timestamptz", &test_case.second, + auto qp_3262 = TypedInvertedIndexQueryParam::create_unique(); + qp_3262->set_value(&test_case.second); + auto status = bkd_reader->query(context, "c_timestamptz", qp_3262.get(), test_case.first, bitmap); EXPECT_TRUE(status.ok()) << "Query type: " << static_cast(test_case.first); @@ -3271,7 +3685,9 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { size_t count = 0; - auto status = bkd_reader->try_query(context, "c_timestamptz", &test_case.second, + auto qp_3274 = TypedInvertedIndexQueryParam::create_unique(); + qp_3274->set_value(&test_case.second); + auto status = bkd_reader->try_query(context, "c_timestamptz", qp_3274.get(), test_case.first, &count); EXPECT_TRUE(status.ok()) << "Try query type: " << static_cast(test_case.first); } @@ -3297,7 +3713,9 @@ class InvertedIndexReaderTest : public testing::Test { double query_value = 3.14; std::shared_ptr bitmap = std::make_shared(); auto status = - bkd_reader->query(&io_ctx, &stats, &runtime_state, "c_double", &query_value, + auto qp_3300 = TypedInvertedIndexQueryParam::create_unique(); + qp_3300->set_value(&stats); + bkd_reader->query(&io_ctx, qp_3300.get(), &runtime_state, "c_double", &query_value, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_EQ(bitmap->cardinality(), 1); @@ -3323,7 +3741,9 @@ class InvertedIndexReaderTest : public testing::Test { float query_value = 1.5f; std::shared_ptr bitmap = std::make_shared(); auto status = - bkd_reader->query(&io_ctx, &stats, &runtime_state, "c_float", 
&query_value, + auto qp_3326 = TypedInvertedIndexQueryParam::create_unique(); + qp_3326->set_value(&stats); + bkd_reader->query(&io_ctx, qp_3326.get(), &runtime_state, "c_float", &query_value, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_EQ(bitmap->cardinality(), 1); @@ -3363,7 +3783,11 @@ class InvertedIndexReaderTest : public testing::Test { uint32_t query_value = 20240102; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_date", &query_value, + // TYPE_DATE storage is uint24_t — narrow from the test's uint32_t. + typename PrimitiveTypeTraits::StorageFieldType date_storage(query_value); + auto qp_3366 = TypedInvertedIndexQueryParam::create_unique(); + qp_3366->set_value(&date_storage); + auto status = bkd_reader->query(context, "c_date", qp_3366.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3386,9 +3810,14 @@ class InvertedIndexReaderTest : public testing::Test { auto bkd_reader = BkdIndexReader::create_shared(&idx_meta, reader); EXPECT_NE(bkd_reader, nullptr); - uint64_t query_value = 20240101130000ULL; + // TypedInvertedIndexQueryParam::storage_val is int64_t + // (conditional_t override on storage_val) — match the type to avoid + // signedness pointer-conversion errors. + int64_t query_value = 20240101130000LL; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_datetime", &query_value, + auto qp_3391 = TypedInvertedIndexQueryParam::create_unique(); + qp_3391->set_value(&query_value); + auto status = bkd_reader->query(context, "c_datetime", qp_3391.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3412,7 +3841,11 @@ class InvertedIndexReaderTest : public testing::Test { bool query_value = true; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_bool", &query_value, + // TYPE_BOOLEAN storage is uint8_t. 
+ uint8_t bool_storage = query_value ? 1 : 0; + auto qp_3415 = TypedInvertedIndexQueryParam::create_unique(); + qp_3415->set_value(&bool_storage); + auto status = bkd_reader->query(context, "c_bool", qp_3415.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3436,7 +3869,9 @@ class InvertedIndexReaderTest : public testing::Test { int8_t query_value = 1; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_tinyint", &query_value, + auto qp_3439 = TypedInvertedIndexQueryParam::create_unique(); + qp_3439->set_value(&query_value); + auto status = bkd_reader->query(context, "c_tinyint", qp_3439.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3460,7 +3895,9 @@ class InvertedIndexReaderTest : public testing::Test { int16_t query_value = 1000; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_smallint", &query_value, + auto qp_3463 = TypedInvertedIndexQueryParam::create_unique(); + qp_3463->set_value(&query_value); + auto status = bkd_reader->query(context, "c_smallint", qp_3463.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3484,7 +3921,9 @@ class InvertedIndexReaderTest : public testing::Test { __int128 query_value = 0; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_largeint", &query_value, + auto qp_3487 = TypedInvertedIndexQueryParam::create_unique(); + qp_3487->set_value(&query_value); + auto status = bkd_reader->query(context, "c_largeint", qp_3487.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3508,7 +3947,9 @@ class InvertedIndexReaderTest : public testing::Test { uint32_t query_value = 20240202; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_datev2", &query_value, + auto qp_3511 = TypedInvertedIndexQueryParam::create_unique(); + 
qp_3511->set_value(&query_value); + auto status = bkd_reader->query(context, "c_datev2", qp_3511.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3533,7 +3974,9 @@ class InvertedIndexReaderTest : public testing::Test { uint64_t query_value = 20240201130000ULL; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_datetimev2", &query_value, + auto qp_3536 = TypedInvertedIndexQueryParam::create_unique(); + qp_3536->set_value(&query_value); + auto status = bkd_reader->query(context, "c_datetimev2", qp_3536.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3558,7 +4001,9 @@ class InvertedIndexReaderTest : public testing::Test { uint64_t query_value = 20240201130000ULL; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_timestamptz", &query_value, + auto qp_3561 = TypedInvertedIndexQueryParam::create_unique(); + qp_3561->set_value(&query_value); + auto status = bkd_reader->query(context, "c_timestamptz", qp_3561.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3644,8 +4089,10 @@ class InvertedIndexReaderTest : public testing::Test { std::string query_value = "test"; std::shared_ptr bitmap = std::make_shared(); + auto qp_unsupp = TypedInvertedIndexQueryParam::create_unique(); + qp_unsupp->set_value(query_value); auto query_status = - bkd_reader->query(context, "c_unsupported", &query_value, + bkd_reader->query(context, "c_unsupported", qp_unsupp.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); // This might fail due to unsupported type, which is what we want to test } @@ -3673,6 +4120,126 @@ TEST_F(InvertedIndexReaderTest, BkdIndexRead) { test_bkd_index_read(); } +// Regression: type_limit must be specialised, otherwise both +// min() and max() collapse to decimal12_t{0, 0} (zero-init default of an +// unspecialised numeric_limits) and TypedInvertedIndexQueryParam +// produces invalid 
+/-infinity sentinels. +TEST_F(InvertedIndexReaderTest, TypeLimitDecimal12Specialisation) { + test_type_limit_decimal12_specialisation(); +} + +// BKD half-bounded range query regression suite, one TEST_F per BKD-supported +// PrimitiveType. They all share `verify_bkd_range_queries`, which: +// - writes 6 sorted values into a fresh BKD index +// - asserts EQUAL / LESS_THAN / LESS_EQUAL / GREATER_THAN / GREATER_EQUAL +// cardinalities derived from the values via std::count_if. +// +// Locks in the typed-param interface, the +/-infinity sentinels from +// type_limit, and BKD writer/reader/visitor agreement. +TEST_F(InvertedIndexReaderTest, TypeLimitUint24Specialisation) { + test_type_limit_uint24_specialisation(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeIntRangeQuery) { + test_bkd_range_int(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeBigIntRangeQuery) { + test_bkd_range_bigint(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeSmallIntRangeQuery) { + test_bkd_range_smallint(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeTinyIntRangeQuery) { + test_bkd_range_tinyint(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeLargeIntRangeQuery) { + test_bkd_range_largeint(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDecimalV2RangeQuery) { + test_bkd_range_decimalv2(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDateRangeQuery) { + test_bkd_range_date(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDateTimeRangeQuery) { + test_bkd_range_datetime(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDateV2RangeQuery) { + test_bkd_range_datev2(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDateTimeV2RangeQuery) { + test_bkd_range_datetimev2(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeTimestampTzRangeQuery) { + test_bkd_range_timestamptz(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeBoolRangeQuery) { + test_bkd_range_bool(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeFloatRangeQuery) { + test_bkd_range_float(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDoubleRangeQuery) { + 
test_bkd_range_double(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDecimal32RangeQuery) { + test_bkd_range_decimal32(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDecimal64RangeQuery) { + test_bkd_range_decimal64(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDecimal128IRangeQuery) { + test_bkd_range_decimal128i(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDecimal256RangeQuery) { + test_bkd_range_decimal256(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeIPv4RangeQuery) { + test_bkd_range_ipv4(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeIPv6RangeQuery) { + test_bkd_range_ipv6(); +} + +// Verifies that KeyCoder produces byte-identical +// output regardless of whether the input pointer is to int64_t or uint64_t. +// This is what makes TypedInvertedIndexQueryParam::storage_val +// = int64_t (signed view) and the historic uint64_t storage interchangeable for +// real datetime values: KeyCoder reads bit pattern via memcpy, then sign-flips +// based on its own CppType (= int64_t) — so signed/unsigned at the call site +// doesn't change the encoded bytes as long as bit patterns agree. +TEST(KeyCoderDateTimeTest, ByteIdenticalForSignedAndUnsignedInput) { + const auto* coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_DATETIME); + ASSERT_NE(coder, nullptr); + + // Mix realistic datetimes with the boundary values that drive sentinels. 
+ constexpr int64_t kCases[] = { + 10101000000LL, // 0001-01-01 00:00:00 (smallest valid set_to_min sentinel) + 20200101120000LL, // 2020-01-01 12:00:00 + 20240310093000LL, // 2024-03-10 09:30:00 + 99991231235959LL, // 9999-12-31 23:59:59 (largest valid set_to_max sentinel) + std::numeric_limits::max(), // type_limit max sentinel + 0LL, // type_limit min (also = INT64_MIN's bit-flipped image) + std::numeric_limits::lowest(), + }; + + for (int64_t case_val : kCases) { + int64_t signed_val = case_val; + uint64_t unsigned_val; + std::memcpy(&unsigned_val, &signed_val, sizeof(unsigned_val)); + + std::string signed_buf; + std::string unsigned_buf; + coder->full_encode_ascending(&signed_val, &signed_buf); + coder->full_encode_ascending(&unsigned_val, &unsigned_buf); + + ASSERT_EQ(signed_buf.size(), sizeof(int64_t)); + ASSERT_EQ(unsigned_buf.size(), sizeof(uint64_t)); + EXPECT_EQ(signed_buf, unsigned_buf) + << "DATETIME KeyCoder must produce identical bytes for value " + << static_cast(case_val) << " regardless of pointer type"; + } +} + // Query cache test TEST_F(InvertedIndexReaderTest, QueryCache) { test_query_cache(); diff --git a/be/test/storage/segment/inverted_index_writer_test.cpp b/be/test/storage/segment/inverted_index_writer_test.cpp index 04742d5dee06fe..e3ad19a61d9af4 100644 --- a/be/test/storage/segment/inverted_index_writer_test.cpp +++ b/be/test/storage/segment/inverted_index_writer_test.cpp @@ -40,6 +40,7 @@ #include "storage/index/index_file_writer.h" #include "storage/index/inverted/inverted_index_desc.h" #include "storage/index/inverted/inverted_index_fs_directory.h" +#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_reader.h" #include "storage/iterator/olap_data_convertor.h" #include "storage/tablet/tablet_schema.h" @@ -176,7 +177,9 @@ class InvertedIndexWriterTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto status = 
bkd_reader->query(context, "c1", &values[i], + auto qp = TypedInvertedIndexQueryParam::create_unique(); + qp->set_value(&values[i]); + auto status = bkd_reader->query(context, "c1", qp.get(), doris::segment_v2::InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()) << status; @@ -202,7 +205,9 @@ class InvertedIndexWriterTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto status = bkd_reader->query(context, "c1", &test_value, + auto test_qp = TypedInvertedIndexQueryParam::create_unique(); + test_qp->set_value(&test_value); + auto status = bkd_reader->query(context, "c1", test_qp.get(), doris::segment_v2::InvertedIndexQueryType::LESS_THAN_QUERY, less_than_bitmap); EXPECT_TRUE(status.ok()) << status; @@ -221,7 +226,7 @@ class InvertedIndexWriterTest : public testing::Test { // Test GREATER_THAN query std::shared_ptr greater_than_bitmap = std::make_shared(); - status = bkd_reader->query(context, "c1", &test_value, + status = bkd_reader->query(context, "c1", test_qp.get(), doris::segment_v2::InvertedIndexQueryType::GREATER_THAN_QUERY, greater_than_bitmap); EXPECT_TRUE(status.ok()) << status; @@ -739,7 +744,9 @@ class InvertedIndexWriterTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = inverted_reader->query(context, field_name, &str_ref, + auto qp = TypedInvertedIndexQueryParam::create_unique(); + qp->set_value(&str_ref); + auto query_status = inverted_reader->query(context, field_name, qp.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; // For regular strings, both should work the same @@ -945,12 +952,14 @@ TEST_F(InvertedIndexWriterTest, CompareUnicodeStringWriteResults) { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status_enabled = - inverted_reader_enabled->query(context, field_name, &values[i], - InvertedIndexQueryType::EQUAL_QUERY, 
bitmap_enabled); + StringRef str_ref(values[i].data, values[i].size); + auto qp = TypedInvertedIndexQueryParam::create_unique(); + qp->set_value(&str_ref); + auto query_status_enabled = inverted_reader_enabled->query( + context, field_name, qp.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap_enabled); auto query_status_disabled = inverted_reader_disabled->query( - context, field_name, &values[i], InvertedIndexQueryType::EQUAL_QUERY, + context, field_name, qp.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap_disabled); EXPECT_TRUE(query_status_enabled.ok()) << query_status_enabled; diff --git a/be/test/storage/storage_types_test.cpp b/be/test/storage/storage_types_test.cpp index b3ba23a51a0c0d..5d79e4f56ec320 100644 --- a/be/test/storage/storage_types_test.cpp +++ b/be/test/storage/storage_types_test.cpp @@ -45,20 +45,6 @@ void common_test(typename TypeTraits::CppType src_val) { EXPECT_EQ(field_type, type->type()); EXPECT_EQ(sizeof(src_val), type->size()); - // test min - { - typename TypeTraits::CppType dst_val; - type->set_to_min((char*)&dst_val); - - EXPECT_TRUE(type->cmp((char*)&src_val, (char*)&dst_val) > 0); - } - // test max - { - typename TypeTraits::CppType dst_val; - type->set_to_max((char*)&dst_val); - // NOTE: bool input is true, this will return 0 - EXPECT_TRUE(type->cmp((char*)&src_val, (char*)&dst_val) <= 0); - } } template @@ -69,22 +55,6 @@ void test_char(Slice src_val) { EXPECT_EQ(field->type(), fieldType); EXPECT_EQ(sizeof(src_val), type->size()); - // test min - { - char buf[64]; - Slice dst_val(buf, sizeof(buf)); - field->set_to_min((char*)&dst_val); - - EXPECT_TRUE(type->cmp((char*)&src_val, (char*)&dst_val) > 0); - } - // test max - { - char buf[64]; - Slice dst_val(buf, sizeof(buf)); - field->set_to_max((char*)&dst_val); - - EXPECT_TRUE(type->cmp((char*)&src_val, (char*)&dst_val) < 0); - } delete field; } From 9b1bdc2f75c1985d9d222c989a157e1935707a97 Mon Sep 17 00:00:00 2001 From: csun5285 Date: Wed, 6 May 2026 16:42:28 +0800 Subject: 
[PATCH 02/10] [refactor](inverted-index) replace InvertedIndexQueryParam with Field Drop the InvertedIndexQueryParam / StringQueryParam / NumericQueryParam hierarchy and the InvertedIndexQueryParamFactory. All IndexReader::query and try_query methods now take a const Field& directly. BkdIndexReader performs the Field -> KeyCoder dispatch internally via a macro-expanded switch on FieldType, using CppTypeTraits::CppType as the encoding type (which already handles DATETIME's signed/unsigned distinction). Removes ~200 lines of factory plus the param hierarchy, eliminates the runtime dynamic_cast in BkdIndexReader::query, and pushes type dispatch from predicate-construction time to query time. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../function/array/function_array_index.h | 8 +- .../function/array/function_arrays_overlap.h | 9 +- be/src/exprs/function/function_ip.h | 13 +- .../exprs/function/function_multi_match.cpp | 7 +- be/src/exprs/function/functions_comparison.h | 9 +- be/src/exprs/function/in.h | 8 +- be/src/exprs/function/match.cpp | 8 +- .../inverted/inverted_index_iterator.cpp | 15 +- .../index/inverted/inverted_index_iterator.h | 7 +- .../inverted/inverted_index_query_param.h | 176 +---- .../index/inverted/inverted_index_reader.cpp | 169 +++-- .../index/inverted/inverted_index_reader.h | 35 +- .../storage/predicate/comparison_predicate.h | 8 +- be/src/storage/predicate/in_list_predicate.h | 12 +- ...omparison_evaluate_inverted_index_test.cpp | 6 +- be/test/exprs/function/function_ip_test.cpp | 6 +- .../util/index_compaction_utils.cpp | 11 +- .../segment/index_reader_helper_test.cpp | 6 +- .../segment/inverted_index_iterator_test.cpp | 4 +- .../inverted_index_query_param_test.cpp | 613 +----------------- .../segment/inverted_index_reader_test.cpp | 379 +++++------ .../segment/inverted_index_writer_test.cpp | 27 +- 22 files changed, 370 insertions(+), 1166 deletions(-) diff --git a/be/src/exprs/function/array/function_array_index.h 
b/be/src/exprs/function/array/function_array_index.h index 2654e46f76a24f..3f39fc627b4887 100644 --- a/be/src/exprs/function/array/function_array_index.h +++ b/be/src/exprs/function/array/function_array_index.h @@ -41,11 +41,11 @@ #include "core/data_type/data_type_number.h" // IWYU pragma: keep #include "core/data_type/define_primitive_type.h" #include "core/data_type/primitive_type.h" +#include "core/field.h" #include "core/string_ref.h" #include "core/types.h" #include "exprs/function/function.h" #include "storage/index/index_reader_helper.h" -#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_query_type.h" #include "storage/index/inverted/inverted_index_reader.h" #include "storage/predicate/column_predicate.h" @@ -152,7 +152,6 @@ class FunctionArrayIndex : public IFunction { } Field param_value; arguments[0].column->get(0, param_value); - auto param_type = arguments[0].type->get_primitive_type(); // The current implementation for the inverted index of arrays cannot handle cases where the array contains null values, // meaning an item in the array is null. 
if (param_value.is_null()) { @@ -165,13 +164,10 @@ class FunctionArrayIndex : public IFunction { RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); null_bitmap = null_bitmap_cache_handle.get_bitmap(); } - std::unique_ptr query_param = nullptr; - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type, ¶m_value, - query_param)); InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = std::move(query_param); + param.query_value = param_value; param.query_type = segment_v2::InvertedIndexQueryType::EQUAL_QUERY; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/exprs/function/array/function_arrays_overlap.h b/be/src/exprs/function/array/function_arrays_overlap.h index f788c04e852c6d..d71643945ca796 100644 --- a/be/src/exprs/function/array/function_arrays_overlap.h +++ b/be/src/exprs/function/array/function_arrays_overlap.h @@ -230,10 +230,6 @@ class FunctionArraysOverlap : public IFunction { Field param_value; arguments[0].column->get(0, param_value); DCHECK(arguments[0].type->get_primitive_type() == TYPE_ARRAY); - auto nested_param_type = - check_and_get_data_type(remove_nullable(arguments[0].type).get()) - ->get_nested_type() - ->get_primitive_type(); // The current implementation for the inverted index of arrays cannot handle cases where the array contains null values, // meaning an item in the array is null. 
if (param_value.is_null()) { @@ -246,7 +242,6 @@ class FunctionArraysOverlap : public IFunction { RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); null_bitmap = null_bitmap_cache_handle.get_bitmap(); } - std::unique_ptr query_param = nullptr; const Array& query_val = param_value.get(); InvertedIndexParam param; @@ -260,9 +255,7 @@ class FunctionArraysOverlap : public IFunction { if (nested_query_val.is_null()) { return Status::OK(); } - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( - nested_param_type, &nested_query_val, query_param)); - param.query_value = std::move(query_param); + param.query_value = nested_query_val; param.roaring = std::make_shared(); param.analyzer_ctx = analyzer_ctx; RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {¶m})); diff --git a/be/src/exprs/function/function_ip.h b/be/src/exprs/function/function_ip.h index ec6a172e021ec8..cb176a081efd95 100644 --- a/be/src/exprs/function/function_ip.h +++ b/be/src/exprs/function/function_ip.h @@ -40,6 +40,7 @@ #include "core/data_type/data_type_number.h" #include "core/data_type/data_type_string.h" #include "core/data_type/data_type_struct.h" +#include "core/field.h" #include "core/types.h" #include "core/value/ip_address_cidr.h" #include "exec/common/endian.h" @@ -48,7 +49,6 @@ #include "exprs/function/function.h" #include "exprs/function/function_helpers.h" #include "storage/index/index_reader_helper.h" -#include "storage/index/inverted/inverted_index_query_param.h" namespace doris { @@ -708,29 +708,22 @@ class FunctionIsIPAddressInRange : public IFunction { // apply for inverted index std::shared_ptr null_bitmap = std::make_shared(); - auto param_type = data_type_with_name.second->get_primitive_type(); - std::unique_ptr query_param = nullptr; - // >= min ip - RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( - param_type, &min_ip, query_param)); segment_v2::InvertedIndexParam min_param; min_param.column_name = 
data_type_with_name.first; min_param.column_type = data_type_with_name.second; min_param.query_type = segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY; - min_param.query_value = std::move(query_param); + min_param.query_value = min_ip; min_param.num_rows = num_rows; min_param.roaring = std::make_shared(); RETURN_IF_ERROR(iter->read_from_index(&min_param)); // <= max ip - RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( - param_type, &max_ip, query_param)); segment_v2::InvertedIndexParam max_param; max_param.column_name = data_type_with_name.first; max_param.column_type = data_type_with_name.second; max_param.query_type = segment_v2::InvertedIndexQueryType::LESS_EQUAL_QUERY; - max_param.query_value = std::move(query_param); + max_param.query_value = max_ip; max_param.num_rows = num_rows; max_param.roaring = std::make_shared(); RETURN_IF_ERROR(iter->read_from_index(&max_param)); diff --git a/be/src/exprs/function/function_multi_match.cpp b/be/src/exprs/function/function_multi_match.cpp index 68dea3f731e4ab..ecc10e593ef4a8 100644 --- a/be/src/exprs/function/function_multi_match.cpp +++ b/be/src/exprs/function/function_multi_match.cpp @@ -25,10 +25,10 @@ #include #include "core/column/column.h" +#include "core/field.h" #include "exprs/function/simple_function_factory.h" #include "exprs/vslot_ref.h" #include "io/fs/file_reader.h" -#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/query/phrase_prefix_query.h" #include "storage/segment/segment_iterator.h" @@ -80,12 +80,9 @@ Status FunctionMultiMatch::evaluate_inverted_index( return Status::Error( "arguments for multi_match must be string"); } - auto query_param = segment_v2::TypedInvertedIndexQueryParam::create_unique(); - query_param->set_value(&query_str_ref); - // search InvertedIndexParam param; - param.query_value = std::move(query_param); + param.query_value = Field::create_field(query_str_ref.to_string()); param.query_type = 
query_type; param.num_rows = num_rows; for (size_t i = 0; i < data_type_with_names.size(); i++) { diff --git a/be/src/exprs/function/functions_comparison.h b/be/src/exprs/function/functions_comparison.h index e5228bd8889285..5d891248e08c11 100644 --- a/be/src/exprs/function/functions_comparison.h +++ b/be/src/exprs/function/functions_comparison.h @@ -34,13 +34,13 @@ #include "core/data_type/data_type_string.h" #include "core/data_type/define_primitive_type.h" #include "core/decimal_comparison.h" +#include "core/field.h" #include "core/memcmp_small.h" #include "core/value/vdatetime_value.h" #include "exprs/function/function.h" #include "exprs/function/function_helpers.h" #include "exprs/function/functions_logical.h" #include "storage/index/index_reader_helper.h" -#include "storage/index/inverted/inverted_index_query_param.h" namespace doris { @@ -487,15 +487,10 @@ class FunctionComparison : public IFunction { if (param_value.is_null()) { return Status::OK(); } - auto param_type = arguments[0].type->get_primitive_type(); - std::unique_ptr query_param = nullptr; - RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( - param_type, ¶m_value, query_param)); - segment_v2::InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = std::move(query_param); + param.query_value = param_value; param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/exprs/function/in.h b/be/src/exprs/function/in.h index ae69e05d6af19b..78435dd27640a0 100644 --- a/be/src/exprs/function/in.h +++ b/be/src/exprs/function/in.h @@ -38,6 +38,7 @@ #include "core/data_type/data_type_nullable.h" #include "core/data_type/data_type_number.h" #include "core/data_type/define_primitive_type.h" +#include "core/field.h" #include "core/string_ref.h" #include "core/types.h" #include "exprs/aggregate/aggregate_function.h" @@ -46,7 +47,6 @@ #include 
"exprs/function_context.h" #include "exprs/hybrid_set.h" #include "storage/index/index_reader_helper.h" -#include "storage/index/inverted/inverted_index_query_param.h" namespace doris { @@ -162,7 +162,6 @@ class FunctionIn : public IFunction { for (const auto& arg : arguments) { Field param_value; arg.column->get(0, param_value); - auto param_type = arg.type->get_primitive_type(); if (param_value.is_null()) { // predicate like column NOT IN (NULL, '') should not push down to index. if (negative) { @@ -171,14 +170,11 @@ class FunctionIn : public IFunction { *roaring |= *null_bitmap; continue; } - std::unique_ptr query_param = nullptr; - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( - param_type, ¶m_value, query_param)); InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; segment_v2::InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = std::move(query_param); + param.query_value = param_value; param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/exprs/function/match.cpp b/be/src/exprs/function/match.cpp index 92757b8b8ecd54..adab0ba82c7bb8 100644 --- a/be/src/exprs/function/match.cpp +++ b/be/src/exprs/function/match.cpp @@ -19,11 +19,11 @@ #include +#include "core/field.h" #include "runtime/query_context.h" #include "runtime/runtime_state.h" #include "storage/index/index_reader_helper.h" #include "storage/index/inverted/analyzer/analyzer.h" -#include "storage/index/inverted/inverted_index_query_param.h" #include "util/debug_points.h" namespace doris { @@ -80,14 +80,10 @@ Status FunctionMatchBase::evaluate_inverted_index( return Status::Error( "arguments for match must be string"); } - std::unique_ptr query_param = nullptr; - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type, ¶m_value, - query_param)); - InvertedIndexParam param; param.column_name = 
data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = std::move(query_param); + param.query_value = param_value; param.query_type = get_query_type_from_fn_name(); param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/storage/index/inverted/inverted_index_iterator.cpp b/be/src/storage/index/inverted/inverted_index_iterator.cpp index c72bf00fffe64e..936b82d5d56abe 100644 --- a/be/src/storage/index/inverted/inverted_index_iterator.cpp +++ b/be/src/storage/index/inverted/inverted_index_iterator.cpp @@ -88,8 +88,8 @@ Status InvertedIndexIterator::read_from_index(const IndexParam& param) { runtime_state->query_options().inverted_index_skip_threshold; size_t hit_count = 0; RETURN_IF_ERROR(try_read_from_inverted_index(reader, i_param->column_name, - i_param->query_value.get(), - i_param->query_type, &hit_count)); + i_param->query_value, i_param->query_type, + &hit_count)); if (hit_count > i_param->num_rows * query_bkd_limit_percent / 100) { return Status::Error( "hit count: {}, bkd inverted reached limit {}% , segment num " @@ -101,7 +101,7 @@ Status InvertedIndexIterator::read_from_index(const IndexParam& param) { // Note: analyzer_ctx is now passed via i_param->analyzer_ctx auto execute_query = [&]() { - return reader->query(_context, i_param->column_name, i_param->query_value.get(), + return reader->query(_context, i_param->column_name, i_param->query_value, i_param->query_type, i_param->roaring, i_param->analyzer_ctx); }; @@ -133,10 +133,11 @@ Result InvertedIndexIterator::has_null() { return reader->has_null(); } -Status InvertedIndexIterator::try_read_from_inverted_index( - const InvertedIndexReaderPtr& reader, const std::string& column_name, - const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, - size_t* count) { +Status InvertedIndexIterator::try_read_from_inverted_index(const InvertedIndexReaderPtr& reader, + const std::string& column_name, + const Field& 
query_value, + InvertedIndexQueryType query_type, + size_t* count) { // NOTE: only bkd index support try read now. if (query_type == InvertedIndexQueryType::GREATER_EQUAL_QUERY || query_type == InvertedIndexQueryType::GREATER_THAN_QUERY || diff --git a/be/src/storage/index/inverted/inverted_index_iterator.h b/be/src/storage/index/inverted/inverted_index_iterator.h index 7ecef754045fc7..afc4a663670633 100644 --- a/be/src/storage/index/inverted/inverted_index_iterator.h +++ b/be/src/storage/index/inverted/inverted_index_iterator.h @@ -19,6 +19,7 @@ #include #include +#include "core/field.h" #include "storage/index/analyzer_key_matcher.h" #include "storage/index/index_iterator.h" #include "storage/index/inverted/inverted_index_parser.h" @@ -29,8 +30,7 @@ namespace doris::segment_v2 { struct InvertedIndexParam { std::string column_name; DataTypePtr column_type; - // Owns the typed query value; reader receives query_value.get(). - std::unique_ptr query_value; + Field query_value; InvertedIndexQueryType query_type; uint32_t num_rows; std::shared_ptr roaring; @@ -74,8 +74,7 @@ class InvertedIndexIterator : public IndexIterator { ENABLE_FACTORY_CREATOR(InvertedIndexIterator); Status try_read_from_inverted_index(const InvertedIndexReaderPtr& reader, - const std::string& column_name, - const InvertedIndexQueryParam* query_value, + const std::string& column_name, const Field& query_value, InvertedIndexQueryType query_type, size_t* count); // Normalize analyzer_key to lowercase. 
diff --git a/be/src/storage/index/inverted/inverted_index_query_param.h b/be/src/storage/index/inverted/inverted_index_query_param.h index 618b838ae4d6b4..cb92cf9661b6ad 100644 --- a/be/src/storage/index/inverted/inverted_index_query_param.h +++ b/be/src/storage/index/inverted/inverted_index_query_param.h @@ -17,178 +17,4 @@ #pragma once -#include -#include -#include - -#include "common/factory_creator.h" -#include "common/status.h" -#include "core/data_type/primitive_type.h" -#include "core/string_ref.h" -#include "core/type_limit.h" -#include "storage/index/inverted/inverted_index_reader.h" -#include "storage/key_coder.h" - -namespace doris { -class Field; - -namespace segment_v2 { - -// Typed query value passed from FE to InvertedIndexReader. -// Two disjoint shapes under a common root for unified ownership: -// StringQueryParam — get_string() (FullText / String readers) -// NumericQueryParam — encode_ascending() + encode_min/max_* (BkdIndexReader) -// TypedInvertedIndexQueryParam picks its base via PT (string vs numeric). - -class InvertedIndexQueryParam { -public: - virtual ~InvertedIndexQueryParam() = default; -}; - -class StringQueryParam : public InvertedIndexQueryParam { -public: - virtual void get_string(std::string* out) const = 0; -}; - -class NumericQueryParam : public InvertedIndexQueryParam { -public: - // Encode _value through KeyCoder ascending. encode_min/max encode the type's - // sentinel for the open side of BKD half-bounded ranges. - virtual void encode_ascending(const KeyCoder* coder, std::string* out) const = 0; - virtual void encode_min_ascending(const KeyCoder* coder, std::string* out) const = 0; - virtual void encode_max_ascending(const KeyCoder* coder, std::string* out) const = 0; -}; - -template -class TypedInvertedIndexQueryParam : public NumericQueryParam { - ENABLE_FACTORY_CREATOR(TypedInvertedIndexQueryParam); - -public: - // Storage type aligned with KeyCoder's view. 
Override per-PT when - // PrimitiveTypeTraits disagrees with KeyCoder on signedness — otherwise - // type_limit::min/max produces broken sentinels. - // TYPE_DATETIME: PrimitiveTypeTraits=uint64_t but KeyCoder=int64_t. With - // uint64_t, type_limit::max() = UINT64_MAX is read as -1 and encodes - // smaller than any real datetime — broken +inf. - using storage_val = std::conditional_t::StorageFieldType>; - - void set_value(const storage_val* value) { _value = *value; } - const storage_val& value() const { return _value; } - - void encode_ascending(const KeyCoder* coder, std::string* out) const override { - coder->full_encode_ascending(&_value, out); - } - void encode_min_ascending(const KeyCoder* coder, std::string* out) const override { - storage_val v = type_limit::min(); - coder->full_encode_ascending(&v, out); - } - void encode_max_ascending(const KeyCoder* coder, std::string* out) const override { - storage_val v = type_limit::max(); - coder->full_encode_ascending(&v, out); - } - -private: - storage_val _value; -}; - -template - requires(is_string_type(PT)) -class TypedInvertedIndexQueryParam : public StringQueryParam { - ENABLE_FACTORY_CREATOR(TypedInvertedIndexQueryParam); - -public: - void set_value(const std::string& value) { _value = value; } - void set_value(const StringRef* value) { _value.assign(value->data, value->size); } - - const std::string& value() const { return _value; } - - void get_string(std::string* out) const override { *out = _value; } - -private: - std::string _value; -}; - -// Static-only: maps FE values (Field / scalars / StringRef) to the right -// TypedInvertedIndexQueryParam. 
-class InvertedIndexQueryParamFactory { -public: - InvertedIndexQueryParamFactory() = delete; - - template - static Status create_query_value(const ValueType* value, - std::unique_ptr& result_param) { - static_assert(!std::is_same_v, - "ValueType cannot be void, as it is unsupported and dangerous."); - - using CPP_TYPE = typename PrimitiveTypeTraits::CppType; - std::unique_ptr> param = - TypedInvertedIndexQueryParam::create_unique(); - - if constexpr (is_string_type(PT)) { - if constexpr (std::is_same_v) { - const auto& str = value->template get(); - param->set_value(str); - } else if constexpr (std::is_same_v) { - param->set_value(value); - } else { - static_assert(std::is_convertible_v, - "ValueType must be convertible to std::string for string types"); - param->set_value(std::string(*value)); - } - } else { - CPP_TYPE cpp_val; - if constexpr (std::is_same_v) { - auto field_val = value->template get(); - cpp_val = static_cast(field_val); - } else { - cpp_val = static_cast(*value); - } - - typename TypedInvertedIndexQueryParam::storage_val storage_val_v = - PrimitiveTypeConvertor::to_storage_field_type(cpp_val); - param->set_value(&storage_val_v); - } - result_param = std::move(param); - return Status::OK(); - } - - static Status create_query_value(const PrimitiveType& primitiveType, const doris::Field* value, - std::unique_ptr& result_param) { - switch (primitiveType) { -#define M(TYPE) \ - case TYPE: { \ - return create_query_value(value, result_param); \ - } - M(PrimitiveType::TYPE_BOOLEAN) - M(PrimitiveType::TYPE_TINYINT) - M(PrimitiveType::TYPE_SMALLINT) - M(PrimitiveType::TYPE_INT) - M(PrimitiveType::TYPE_BIGINT) - M(PrimitiveType::TYPE_LARGEINT) - M(PrimitiveType::TYPE_FLOAT) - M(PrimitiveType::TYPE_DOUBLE) - M(PrimitiveType::TYPE_DECIMALV2) - M(PrimitiveType::TYPE_DECIMAL32) - M(PrimitiveType::TYPE_DECIMAL64) - M(PrimitiveType::TYPE_DECIMAL128I) - M(PrimitiveType::TYPE_DECIMAL256) - M(PrimitiveType::TYPE_DATE) - M(PrimitiveType::TYPE_DATETIME) - 
M(PrimitiveType::TYPE_CHAR) - M(PrimitiveType::TYPE_VARCHAR) - M(PrimitiveType::TYPE_STRING) - M(PrimitiveType::TYPE_DATEV2) - M(PrimitiveType::TYPE_DATETIMEV2) - M(PrimitiveType::TYPE_IPV4) - M(PrimitiveType::TYPE_IPV6) -#undef M - default: - return Status::NotSupported("Unsupported primitive type {} for inverted index reader", - primitiveType); - } - } -}; - -} // namespace segment_v2 -} // namespace doris +#include "core/field.h" diff --git a/be/src/storage/index/inverted/inverted_index_reader.cpp b/be/src/storage/index/inverted/inverted_index_reader.cpp index c05e2068d9bf61..95f7a071da1e82 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.cpp +++ b/be/src/storage/index/inverted/inverted_index_reader.cpp @@ -33,12 +33,15 @@ #include #include #include +#include #include "common/config.h" #include "common/exception.h" #include "common/logging.h" #include "common/status.h" +#include "core/data_type/primitive_type.h" #include "core/string_ref.h" +#include "core/type_limit.h" #include "runtime/runtime_profile.h" #include "runtime/runtime_state.h" #include "storage/field.h" @@ -49,7 +52,6 @@ #include "storage/index/inverted/inverted_index_fs_directory.h" #include "storage/index/inverted/inverted_index_iterator.h" #include "storage/index/inverted/inverted_index_parser.h" -#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_query_type.h" #include "storage/index/inverted/inverted_index_searcher.h" #include "storage/index/inverted/query/phrase_query.h" @@ -59,6 +61,103 @@ #include "storage/types.h" #include "util/faststring.h" +namespace { + +// Encodes a Field's value as a BKD ascending key via KeyCoder. +// CppTypeTraits::CppType is the type KeyCoder expects (e.g. int64_t for +// DATETIME, where PrimitiveTypeTraits gives uint64_t — implicit conversion +// preserves the bit pattern under 2's complement). 
+template +static void bkd_encode_field(const doris::Field& field, const doris::KeyCoder* coder, + std::string* out) { + using key_t = typename doris::CppTypeTraits::CppType; + key_t kv = doris::PrimitiveTypeConvertor::to_storage_field_type(field.get()); + coder->full_encode_ascending(&kv, out); +} + +template +static void bkd_encode_min(const doris::KeyCoder* coder, std::string* out) { + using key_t = typename doris::CppTypeTraits::CppType; + key_t v = doris::type_limit::min(); + coder->full_encode_ascending(&v, out); +} + +template +static void bkd_encode_max(const doris::KeyCoder* coder, std::string* out) { + using key_t = typename doris::CppTypeTraits::CppType; + key_t v = doris::type_limit::max(); + coder->full_encode_ascending(&v, out); +} + +#define BKD_TYPE_CASES(MACRO) \ + MACRO(OLAP_FIELD_TYPE_BOOL, TYPE_BOOLEAN) \ + MACRO(OLAP_FIELD_TYPE_TINYINT, TYPE_TINYINT) \ + MACRO(OLAP_FIELD_TYPE_SMALLINT, TYPE_SMALLINT) \ + MACRO(OLAP_FIELD_TYPE_INT, TYPE_INT) \ + MACRO(OLAP_FIELD_TYPE_BIGINT, TYPE_BIGINT) \ + MACRO(OLAP_FIELD_TYPE_LARGEINT, TYPE_LARGEINT) \ + MACRO(OLAP_FIELD_TYPE_FLOAT, TYPE_FLOAT) \ + MACRO(OLAP_FIELD_TYPE_DOUBLE, TYPE_DOUBLE) \ + MACRO(OLAP_FIELD_TYPE_DECIMAL, TYPE_DECIMALV2) \ + MACRO(OLAP_FIELD_TYPE_DECIMAL32, TYPE_DECIMAL32) \ + MACRO(OLAP_FIELD_TYPE_DECIMAL64, TYPE_DECIMAL64) \ + MACRO(OLAP_FIELD_TYPE_DECIMAL128I, TYPE_DECIMAL128I) \ + MACRO(OLAP_FIELD_TYPE_DECIMAL256, TYPE_DECIMAL256) \ + MACRO(OLAP_FIELD_TYPE_DATE, TYPE_DATE) \ + MACRO(OLAP_FIELD_TYPE_DATETIME, TYPE_DATETIME) \ + MACRO(OLAP_FIELD_TYPE_DATEV2, TYPE_DATEV2) \ + MACRO(OLAP_FIELD_TYPE_DATETIMEV2, TYPE_DATETIMEV2) \ + MACRO(OLAP_FIELD_TYPE_TIMESTAMPTZ, TYPE_TIMESTAMPTZ) \ + MACRO(OLAP_FIELD_TYPE_IPV4, TYPE_IPV4) \ + MACRO(OLAP_FIELD_TYPE_IPV6, TYPE_IPV6) + +static doris::Status encode_bkd_field_ascending(doris::FieldType ft, const doris::Field& field, + const doris::KeyCoder* coder, std::string* out) { +#define CASE(FT, PT) \ + case doris::FieldType::FT: \ + 
bkd_encode_field(field, coder, out); \ + return doris::Status::OK(); + switch (ft) { + BKD_TYPE_CASES(CASE) + default: + break; + } +#undef CASE + return doris::Status::InternalError("unsupported BKD field type {}", static_cast(ft)); +} + +static doris::Status encode_bkd_min_ascending(doris::FieldType ft, const doris::KeyCoder* coder, + std::string* out) { +#define CASE(FT, PT) \ + case doris::FieldType::FT: \ + bkd_encode_min(coder, out); \ + return doris::Status::OK(); + switch (ft) { + BKD_TYPE_CASES(CASE) + default: + break; + } +#undef CASE + return doris::Status::InternalError("unsupported BKD field type {}", static_cast(ft)); +} + +static doris::Status encode_bkd_max_ascending(doris::FieldType ft, const doris::KeyCoder* coder, + std::string* out) { +#define CASE(FT, PT) \ + case doris::FieldType::FT: \ + bkd_encode_max(coder, out); \ + return doris::Status::OK(); + switch (ft) { + BKD_TYPE_CASES(CASE) + default: + break; + } +#undef CASE + return doris::Status::InternalError("unsupported BKD field type {}", static_cast(ft)); +} + +} // anonymous namespace + namespace doris::segment_v2 { std::string InvertedIndexReader::get_index_file_path() { @@ -290,19 +389,13 @@ Status FullTextIndexReader::new_iterator(std::unique_ptr* iterato } Status FullTextIndexReader::query(const IndexQueryContextPtr& context, - const std::string& column_name, - const InvertedIndexQueryParam* query_value, + const std::string& column_name, const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx) { SCOPED_RAW_TIMER(&context->stats->inverted_index_query_timer); - const auto* str_param = dynamic_cast(query_value); - if (str_param == nullptr) { - return Status::InternalError("FullTextIndexReader expected StringQueryParam"); - } - std::string search_str; - str_param->get_string(&search_str); + std::string search_str = query_value.get(); VLOG_DEBUG << column_name << " begin to search the fulltext index from clucene, 
query_str [" << search_str << "]"; @@ -416,18 +509,13 @@ Status StringTypeInvertedIndexReader::new_iterator(std::unique_ptr& bit_map, const InvertedIndexAnalyzerCtx* /*analyzer_ctx*/) { SCOPED_RAW_TIMER(&context->stats->inverted_index_query_timer); - const auto* str_param = dynamic_cast(query_value); - if (str_param == nullptr) { - return Status::InternalError("StringTypeInvertedIndexReader expected StringQueryParam"); - } - std::string search_str; - str_param->get_string(&search_str); + std::string search_str = query_value.get(); // If the written value exceeds ignore_above, it will be written as null. // The queried value exceeds ignore_above means the written value cannot be found. @@ -554,20 +642,26 @@ Status BkdIndexReader::new_iterator(std::unique_ptr* iterator) { } template -Status BkdIndexReader::construct_bkd_query_value(const NumericQueryParam* query_value, +Status BkdIndexReader::construct_bkd_query_value(const Field& query_value, std::shared_ptr r, InvertedIndexVisitor* visitor) { if constexpr (QT == InvertedIndexQueryType::EQUAL_QUERY) { - query_value->encode_ascending(_value_key_coder, &visitor->query_max); - query_value->encode_ascending(_value_key_coder, &visitor->query_min); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &visitor->query_max)); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &visitor->query_min)); } else if constexpr (QT == InvertedIndexQueryType::LESS_THAN_QUERY || QT == InvertedIndexQueryType::LESS_EQUAL_QUERY) { - query_value->encode_ascending(_value_key_coder, &visitor->query_max); - query_value->encode_min_ascending(_value_key_coder, &visitor->query_min); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &visitor->query_max)); + RETURN_IF_ERROR(encode_bkd_min_ascending(_type_info->type(), _value_key_coder, + &visitor->query_min)); } else if constexpr (QT == 
InvertedIndexQueryType::GREATER_THAN_QUERY || QT == InvertedIndexQueryType::GREATER_EQUAL_QUERY) { - query_value->encode_ascending(_value_key_coder, &visitor->query_min); - query_value->encode_max_ascending(_value_key_coder, &visitor->query_max); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &visitor->query_min)); + RETURN_IF_ERROR(encode_bkd_max_ascending(_type_info->type(), _value_key_coder, + &visitor->query_max)); } else { return Status::Error( "invalid query type when query bkd index"); @@ -576,7 +670,7 @@ Status BkdIndexReader::construct_bkd_query_value(const NumericQueryParam* query_ } Status BkdIndexReader::invoke_bkd_try_query(const IndexQueryContextPtr& context, - const NumericQueryParam* query_value, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr r, size_t* count) { @@ -627,8 +721,7 @@ Status BkdIndexReader::invoke_bkd_try_query(const IndexQueryContextPtr& context, } Status BkdIndexReader::invoke_bkd_query(const IndexQueryContextPtr& context, - const NumericQueryParam* query_value, - InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr r, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&context->stats->inverted_index_searcher_search_timer); @@ -679,13 +772,8 @@ Status BkdIndexReader::invoke_bkd_query(const IndexQueryContextPtr& context, } Status BkdIndexReader::try_query(const IndexQueryContextPtr& context, - const std::string& column_name, - const InvertedIndexQueryParam* query_value, + const std::string& column_name, const Field& query_value, InvertedIndexQueryType query_type, size_t* count) { - const auto* num_param = dynamic_cast(query_value); - if (num_param == nullptr) { - return Status::InternalError("BkdIndexReader::try_query expected NumericQueryParam"); - } try { std::shared_ptr r; auto st = get_bkd_reader(context, r); @@ -696,7 +784,8 @@ Status BkdIndexReader::try_query(const IndexQueryContextPtr& 
context, return st; } std::string query_str; - num_param->encode_ascending(_value_key_coder, &query_str); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &query_str)); auto index_file_key = _index_file_reader->get_index_file_cache_key(&_index_meta); InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type, @@ -709,7 +798,7 @@ Status BkdIndexReader::try_query(const IndexQueryContextPtr& context, return Status::OK(); } - return invoke_bkd_try_query(context, num_param, query_type, r, count); + return invoke_bkd_try_query(context, query_value, query_type, r, count); } catch (const CLuceneError& e) { return Status::Error( "BKD Query CLuceneError Occurred, error msg: {}", e.what()); @@ -720,16 +809,11 @@ Status BkdIndexReader::try_query(const IndexQueryContextPtr& context, } Status BkdIndexReader::query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, - InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* /*analyzer_ctx*/) { SCOPED_RAW_TIMER(&context->stats->inverted_index_query_timer); - const auto* num_param = dynamic_cast(query_value); - if (num_param == nullptr) { - return Status::InternalError("BkdIndexReader::query expected NumericQueryParam"); - } try { std::shared_ptr r; auto st = get_bkd_reader(context, r); @@ -740,7 +824,8 @@ Status BkdIndexReader::query(const IndexQueryContextPtr& context, const std::str return st; } std::string query_str; - num_param->encode_ascending(_value_key_coder, &query_str); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &query_str)); auto index_file_key = _index_file_reader->get_index_file_cache_key(&_index_meta); InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type, @@ -751,7 +836,7 @@ Status 
BkdIndexReader::query(const IndexQueryContextPtr& context, const std::str return Status::OK(); } - RETURN_IF_ERROR(invoke_bkd_query(context, num_param, query_type, r, bit_map)); + RETURN_IF_ERROR(invoke_bkd_query(context, query_value, query_type, r, bit_map)); bit_map->runOptimize(); cache->insert(cache_key, bit_map, &cache_handler); diff --git a/be/src/storage/index/inverted/inverted_index_reader.h b/be/src/storage/index/inverted/inverted_index_reader.h index d39fa2c60dec00..38fd2e7cda40d6 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.h +++ b/be/src/storage/index/inverted/inverted_index_reader.h @@ -26,6 +26,7 @@ #include "common/status.h" #include "core/data_type/primitive_type.h" +#include "core/field.h" #include "io/fs/file_system.h" #include "io/fs/path.h" #include "storage/index/index_query_context.h" @@ -76,9 +77,6 @@ class InvertedIndexQueryCacheHandle; class IndexFileReader; class InvertedIndexQueryInfo; class IndexIterator; -class InvertedIndexQueryParam; -class StringQueryParam; -class NumericQueryParam; class InvertedIndexResultBitmap { private: @@ -225,17 +223,13 @@ class InvertedIndexReader : public IndexReader { IndexType index_type() override { return IndexType::INVERTED; } - // Callers pass a TypedInvertedIndexQueryParam produced by - // InvertedIndexQueryParamFactory. Each reader static_casts to the - // appropriate intermediate (StringQueryParam / NumericQueryParam) at entry. 
virtual Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, - InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) = 0; virtual Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, - InvertedIndexQueryType query_type, size_t* count) = 0; + const Field& query_value, InvertedIndexQueryType query_type, + size_t* count) = 0; Status read_null_bitmap(const IndexQueryContextPtr& context, InvertedIndexQueryCacheHandle* cache_handle, @@ -292,11 +286,11 @@ class FullTextIndexReader : public InvertedIndexReader { Status new_iterator(std::unique_ptr* iterator) override; Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override; Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, size_t* count) override { return Status::Error( "FullTextIndexReader not support try_query"); @@ -317,11 +311,11 @@ class StringTypeInvertedIndexReader : public InvertedIndexReader { Status new_iterator(std::unique_ptr* iterator) override; Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override; Status try_query(const IndexQueryContextPtr& 
context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, size_t* count) override { return Status::Error( "StringTypeInvertedIndexReader not support try_query"); @@ -377,22 +371,21 @@ class BkdIndexReader : public InvertedIndexReader { Status new_iterator(std::unique_ptr* iterator) override; Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override; Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, size_t* count) override; - Status invoke_bkd_try_query(const IndexQueryContextPtr& context, - const NumericQueryParam* query_value, + Status invoke_bkd_try_query(const IndexQueryContextPtr& context, const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr r, size_t* count); - Status invoke_bkd_query(const IndexQueryContextPtr& context, - const NumericQueryParam* query_value, InvertedIndexQueryType query_type, + Status invoke_bkd_query(const IndexQueryContextPtr& context, const Field& query_value, + InvertedIndexQueryType query_type, std::shared_ptr r, std::shared_ptr& bit_map); template - Status construct_bkd_query_value(const NumericQueryParam* query_value, + Status construct_bkd_query_value(const Field& query_value, std::shared_ptr r, InvertedIndexVisitor* visitor); diff --git a/be/src/storage/predicate/comparison_predicate.h b/be/src/storage/predicate/comparison_predicate.h index a4e2f84d937ac3..12f57237e7edfa 100644 --- a/be/src/storage/predicate/comparison_predicate.h +++ 
b/be/src/storage/predicate/comparison_predicate.h @@ -22,9 +22,9 @@ #include "common/compare.h" #include "core/column/column_dictionary.h" +#include "core/field.h" #include "storage/index/bloom_filter/bloom_filter.h" #include "storage/index/inverted/inverted_index_cache.h" // IWYU pragma: keep -#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_reader.h" #include "storage/predicate/column_predicate.h" @@ -93,14 +93,10 @@ class ComparisonPredicateBase final : public ColumnPredicate { return Status::InvalidArgument("invalid comparison predicate type {}", PT); } - std::unique_ptr query_param = nullptr; - RETURN_IF_ERROR( - InvertedIndexQueryParamFactory::create_query_value(&_value, query_param)); - InvertedIndexParam param; param.column_name = name_with_type.first; param.column_type = name_with_type.second; - param.query_value = std::move(query_param); + param.query_value = Field::create_field(_value); param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/storage/predicate/in_list_predicate.h b/be/src/storage/predicate/in_list_predicate.h index eded9322da2011..f7529b7ac69820 100644 --- a/be/src/storage/predicate/in_list_predicate.h +++ b/be/src/storage/predicate/in_list_predicate.h @@ -27,6 +27,7 @@ #include "core/data_type/define_primitive_type.h" #include "core/data_type/primitive_type.h" #include "core/decimal12.h" +#include "core/field.h" #include "core/string_ref.h" #include "core/type_limit.h" #include "core/types.h" @@ -34,7 +35,6 @@ #include "exprs/hybrid_set.h" #include "storage/index/bloom_filter/bloom_filter.h" #include "storage/index/inverted/inverted_index_cache.h" // IWYU pragma: keep -#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_reader.h" #include "storage/olap_common.h" #include "storage/predicate/column_predicate.h" @@ -162,22 +162,20 @@ class InListPredicateBase final 
: public ColumnPredicate { roaring::Roaring indices; HybridSetBase::IteratorBase* iter = _values->begin(); while (iter->has_next()) { - std::unique_ptr query_param = nullptr; + Field field_value; if constexpr (is_string_type(Type)) { // HybridSet's iter->get_value() yields StringRef*, not std::string*. const auto* ref = (const StringRef*)(iter->get_value()); - RETURN_IF_ERROR( - InvertedIndexQueryParamFactory::create_query_value(ref, query_param)); + field_value = Field::create_field(std::string(ref->data, ref->size)); } else { const T* value = (const T*)(iter->get_value()); - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( - value, query_param)); + field_value = Field::create_field(*value); } InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; InvertedIndexParam param; param.column_name = name_with_type.first; param.column_type = name_with_type.second; - param.query_value = std::move(query_param); + param.query_value = field_value; param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp b/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp index 96b4be0d76bac4..d903400797e195 100644 --- a/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp +++ b/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp @@ -38,16 +38,14 @@ class MockInvertedIndexReader : public segment_v2::InvertedIndexReader { } Status query(const segment_v2::IndexQueryContextPtr& context, const std::string& column_name, - const segment_v2::InvertedIndexQueryParam* query_value, - segment_v2::InvertedIndexQueryType query_type, + const Field& query_value, segment_v2::InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } Status try_query(const 
segment_v2::IndexQueryContextPtr& context, - const std::string& column_name, - const segment_v2::InvertedIndexQueryParam* query_value, + const std::string& column_name, const Field& query_value, segment_v2::InvertedIndexQueryType query_type, size_t* count) override { return Status::OK(); } diff --git a/be/test/exprs/function/function_ip_test.cpp b/be/test/exprs/function/function_ip_test.cpp index 206a5e6cba2a39..4ac9671dfbda70 100644 --- a/be/test/exprs/function/function_ip_test.cpp +++ b/be/test/exprs/function/function_ip_test.cpp @@ -172,15 +172,13 @@ class MockIndexReader : public segment_v2::InvertedIndexReader { return segment_v2::InvertedIndexReaderType::BKD; } Status query(const segment_v2::IndexQueryContextPtr& context, const std::string& column_name, - const segment_v2::InvertedIndexQueryParam* query_value, - segment_v2::InvertedIndexQueryType query_type, + const Field& query_value, segment_v2::InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } Status try_query(const segment_v2::IndexQueryContextPtr& context, - const std::string& column_name, - const segment_v2::InvertedIndexQueryParam* query_value, + const std::string& column_name, const Field& query_value, segment_v2::InvertedIndexQueryType query_type, size_t* count) override { return Status::OK(); } diff --git a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp index f9f97cf26bb143..b8eada222697aa 100644 --- a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp +++ b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp @@ -27,11 +27,11 @@ #include "CLucene/StdHeader.h" #include "CLucene/config/repl_wchar.h" +#include "core/field.h" #include "json2pb/json_to_pb.h" #include "json2pb/pb_to_json.h" #include "storage/compaction/base_compaction.h" #include 
"storage/index/index_file_reader.h" -#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/query/query_factory.h" #include "storage/rowset/beta_rowset.h" #include "storage/rowset/beta_rowset_writer.h" @@ -164,21 +164,14 @@ class IndexCompactionUtils { for (int i = 0; i < query_data.size(); i++) { Field param_value = Field::create_field(int32_t(query_data[i])); - std::unique_ptr query_param = nullptr; - EXPECT_TRUE(segment_v2::InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_INT, ¶m_value, query_param) - .ok()); auto result = std::make_shared(); OlapReaderStatistics stats; IndexQueryContextPtr context = std::make_shared(); context->stats = &stats; - const auto* num_param = - dynamic_cast(query_param.get()); - EXPECT_NE(num_param, nullptr); EXPECT_TRUE(idx_reader - ->invoke_bkd_query(context, num_param, + ->invoke_bkd_query(context, param_value, InvertedIndexQueryType::EQUAL_QUERY, *bkd_searcher, result) .ok()); diff --git a/be/test/storage/segment/index_reader_helper_test.cpp b/be/test/storage/segment/index_reader_helper_test.cpp index 7d3c47b00b8730..d52e036f22997a 100644 --- a/be/test/storage/segment/index_reader_helper_test.cpp +++ b/be/test/storage/segment/index_reader_helper_test.cpp @@ -63,16 +63,14 @@ class MockInvertedIndexReader : public InvertedIndexReader { } MOCK_FUNCTION Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, - InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } MOCK_FUNCTION Status try_query(const IndexQueryContextPtr& context, - const std::string& column_name, - const InvertedIndexQueryParam* query_value, + const std::string& column_name, const Field& query_value, InvertedIndexQueryType query_type, size_t* count) override { return 
Status::OK(); } diff --git a/be/test/storage/segment/inverted_index_iterator_test.cpp b/be/test/storage/segment/inverted_index_iterator_test.cpp index bf40089fc3e964..cbbb910f65c549 100644 --- a/be/test/storage/segment/inverted_index_iterator_test.cpp +++ b/be/test/storage/segment/inverted_index_iterator_test.cpp @@ -54,14 +54,14 @@ class MockInvertedIndexReader : public InvertedIndexReader { } Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& roaring, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const InvertedIndexQueryParam* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, size_t* count) override { *count = 0; return Status::OK(); diff --git a/be/test/storage/segment/inverted_index_query_param_test.cpp b/be/test/storage/segment/inverted_index_query_param_test.cpp index e28e917c6b82a6..5c06c1e5e5732b 100644 --- a/be/test/storage/segment/inverted_index_query_param_test.cpp +++ b/be/test/storage/segment/inverted_index_query_param_test.cpp @@ -15,616 +15,11 @@ // specific language governing permissions and limitations // under the License. -#include "storage/index/inverted/inverted_index_query_param.h" +// inverted_index_query_param.h has been removed; its functionality is now +// internal to BkdIndexReader. Tests are covered by inverted_index_reader_test.cpp. 
#include -#include "common/status.h" -#include "core/data_type/primitive_type.h" -#include "core/field.h" -#include "storage/index/inverted/inverted_index_reader.h" - namespace doris::segment_v2 { - -class InvertedIndexQueryParamTest : public testing::Test { -public: - void SetUp() override {} - void TearDown() override {} -}; - -// ==================== Integer Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestBooleanWithField) { - auto field = Field::create_field(static_cast(1)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_BOOLEAN, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, true); -} - -TEST_F(InvertedIndexQueryParamTest, TestBooleanWithFieldFalse) { - auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_BOOLEAN, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, false); -} - -TEST_F(InvertedIndexQueryParamTest, TestBooleanTemplateWithNativeValue) { - bool input_value = true; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, true); -} - -TEST_F(InvertedIndexQueryParamTest, TestTinyIntWithField) { - auto field = Field::create_field(static_cast(42)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_TINYINT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, 42); -} - -TEST_F(InvertedIndexQueryParamTest, TestTinyIntTemplateWithNativeValue) { - int8_t input_value = -100; 
- std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, -100); -} - -TEST_F(InvertedIndexQueryParamTest, TestSmallIntWithField) { - auto field = Field::create_field(static_cast(1234)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_SMALLINT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, 1234); -} - -TEST_F(InvertedIndexQueryParamTest, TestSmallIntTemplateWithNativeValue) { - int16_t input_value = -32000; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, -32000); -} - -TEST_F(InvertedIndexQueryParamTest, TestIntWithField) { - auto field = Field::create_field(static_cast(123456)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_INT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, 123456); -} - -TEST_F(InvertedIndexQueryParamTest, TestIntTemplateWithNativeValue) { - int32_t input_value = -2147483647; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, -2147483647); -} - -TEST_F(InvertedIndexQueryParamTest, TestBigIntWithField) { - auto field = Field::create_field(static_cast(9223372036854775807LL)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_BIGINT, - &field, param); - ASSERT_TRUE(status.ok()); - const 
auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, 9223372036854775807LL); -} - -TEST_F(InvertedIndexQueryParamTest, TestBigIntTemplateWithNativeValue) { - int64_t input_value = -9223372036854775807LL; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, -9223372036854775807LL); -} - -TEST_F(InvertedIndexQueryParamTest, TestLargeIntWithField) { - Int128 large_value = 12345678901234567890ULL; - auto field = Field::create_field(large_value); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_LARGEINT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, static_cast<__int128_t>(large_value)); -} - -TEST_F(InvertedIndexQueryParamTest, TestLargeIntTemplateWithNativeValue) { - __int128_t input_value = 12345678901234567890ULL; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, input_value); -} - -// ==================== Float/Double Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestFloatWithField) { - auto field = Field::create_field(static_cast(3.14f)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_FLOAT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_FLOAT_EQ(*value, 3.14f); -} - -TEST_F(InvertedIndexQueryParamTest, TestFloatTemplateWithNativeValue) { - float input_value = -1.23456f; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - 
ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_FLOAT_EQ(*value, -1.23456f); -} - -TEST_F(InvertedIndexQueryParamTest, TestDoubleWithField) { - auto field = Field::create_field(static_cast(3.14159265358979)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_DOUBLE_EQ(*value, 3.14159265358979); -} - -TEST_F(InvertedIndexQueryParamTest, TestDoubleTemplateWithNativeValue) { - double input_value = -9.87654321e10; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_DOUBLE_EQ(*value, -9.87654321e10); -} - -// ==================== String Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestCharWithField) { - String str = "hello"; - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_CHAR, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, "hello"); -} - -TEST_F(InvertedIndexQueryParamTest, TestVarcharWithField) { - String str = "world"; - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_VARCHAR, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, "world"); -} - -TEST_F(InvertedIndexQueryParamTest, TestStringWithField) { - String str = "test string content"; - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = 
InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, "test string content"); -} - -TEST_F(InvertedIndexQueryParamTest, TestStringTemplateWithStringRef) { - std::string str_data = "string ref test"; - StringRef str_ref(str_data.data(), str_data.size()); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &str_ref, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, "string ref test"); -} - -TEST_F(InvertedIndexQueryParamTest, TestVarcharTemplateWithStringRef) { - std::string str_data = "varchar ref test"; - StringRef str_ref(str_data.data(), str_data.size()); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &str_ref, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, "varchar ref test"); -} - -TEST_F(InvertedIndexQueryParamTest, TestCharTemplateWithStringRef) { - std::string str_data = "char ref test"; - StringRef str_ref(str_data.data(), str_data.size()); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(&str_ref, - param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, "char ref test"); -} - -TEST_F(InvertedIndexQueryParamTest, TestStringWithEmptyValue) { - String str = ""; - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, ""); -} - -TEST_F(InvertedIndexQueryParamTest, TestStringWithSpecialCharacters) { - String str = "hello\nworld\t!@#$%^&*()"; - auto field 
= Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, "hello\nworld\t!@#$%^&*()"); -} - -// ==================== Decimal Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestDecimalV2WithField) { - // DecimalV2 uses Int128 as underlying storage - Int128 dec_value = 123456789; - auto field = Field::create_field(DecimalV2Value(dec_value)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMALV2, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDecimal32WithField) { - // Decimal32 uses Int64 for Field storage - Int64 dec_value = 12345; - auto field = Field::create_field(dec_value); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMAL32, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDecimal64WithField) { - // Decimal64 uses Int64 for Field storage - Int64 dec_value = 123456789012; - auto field = Field::create_field(dec_value); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMAL64, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDecimal128IWithField) { - // Decimal128I uses Int128 for Field storage - Int128 dec_value = 123456789012345LL; - auto field = Field::create_field(dec_value); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_DECIMAL128I, &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - 
-TEST_F(InvertedIndexQueryParamTest, TestDecimal256WithField) { - // Decimal256 uses Int128 for Field storage - Int128 dec_value = 123456789012345LL; - auto field = Field::create_field(Decimal(dec_value)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMAL256, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -// ==================== Date/Time Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestDateWithField) { - VecDateTimeValue tmp; - tmp.from_date_int64(20231205); - auto field = Field::create_field(tmp); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATE, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDateTimeWithField) { - VecDateTimeValue tmp; - tmp.create_from_olap_datetime(20231205120000LL); - auto field = Field::create_field(tmp); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATETIME, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDateV2WithField) { - UInt64 v = 20231205; - typename PrimitiveTypeTraits::CppType tmp; - tmp.from_date_int64(v); - auto field = Field::create_field(tmp); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATEV2, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDateTimeV2WithField) { - UInt64 v = 20231205120000LL; - auto field = Field::create_field( - *(typename PrimitiveTypeTraits::CppType*)&v); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATETIMEV2, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -// 
==================== IP Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestIPv4WithField) { - auto field = Field::create_field(IPv4(3232235521)); // 192.168.0.1 - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_IPV4, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, IPv4(3232235521)); -} - -TEST_F(InvertedIndexQueryParamTest, TestIPv4TemplateWithNativeValue) { - IPv4 input_value(2130706433); // 127.0.0.1 - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(&input_value, - param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, IPv4(2130706433)); -} - -TEST_F(InvertedIndexQueryParamTest, TestIPv6WithField) { - IPv6 ipv6_value = 1; - auto field = Field::create_field(ipv6_value); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_IPV6, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, ipv6_value); -} - -TEST_F(InvertedIndexQueryParamTest, TestIPv6TemplateWithNativeValue) { - IPv6 input_value = 12345678901234567890ULL; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(&input_value, - param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, input_value); -} - -// ==================== Unsupported Type Test ==================== - -TEST_F(InvertedIndexQueryParamTest, TestUnsupportedType) { - auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_ARRAY, - &field, param); - ASSERT_FALSE(status.ok()); - EXPECT_EQ(status.code(), ErrorCode::NOT_IMPLEMENTED_ERROR); -} - 
-TEST_F(InvertedIndexQueryParamTest, TestUnsupportedTypeMap) { - auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_MAP, - &field, param); - ASSERT_FALSE(status.ok()); -} - -TEST_F(InvertedIndexQueryParamTest, TestUnsupportedTypeStruct) { - auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRUCT, - &field, param); - ASSERT_FALSE(status.ok()); -} - -// ==================== Edge Cases Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestIntegerBoundaryMin) { - // Test minimum values - { - auto field = Field::create_field(static_cast(-128)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_TINYINT, &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, -128); - } - { - auto field = Field::create_field(static_cast(-32768)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_SMALLINT, &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, -32768); - } -} - -TEST_F(InvertedIndexQueryParamTest, TestIntegerBoundaryMax) { - // Test maximum values - { - auto field = Field::create_field(static_cast(127)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_TINYINT, &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, 127); - } - { - auto field = Field::create_field(static_cast(32767)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_SMALLINT, &field, param); - 
ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, 32767); - } -} - -TEST_F(InvertedIndexQueryParamTest, TestZeroValues) { - // Test zero values for different types - { - auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_INT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, 0); - } - { - auto field = Field::create_field(static_cast(0.0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_DOUBLE_EQ(*value, 0.0); - } -} - -TEST_F(InvertedIndexQueryParamTest, TestFloatSpecialValues) { - // Test infinity - { - auto field = Field::create_field( - static_cast(std::numeric_limits::infinity())); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_TRUE(std::isinf(*value)); - } - // Test negative infinity - { - auto field = Field::create_field( - static_cast(-std::numeric_limits::infinity())); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_TRUE(std::isinf(*value)); - EXPECT_LT(*value, 0); - } -} - -TEST_F(InvertedIndexQueryParamTest, TestStringWithUnicodeCharacters) { - String str = "你好世界 🌍 日本語"; - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, - &field, param); - 
ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(*value, "你好世界 🌍 日本語"); -} - -TEST_F(InvertedIndexQueryParamTest, TestLongString) { - std::string long_str(10000, 'x'); - String str(long_str); - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = - &static_cast*>(param.get())->value(); - EXPECT_EQ(value->size(), 10000); - EXPECT_EQ(*value, long_str); -} - -} // namespace doris::segment_v2 \ No newline at end of file +// No tests here; see inverted_index_reader_test.cpp. +} // namespace doris::segment_v2 diff --git a/be/test/storage/segment/inverted_index_reader_test.cpp b/be/test/storage/segment/inverted_index_reader_test.cpp index 3f575b6039254c..bac61e551201cd 100644 --- a/be/test/storage/segment/inverted_index_reader_test.cpp +++ b/be/test/storage/segment/inverted_index_reader_test.cpp @@ -29,13 +29,14 @@ #include #include +#include "core/field.h" +#include "core/value/vdatetime_value.h" #include "runtime/runtime_state.h" #include "storage/field.h" #include "storage/index/index_file_reader.h" #include "storage/index/index_file_writer.h" #include "storage/index/inverted/inverted_index_desc.h" #include "storage/index/inverted/inverted_index_iterator.h" -#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_writer.h" #include "storage/tablet/tablet_schema.h" #include "storage/tablet/tablet_schema_helper.h" @@ -333,9 +334,8 @@ class InvertedIndexReaderTest : public testing::Test { context->io_ctx = &io_ctx; context->stats = &stats; context->runtime_state = &runtime_state; - auto qp_335 = TypedInvertedIndexQueryParam::create_unique(); - qp_335->set_value(&str_ref); - auto query_status = str_reader->query(context, field_name, qp_335.get(), + Field qp_335 = 
Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = str_reader->query(context, field_name, qp_335, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -348,9 +348,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string not_exist = "orange"; StringRef not_exist_ref(not_exist.c_str(), not_exist.length()); - auto qp_348 = TypedInvertedIndexQueryParam::create_unique(); - qp_348->set_value(&not_exist_ref); - query_status = str_reader->query(context, field_name, qp_348.get(), + Field qp_348 = Field::create_field( + std::string(not_exist_ref.data, not_exist_ref.size)); + query_status = str_reader->query(context, field_name, qp_348, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -443,9 +443,8 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto qp_441 = TypedInvertedIndexQueryParam::create_unique(); - qp_441->set_value(&query_value); - auto query_status = bkd_reader->query(context, field_name, qp_441.get(), + Field qp_441 = Field::create_field(query_value); + auto query_status = bkd_reader->query(context, field_name, qp_441, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -457,9 +456,8 @@ class InvertedIndexReaderTest : public testing::Test { bitmap = std::make_shared(); int32_t less_than_value = 100; - auto qp_453 = TypedInvertedIndexQueryParam::create_unique(); - qp_453->set_value(&less_than_value); - query_status = bkd_reader->query(context, field_name, qp_453.get(), + Field qp_453 = Field::create_field(less_than_value); + query_status = bkd_reader->query(context, field_name, qp_453, InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -471,9 +469,8 @@ class InvertedIndexReaderTest : public testing::Test { bitmap = std::make_shared(); int32_t greater_than_value
= 100; - auto qp_465 = TypedInvertedIndexQueryParam::create_unique(); - qp_465->set_value(&greater_than_value); - query_status = bkd_reader->query(context, field_name, qp_465.get(), + Field qp_465 = Field::create_field(greater_than_value); + query_status = bkd_reader->query(context, field_name, qp_465, InvertedIndexQueryType::GREATER_THAN_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -532,9 +529,8 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto qp_524 = TypedInvertedIndexQueryParam::create_unique(); - qp_524->set_value(&str_ref); - auto query_status = str_reader->query(context, field_name, qp_524.get(), + Field qp_524 = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = str_reader->query(context, field_name, qp_524, InvertedIndexQueryType::EQUAL_QUERY, bitmap1); EXPECT_TRUE(query_status.ok()) << query_status; @@ -544,9 +540,8 @@ class InvertedIndexReaderTest : public testing::Test { // Second query with same value, should be cache hit std::shared_ptr bitmap2 = std::make_shared(); - auto qp_534 = TypedInvertedIndexQueryParam::create_unique(); - qp_534->set_value(&str_ref); - query_status = str_reader->query(context, field_name, qp_534.get(), + Field qp_534 = Field::create_field(std::string(str_ref.data, str_ref.size)); + query_status = str_reader->query(context, field_name, qp_534, InvertedIndexQueryType::EQUAL_QUERY, bitmap2); EXPECT_TRUE(query_status.ok()) << query_status; @@ -604,9 +599,8 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto qp_592 = TypedInvertedIndexQueryParam::create_unique(); - qp_592->set_value(&str_ref); - auto query_status = str_reader->query(context, field_name, qp_592.get(), + Field qp_592 = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = str_reader->query(context, field_name, qp_592, 
InvertedIndexQueryType::EQUAL_QUERY, bitmap1); EXPECT_TRUE(query_status.ok()) << query_status; @@ -617,9 +611,8 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap2 = std::make_shared(); StringRef str_ref2(values[1].data, values[1].size); // "banana" - auto qp_603 = TypedInvertedIndexQueryParam::create_unique(); - qp_603->set_value(&str_ref2); - query_status = str_reader->query(context, field_name, qp_603.get(), + Field qp_603 = Field::create_field(std::string(str_ref2.data, str_ref2.size)); + query_status = str_reader->query(context, field_name, qp_603, InvertedIndexQueryType::EQUAL_QUERY, bitmap2); EXPECT_TRUE(query_status.ok()) << query_status; @@ -676,9 +669,9 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap = std::make_shared(); StringRef term_ref(term.data(), term.size()); - auto qp_660 = TypedInvertedIndexQueryParam::create_unique(); - qp_660->set_value(&term_ref); - auto status = str_reader->query(context, field_name, qp_660.get(), + Field qp_660 = + Field::create_field(std::string(term_ref.data, term_ref.size)); + auto status = str_reader->query(context, field_name, qp_660, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()) << status; EXPECT_EQ(1, bitmap->cardinality()); @@ -809,9 +802,8 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto qp_791 = TypedInvertedIndexQueryParam::create_unique(); - qp_791->set_value(&str_ref); - auto query_status = str_reader->query(context, field_name, qp_791.get(), + Field qp_791 = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = str_reader->query(context, field_name, qp_791, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -826,9 +818,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "apple"; StringRef str_ref_a(query_term.c_str(), query_term.length()); - 
auto qp_806 = TypedInvertedIndexQueryParam::create_unique(); - qp_806->set_value(&str_ref_a); - query_status = str_reader->query(context, field_name, qp_806.get(), + Field qp_806 = + Field::create_field(std::string(str_ref_a.data, str_ref_a.size)); + query_status = str_reader->query(context, field_name, qp_806, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -908,10 +900,10 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto qp_887 = TypedInvertedIndexQueryParam::create_unique(); - qp_887->set_value(&str_ref); + Field qp_887 = + Field::create_field(std::string(str_ref.data, str_ref.size)); auto query_status = - str_reader->query(context, field_name, qp_887.get(), + str_reader->query(context, field_name, qp_887, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -927,9 +919,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "term_a"; StringRef str_ref_a(query_term.c_str(), query_term.length()); - auto qp_903 = TypedInvertedIndexQueryParam::create_unique(); - qp_903->set_value(&str_ref_a); - query_status = str_reader->query(context, field_name, qp_903.get(), + Field qp_903 = + Field::create_field(std::string(str_ref_a.data, str_ref_a.size)); + query_status = str_reader->query(context, field_name, qp_903, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -945,9 +937,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "noexist"; StringRef str_ref_no_term(query_term.c_str(), query_term.length()); - auto qp_919 = TypedInvertedIndexQueryParam::create_unique(); - qp_919->set_value(&str_ref_no_term); - query_status = str_reader->query(context, field_name, qp_919.get(), + Field qp_919 = Field::create_field( + std::string(str_ref_no_term.data, str_ref_no_term.size)); + query_status = 
str_reader->query(context, field_name, qp_919, InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; EXPECT_EQ(bitmap->cardinality(), 0) << "V3: Should find 0 documents matching 'noexist'"; @@ -996,9 +988,9 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto qp_968 = TypedInvertedIndexQueryParam::create_unique(); - qp_968->set_value(&str_ref); - auto query_status = str_reader->query(context, field_name, qp_968.get(), + Field qp_968 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = str_reader->query(context, field_name, qp_968, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -1014,9 +1006,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "term_a"; StringRef str_ref_a(query_term.c_str(), query_term.length()); - auto qp_984 = TypedInvertedIndexQueryParam::create_unique(); - qp_984->set_value(&str_ref_a); - query_status = str_reader->query(context, field_name, qp_984.get(), + Field qp_984 = + Field::create_field(std::string(str_ref_a.data, str_ref_a.size)); + query_status = str_reader->query(context, field_name, qp_984, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -1032,9 +1024,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "noexist"; StringRef str_ref_no_term(query_term.c_str(), query_term.length()); - auto qp_1000 = TypedInvertedIndexQueryParam::create_unique(); - qp_1000->set_value(&str_ref_no_term); - query_status = str_reader->query(context, field_name, qp_1000.get(), + Field qp_1000 = Field::create_field( + std::string(str_ref_no_term.data, str_ref_no_term.size)); + query_status = str_reader->query(context, field_name, qp_1000, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; EXPECT_EQ(bitmap->cardinality(), 0) << 
"V3: Should find 0 documents matching 'noexist'"; @@ -1101,9 +1093,8 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto qp_1067 = TypedInvertedIndexQueryParam::create_unique(); - qp_1067->set_value(&str_ref); - auto query_status = index_reader->query(context, field_name, qp_1067.get(), + Field qp_1067 = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = index_reader->query(context, field_name, qp_1067, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); ASSERT_TRUE(query_status.ok()) << "Query failed for term '" << query_term << "' in file " @@ -2125,9 +2116,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string field_name = "1"; // c2 unique_id StringRef query_val(values[0].data, values[0].size); - auto qp_2089 = TypedInvertedIndexQueryParam::create_unique(); - qp_2089->set_value(&query_val); - Status st = mock_reader->query(context, field_name, qp_2089.get(), + Field qp_2089 = + Field::create_field(std::string(query_val.data, query_val.size)); + Status st = mock_reader->query(context, field_name, qp_2089, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_FALSE(st.ok()); @@ -2189,9 +2180,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query_term = "world"; StringRef query_val(query_term.data(), query_term.size()); - auto qp_2151 = TypedInvertedIndexQueryParam::create_unique(); - qp_2151->set_value(&query_val); - Status st = mock_reader->query(context, field_name, qp_2151.get(), + Field qp_2151 = + Field::create_field(std::string(query_val.data, query_val.size)); + Status st = mock_reader->query(context, field_name, qp_2151, InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); EXPECT_FALSE(st.ok()); @@ -2201,9 +2192,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string phrase_query = "Apache Doris"; StringRef phrase_query_val(phrase_query.data(), phrase_query.size()); - auto qp_2161 = 
TypedInvertedIndexQueryParam::create_unique(); - qp_2161->set_value(&phrase_query_val); - st = mock_reader->query(context, field_name, qp_2161.get(), + Field qp_2161 = Field::create_field( + std::string(phrase_query_val.data, phrase_query_val.size)); + st = mock_reader->query(context, field_name, qp_2161, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_FALSE(st.ok()); @@ -2316,9 +2307,9 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap = std::make_shared(); std::string query_lower = "apple"; // lowercase StringRef str_ref(query_lower.c_str(), query_lower.length()); - auto qp_2274 = TypedInvertedIndexQueryParam::create_unique(); - qp_2274->set_value(&str_ref); - auto status = str_reader->query(context, "c2", qp_2274.get(), + Field qp_2274 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = str_reader->query(context, "c2", qp_2274, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_GT(bitmap->cardinality(), 0) << "Should find 'Apple' with lowercase query"; @@ -2358,9 +2349,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string long_query = "this_is_a_very_long_string_that_exceeds_ignore_above_limit"; StringRef str_ref(long_query.c_str(), long_query.length()); - auto qp_2314 = TypedInvertedIndexQueryParam::create_unique(); - qp_2314->set_value(&str_ref); - auto status = str_reader->query(context, "c2", qp_2314.get(), + Field qp_2314 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = str_reader->query(context, "c2", qp_2314, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_FALSE(status.ok()); EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED); @@ -2422,9 +2413,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "quick database"; StringRef query_ref(query.c_str(), query.length()); - auto qp_2376 = TypedInvertedIndexQueryParam::create_unique(); - 
qp_2376->set_value(&query_ref); - auto status = fulltext_reader->query(context, "c2", qp_2376.get(), + Field qp_2376 = + Field::create_field(std::string(query_ref.data, query_ref.size)); + auto status = fulltext_reader->query(context, "c2", qp_2376, InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_GT(bitmap->cardinality(), 0) @@ -2437,9 +2428,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "search fast"; StringRef query_ref(query.c_str(), query.length()); - auto qp_2389 = TypedInvertedIndexQueryParam::create_unique(); - qp_2389->set_value(&query_ref); - auto status = fulltext_reader->query(context, "c2", qp_2389.get(), + Field qp_2389 = + Field::create_field(std::string(query_ref.data, query_ref.size)); + auto status = fulltext_reader->query(context, "c2", qp_2389, InvertedIndexQueryType::MATCH_ALL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -2450,10 +2441,10 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "quick brown"; StringRef query_ref(query.c_str(), query.length()); - auto qp = TypedInvertedIndexQueryParam::create_unique(); - qp->set_value(&query_ref); + Field qp = + Field::create_field(std::string(query_ref.data, query_ref.size)); auto status = fulltext_reader->query( - context, "c2", qp.get(), InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); + context, "c2", qp, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -2463,9 +2454,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "sear"; StringRef query_ref(query.c_str(), query.length()); - auto qp_2411 = TypedInvertedIndexQueryParam::create_unique(); - qp_2411->set_value(&query_ref); - auto status = fulltext_reader->query(context, "c2", qp_2411.get(), + Field qp_2411 = + Field::create_field(std::string(query_ref.data, query_ref.size)); + auto status = fulltext_reader->query(context, "c2", qp_2411, InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY, 
bitmap); EXPECT_TRUE(status.ok()); @@ -2477,10 +2468,10 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "qu.*k"; StringRef query_ref(query.c_str(), query.length()); - auto qp = TypedInvertedIndexQueryParam::create_unique(); - qp->set_value(&query_ref); + Field qp = + Field::create_field(std::string(query_ref.data, query_ref.size)); auto status = fulltext_reader->query( - context, "c2", qp.get(), InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); + context, "c2", qp, InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); EXPECT_TRUE(status.ok()); } } @@ -2538,11 +2529,10 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap = std::make_shared(); StringRef str_ref("test1", 5); - auto param_qp = TypedInvertedIndexQueryParam::create_unique(); - param_qp->set_value(&str_ref); InvertedIndexParam param; param.column_name = "c2"; - param.query_value = std::move(param_qp); + param.query_value = + Field::create_field(std::string(str_ref.data, str_ref.size)); param.query_type = InvertedIndexQueryType::EQUAL_QUERY; param.num_rows = 3; param.roaring = bitmap; @@ -2555,11 +2545,10 @@ class InvertedIndexReaderTest : public testing::Test { size_t count = 0; auto* inverted_index_iterator = static_cast(iterator.get()); inverted_index_iterator->set_context(context); - auto try_qp = TypedInvertedIndexQueryParam::create_unique(); - try_qp->set_value(&str_ref); + Field try_qp = Field::create_field(std::string(str_ref.data, str_ref.size)); status = inverted_index_iterator->try_read_from_inverted_index( - std::static_pointer_cast(inverted_index_reader), "c2", - try_qp.get(), InvertedIndexQueryType::EQUAL_QUERY, &count); + std::static_pointer_cast(inverted_index_reader), "c2", try_qp, + InvertedIndexQueryType::EQUAL_QUERY, &count); EXPECT_TRUE(status.ok()); } @@ -2631,9 +2620,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "500"; StringRef str_ref(query.c_str(), query.length()); - auto qp_2571 = 
TypedInvertedIndexQueryParam::create_unique(); - qp_2571->set_value(&str_ref); - auto status = str_reader->query(context, "c2", qp_2571.get(), + Field qp_2571 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = str_reader->query(context, "c2", qp_2571, InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); // This might succeed or fail depending on the implementation limits // The important thing is we handle the potential TooManyClauses error gracefully @@ -2670,9 +2659,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string empty_query = ""; StringRef str_ref(empty_query.c_str(), empty_query.length()); - auto qp_2608 = TypedInvertedIndexQueryParam::create_unique(); - qp_2608->set_value(&str_ref); - auto status = fulltext_reader->query(context, "c2", qp_2608.get(), + Field qp_2608 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = fulltext_reader->query(context, "c2", qp_2608, InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); // Should either succeed with empty result or fail gracefully } @@ -2814,9 +2803,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "500"; StringRef str_ref(query.c_str(), query.length()); - auto qp_2750 = TypedInvertedIndexQueryParam::create_unique(); - qp_2750->set_value(&str_ref); - auto status = str_reader->query(context, "c2", qp_2750.get(), + Field qp_2750 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = str_reader->query(context, "c2", qp_2750, InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); // This might succeed or fail depending on the implementation limits // The important thing is we handle the potential TooManyClauses error gracefully @@ -2853,9 +2842,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string empty_query; StringRef str_ref(empty_query.c_str(), empty_query.length()); - auto qp_2787 = TypedInvertedIndexQueryParam::create_unique(); - qp_2787->set_value(&str_ref); - auto 
status = fulltext_reader->query(context, "c2", qp_2787.get(), + Field qp_2787 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = fulltext_reader->query(context, "c2", qp_2787, InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); // Should either succeed with empty result or fail gracefully } @@ -2906,10 +2895,10 @@ class InvertedIndexReaderTest : public testing::Test { std::string regexp_query = "test.*"; StringRef query_ref(regexp_query.c_str(), regexp_query.length()); - auto qp = TypedInvertedIndexQueryParam::create_unique(); - qp->set_value(&query_ref); + Field qp = + Field::create_field(std::string(query_ref.data, query_ref.size)); auto status = fulltext_reader->query( - context, "c2", qp.get(), InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); + context, "c2", qp, InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -2944,57 +2933,57 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "cherry"; StringRef str_ref(query.c_str(), query.length()); - auto qp_2874 = TypedInvertedIndexQueryParam::create_unique(); - qp_2874->set_value(&str_ref); - auto status = str_reader->query(context, "c2", qp_2874.get(), + Field qp_2874 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = str_reader->query(context, "c2", qp_2874, InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test LESS_EQUAL_QUERY bitmap = std::make_shared(); - auto qp_2880 = TypedInvertedIndexQueryParam::create_unique(); - qp_2880->set_value(&str_ref); - status = str_reader->query(context, "c2", qp_2880.get(), + Field qp_2880 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2880, InvertedIndexQueryType::LESS_EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test GREATER_THAN_QUERY bitmap = std::make_shared(); - auto qp_2886 = TypedInvertedIndexQueryParam::create_unique(); - qp_2886->set_value(&str_ref); 
- status = str_reader->query(context, "c2", qp_2886.get(), + Field qp_2886 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2886, InvertedIndexQueryType::GREATER_THAN_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test GREATER_EQUAL_QUERY bitmap = std::make_shared(); - auto qp_2892 = TypedInvertedIndexQueryParam::create_unique(); - qp_2892->set_value(&str_ref); - status = str_reader->query(context, "c2", qp_2892.get(), + Field qp_2892 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2892, InvertedIndexQueryType::GREATER_EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test MATCH_PHRASE_QUERY for StringType bitmap = std::make_shared(); - auto qp_2898 = TypedInvertedIndexQueryParam::create_unique(); - qp_2898->set_value(&str_ref); - status = str_reader->query(context, "c2", qp_2898.get(), + Field qp_2898 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2898, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test MATCH_PHRASE_PREFIX_QUERY for StringType bitmap = std::make_shared(); - auto qp_2904 = TypedInvertedIndexQueryParam::create_unique(); - qp_2904->set_value(&str_ref); - status = str_reader->query(context, "c2", qp_2904.get(), + Field qp_2904 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2904, InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test MATCH_REGEXP_QUERY for StringType bitmap = std::make_shared(); - auto qp_2910 = TypedInvertedIndexQueryParam::create_unique(); - qp_2910->set_value(&str_ref); - status = str_reader->query(context, "c2", qp_2910.get(), + Field qp_2910 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2910, InvertedIndexQueryType::MATCH_REGEXP_QUERY, 
bitmap); EXPECT_TRUE(status.ok()); } @@ -3059,10 +3048,10 @@ class InvertedIndexReaderTest : public testing::Test { EXPECT_NE(bkd_reader, nullptr); auto run_query = [&](InvertedIndexQueryType qt, T thr) { - auto qp = TypedInvertedIndexQueryParam::create_unique(); - qp->set_value(&thr); + using raw_t = typename PrimitiveTypeTraits::StorageFieldType; + Field qp = Field::create_field_from_olap_value(static_cast(thr)); auto bitmap = std::make_shared(); - auto status = bkd_reader->query(context, column_name, qp.get(), qt, bitmap); + auto status = bkd_reader->query(context, column_name, qp, qt, bitmap); EXPECT_TRUE(status.ok()) << column_name << ": " << status; return bitmap->cardinality(); }; @@ -3335,17 +3324,14 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { // Test try_query path size_t count = 0; - auto qp_2956 = TypedInvertedIndexQueryParam::create_unique(); - qp_2956->set_value(&test_case.second); - auto status = - bkd_reader->try_query(context, "c1", qp_2956.get(), test_case.first, &count); + Field qp_2956 = Field::create_field(test_case.second); + auto status = bkd_reader->try_query(context, "c1", qp_2956, test_case.first, &count); EXPECT_TRUE(status.ok()) << "Try query type: " << static_cast(test_case.first); // Test actual query path std::shared_ptr bitmap = std::make_shared(); - auto qp_2962 = TypedInvertedIndexQueryParam::create_unique(); - qp_2962->set_value(&test_case.second); - status = bkd_reader->query(context, "c1", qp_2962.get(), test_case.first, bitmap); + Field qp_2962 = Field::create_field(test_case.second); + status = bkd_reader->query(context, "c1", qp_2962, test_case.first, bitmap); EXPECT_TRUE(status.ok()) << "Query type: " << static_cast(test_case.first); } @@ -3354,17 +3340,15 @@ class InvertedIndexReaderTest : public testing::Test { int32_t max_value = 100; // Greater than maximum in data std::shared_ptr bitmap = std::make_shared(); - auto qp_2971 = TypedInvertedIndexQueryParam::create_unique(); 
- qp_2971->set_value(&min_value); - auto status = bkd_reader->query(context, "c1", qp_2971.get(), + Field qp_2971 = Field::create_field(min_value); + auto status = bkd_reader->query(context, "c1", qp_2971, InvertedIndexQueryType::GREATER_THAN_QUERY, bitmap); EXPECT_TRUE(status.ok()); bitmap = std::make_shared(); - auto qp_2976 = TypedInvertedIndexQueryParam::create_unique(); - qp_2976->set_value(&max_value); - status = bkd_reader->query(context, "c1", qp_2976.get(), - InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); + Field qp_2976 = Field::create_field(max_value); + status = bkd_reader->query(context, "c1", qp_2976, InvertedIndexQueryType::LESS_THAN_QUERY, + bitmap); EXPECT_TRUE(status.ok()); } @@ -3409,16 +3393,11 @@ class InvertedIndexReaderTest : public testing::Test { // Test the bypass path in read_from_inverted_index std::shared_ptr bitmap = std::make_shared(); int32_t query_value = 3; - auto make_int_qp = [&query_value]() { - auto qp = TypedInvertedIndexQueryParam::create_unique(); - qp->set_value(&query_value); - return qp; - }; // This should trigger the bypass logic due to low threshold InvertedIndexParam param; param.column_name = "c1"; - param.query_value = make_int_qp(); + param.query_value = Field::create_field(query_value); param.query_type = InvertedIndexQueryType::LESS_THAN_QUERY; param.num_rows = 5; param.roaring = bitmap; @@ -3432,7 +3411,7 @@ class InvertedIndexReaderTest : public testing::Test { bitmap = std::make_shared(); InvertedIndexParam param1; param1.column_name = "c1"; - param1.query_value = make_int_qp(); + param1.query_value = Field::create_field(query_value); param1.query_type = InvertedIndexQueryType::EQUAL_QUERY; param1.num_rows = 5; param1.roaring = bitmap; @@ -3442,11 +3421,11 @@ class InvertedIndexReaderTest : public testing::Test { // Test try_read_from_inverted_index with non-BKD compatible query size_t count = 0; - auto try_qp = make_int_qp(); + Field try_qp = Field::create_field(query_value); status = 
inverted_index_iterator->try_read_from_inverted_index( std::static_pointer_cast( iterator->get_reader(InvertedIndexReaderType::STRING_TYPE)), - "c1", try_qp.get(), InvertedIndexQueryType::MATCH_ANY_QUERY, &count); + "c1", try_qp, InvertedIndexQueryType::MATCH_ANY_QUERY, &count); EXPECT_TRUE(status.ok()); // Should succeed but not do anything for non-BKD queries } @@ -3595,10 +3574,8 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { std::shared_ptr bitmap = std::make_shared(); - auto qp_3192 = TypedInvertedIndexQueryParam::create_unique(); - qp_3192->set_value(&test_case.second); - auto status = - bkd_reader->query(context, "c_int", qp_3192.get(), test_case.first, bitmap); + Field qp_3192 = Field::create_field(test_case.second); + auto status = bkd_reader->query(context, "c_int", qp_3192, test_case.first, bitmap); EXPECT_TRUE(status.ok()) << "Query type: " << static_cast(test_case.first); if (test_case.first == InvertedIndexQueryType::EQUAL_QUERY) { @@ -3609,10 +3586,9 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { size_t count = 0; - auto qp_3204 = TypedInvertedIndexQueryParam::create_unique(); - qp_3204->set_value(&test_case.second); - auto status = bkd_reader->try_query(context, "c_int", qp_3204.get(), - test_case.first, &count); + Field qp_3204 = Field::create_field(test_case.second); + auto status = + bkd_reader->try_query(context, "c_int", qp_3204, test_case.first, &count); EXPECT_TRUE(status.ok()) << "Try query type: " << static_cast(test_case.first); } } @@ -3636,9 +3612,8 @@ class InvertedIndexReaderTest : public testing::Test { int64_t query_value = 1000000LL; std::shared_ptr bitmap = std::make_shared(); - auto qp_3229 = TypedInvertedIndexQueryParam::create_unique(); - qp_3229->set_value(&query_value); - auto status = bkd_reader->query(context, "c_bigint", qp_3229.get(), + Field qp_3229 = Field::create_field(query_value); + auto status = bkd_reader->query(context, 
"c_bigint", qp_3229, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_EQ(bitmap->cardinality(), 1); @@ -3671,10 +3646,10 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { std::shared_ptr bitmap = std::make_shared(); - auto qp_3262 = TypedInvertedIndexQueryParam::create_unique(); - qp_3262->set_value(&test_case.second); - auto status = bkd_reader->query(context, "c_timestamptz", qp_3262.get(), - test_case.first, bitmap); + Field qp_3262 = + Field::create_field_from_olap_value(test_case.second); + auto status = bkd_reader->query(context, "c_timestamptz", qp_3262, test_case.first, + bitmap); EXPECT_TRUE(status.ok()) << "Query type: " << static_cast(test_case.first); if (test_case.first == InvertedIndexQueryType::EQUAL_QUERY) { @@ -3685,9 +3660,9 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { size_t count = 0; - auto qp_3274 = TypedInvertedIndexQueryParam::create_unique(); - qp_3274->set_value(&test_case.second); - auto status = bkd_reader->try_query(context, "c_timestamptz", qp_3274.get(), + Field qp_3274 = + Field::create_field_from_olap_value(test_case.second); + auto status = bkd_reader->try_query(context, "c_timestamptz", qp_3274, test_case.first, &count); EXPECT_TRUE(status.ok()) << "Try query type: " << static_cast(test_case.first); } @@ -3785,9 +3760,8 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap = std::make_shared(); // TYPE_DATE storage is uint24_t — narrow from the test's uint32_t. 
typename PrimitiveTypeTraits::StorageFieldType date_storage(query_value); - auto qp_3366 = TypedInvertedIndexQueryParam::create_unique(); - qp_3366->set_value(&date_storage); - auto status = bkd_reader->query(context, "c_date", qp_3366.get(), + Field qp_3366 = Field::create_field_from_olap_value(date_storage); + auto status = bkd_reader->query(context, "c_date", qp_3366, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3810,14 +3784,11 @@ class InvertedIndexReaderTest : public testing::Test { auto bkd_reader = BkdIndexReader::create_shared(&idx_meta, reader); EXPECT_NE(bkd_reader, nullptr); - // TypedInvertedIndexQueryParam::storage_val is int64_t - // (conditional_t override on storage_val) — match the type to avoid - // signedness pointer-conversion errors. int64_t query_value = 20240101130000LL; std::shared_ptr bitmap = std::make_shared(); - auto qp_3391 = TypedInvertedIndexQueryParam::create_unique(); - qp_3391->set_value(&query_value); - auto status = bkd_reader->query(context, "c_datetime", qp_3391.get(), + Field qp_3391 = Field::create_field_from_olap_value( + static_cast(query_value)); + auto status = bkd_reader->query(context, "c_datetime", qp_3391, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3843,9 +3814,8 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap = std::make_shared(); // TYPE_BOOLEAN storage is uint8_t. uint8_t bool_storage = query_value ? 
1 : 0; - auto qp_3415 = TypedInvertedIndexQueryParam::create_unique(); - qp_3415->set_value(&bool_storage); - auto status = bkd_reader->query(context, "c_bool", qp_3415.get(), + Field qp_3415 = Field::create_field(bool_storage); + auto status = bkd_reader->query(context, "c_bool", qp_3415, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3869,9 +3839,8 @@ class InvertedIndexReaderTest : public testing::Test { int8_t query_value = 1; std::shared_ptr bitmap = std::make_shared(); - auto qp_3439 = TypedInvertedIndexQueryParam::create_unique(); - qp_3439->set_value(&query_value); - auto status = bkd_reader->query(context, "c_tinyint", qp_3439.get(), + Field qp_3439 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_tinyint", qp_3439, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3895,9 +3864,8 @@ class InvertedIndexReaderTest : public testing::Test { int16_t query_value = 1000; std::shared_ptr bitmap = std::make_shared(); - auto qp_3463 = TypedInvertedIndexQueryParam::create_unique(); - qp_3463->set_value(&query_value); - auto status = bkd_reader->query(context, "c_smallint", qp_3463.get(), + Field qp_3463 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_smallint", qp_3463, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3921,9 +3889,8 @@ class InvertedIndexReaderTest : public testing::Test { __int128 query_value = 0; std::shared_ptr bitmap = std::make_shared(); - auto qp_3487 = TypedInvertedIndexQueryParam::create_unique(); - qp_3487->set_value(&query_value); - auto status = bkd_reader->query(context, "c_largeint", qp_3487.get(), + Field qp_3487 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_largeint", qp_3487, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3947,9 +3914,8 @@ class InvertedIndexReaderTest : public testing::Test { uint32_t query_value = 
20240202; std::shared_ptr bitmap = std::make_shared(); - auto qp_3511 = TypedInvertedIndexQueryParam::create_unique(); - qp_3511->set_value(&query_value); - auto status = bkd_reader->query(context, "c_datev2", qp_3511.get(), + Field qp_3511 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_datev2", qp_3511, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3974,9 +3940,8 @@ class InvertedIndexReaderTest : public testing::Test { uint64_t query_value = 20240201130000ULL; std::shared_ptr bitmap = std::make_shared(); - auto qp_3536 = TypedInvertedIndexQueryParam::create_unique(); - qp_3536->set_value(&query_value); - auto status = bkd_reader->query(context, "c_datetimev2", qp_3536.get(), + Field qp_3536 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_datetimev2", qp_3536, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -4001,9 +3966,8 @@ class InvertedIndexReaderTest : public testing::Test { uint64_t query_value = 20240201130000ULL; std::shared_ptr bitmap = std::make_shared(); - auto qp_3561 = TypedInvertedIndexQueryParam::create_unique(); - qp_3561->set_value(&query_value); - auto status = bkd_reader->query(context, "c_timestamptz", qp_3561.get(), + Field qp_3561 = Field::create_field_from_olap_value(query_value); + auto status = bkd_reader->query(context, "c_timestamptz", qp_3561, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -4089,10 +4053,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query_value = "test"; std::shared_ptr bitmap = std::make_shared(); - auto qp_unsupp = TypedInvertedIndexQueryParam::create_unique(); - qp_unsupp->set_value(query_value); + Field qp_unsupp = Field::create_field(query_value); auto query_status = - bkd_reader->query(context, "c_unsupported", qp_unsupp.get(), + bkd_reader->query(context, "c_unsupported", qp_unsupp, InvertedIndexQueryType::EQUAL_QUERY, bitmap); // 
This might fail due to unsupported type, which is what we want to test } diff --git a/be/test/storage/segment/inverted_index_writer_test.cpp b/be/test/storage/segment/inverted_index_writer_test.cpp index e3ad19a61d9af4..c91c44b0911ea3 100644 --- a/be/test/storage/segment/inverted_index_writer_test.cpp +++ b/be/test/storage/segment/inverted_index_writer_test.cpp @@ -33,6 +33,7 @@ #include "core/data_type/data_type_factory.hpp" #include "core/data_type/data_type_number.h" +#include "core/field.h" #include "io/fs/local_file_system.h" #include "runtime/runtime_state.h" #include "storage/field.h" @@ -40,7 +41,6 @@ #include "storage/index/index_file_writer.h" #include "storage/index/inverted/inverted_index_desc.h" #include "storage/index/inverted/inverted_index_fs_directory.h" -#include "storage/index/inverted/inverted_index_query_param.h" #include "storage/index/inverted/inverted_index_reader.h" #include "storage/iterator/olap_data_convertor.h" #include "storage/tablet/tablet_schema.h" @@ -177,9 +177,8 @@ class InvertedIndexWriterTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto qp = TypedInvertedIndexQueryParam::create_unique(); - qp->set_value(&values[i]); - auto status = bkd_reader->query(context, "c1", qp.get(), + Field qp = Field::create_field(values[i]); + auto status = bkd_reader->query(context, "c1", qp, doris::segment_v2::InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()) << status; @@ -205,9 +204,8 @@ class InvertedIndexWriterTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto test_qp = TypedInvertedIndexQueryParam::create_unique(); - test_qp->set_value(&test_value); - auto status = bkd_reader->query(context, "c1", test_qp.get(), + Field test_qp = Field::create_field(test_value); + auto status = bkd_reader->query(context, "c1", test_qp, doris::segment_v2::InvertedIndexQueryType::LESS_THAN_QUERY, less_than_bitmap); EXPECT_TRUE(status.ok()) 
<< status; @@ -226,7 +224,7 @@ class InvertedIndexWriterTest : public testing::Test { // Test GREATER_THAN query std::shared_ptr greater_than_bitmap = std::make_shared(); - status = bkd_reader->query(context, "c1", test_qp.get(), + status = bkd_reader->query(context, "c1", test_qp, doris::segment_v2::InvertedIndexQueryType::GREATER_THAN_QUERY, greater_than_bitmap); EXPECT_TRUE(status.ok()) << status; @@ -744,9 +742,8 @@ class InvertedIndexWriterTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto qp = TypedInvertedIndexQueryParam::create_unique(); - qp->set_value(&str_ref); - auto query_status = inverted_reader->query(context, field_name, qp.get(), + Field qp = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = inverted_reader->query(context, field_name, qp, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; // For regular strings, both should work the same @@ -953,14 +950,12 @@ TEST_F(InvertedIndexWriterTest, CompareUnicodeStringWriteResults) { context->runtime_state = &runtime_state; StringRef str_ref(values[i].data, values[i].size); - auto qp = TypedInvertedIndexQueryParam::create_unique(); - qp->set_value(&str_ref); + Field qp = Field::create_field(std::string(str_ref.data, str_ref.size)); auto query_status_enabled = inverted_reader_enabled->query( - context, field_name, qp.get(), InvertedIndexQueryType::EQUAL_QUERY, bitmap_enabled); + context, field_name, qp, InvertedIndexQueryType::EQUAL_QUERY, bitmap_enabled); auto query_status_disabled = inverted_reader_disabled->query( - context, field_name, qp.get(), InvertedIndexQueryType::EQUAL_QUERY, - bitmap_disabled); + context, field_name, qp, InvertedIndexQueryType::EQUAL_QUERY, bitmap_disabled); EXPECT_TRUE(query_status_enabled.ok()) << query_status_enabled; EXPECT_TRUE(query_status_disabled.ok()) << query_status_disabled; From 20b66f312ddfecfb0f13ab6a52ddf422e861d6d9 Mon Sep 17 00:00:00 
2001 From: csun5285 Date: Wed, 6 May 2026 17:09:35 +0800 Subject: [PATCH 03/10] [refactor](inverted-index) drop empty query_param.h and simplify bkd_encode_field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit inverted_index_query_param.h has no remaining includers; the matching test file is a stub. Remove both. Also drop the FieldType template parameter from bkd_encode_field — the storage value's bytes are already correct for KeyCoder's compile-time CppType, so the explicit key_t conversion was unnecessary. bkd_encode_min/max still need the CppTypeTraits::CppType for the right type_limit sentinel. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../inverted/inverted_index_query_param.h | 20 --------------- .../index/inverted/inverted_index_reader.cpp | 15 ++++------- .../inverted_index_query_param_test.cpp | 25 ------------------- 3 files changed, 5 insertions(+), 55 deletions(-) delete mode 100644 be/src/storage/index/inverted/inverted_index_query_param.h delete mode 100644 be/test/storage/segment/inverted_index_query_param_test.cpp diff --git a/be/src/storage/index/inverted/inverted_index_query_param.h b/be/src/storage/index/inverted/inverted_index_query_param.h deleted file mode 100644 index cb92cf9661b6ad..00000000000000 --- a/be/src/storage/index/inverted/inverted_index_query_param.h +++ /dev/null @@ -1,20 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "core/field.h" diff --git a/be/src/storage/index/inverted/inverted_index_reader.cpp b/be/src/storage/index/inverted/inverted_index_reader.cpp index 95f7a071da1e82..82c88c1caea2a2 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.cpp +++ b/be/src/storage/index/inverted/inverted_index_reader.cpp @@ -63,15 +63,10 @@ namespace { -// Encodes a Field's value as a BKD ascending key via KeyCoder. -// CppTypeTraits::CppType is the type KeyCoder expects (e.g. int64_t for -// DATETIME, where PrimitiveTypeTraits gives uint64_t — implicit conversion -// preserves the bit pattern under 2's complement). 
-template +template static void bkd_encode_field(const doris::Field& field, const doris::KeyCoder* coder, std::string* out) { - using key_t = typename doris::CppTypeTraits::CppType; - key_t kv = doris::PrimitiveTypeConvertor::to_storage_field_type(field.get()); + auto kv = doris::PrimitiveTypeConvertor::to_storage_field_type(field.get()); coder->full_encode_ascending(&kv, out); } @@ -113,9 +108,9 @@ static void bkd_encode_max(const doris::KeyCoder* coder, std::string* out) { static doris::Status encode_bkd_field_ascending(doris::FieldType ft, const doris::Field& field, const doris::KeyCoder* coder, std::string* out) { -#define CASE(FT, PT) \ - case doris::FieldType::FT: \ - bkd_encode_field(field, coder, out); \ +#define CASE(FT, PT) \ + case doris::FieldType::FT: \ + bkd_encode_field(field, coder, out); \ return doris::Status::OK(); switch (ft) { BKD_TYPE_CASES(CASE) diff --git a/be/test/storage/segment/inverted_index_query_param_test.cpp b/be/test/storage/segment/inverted_index_query_param_test.cpp deleted file mode 100644 index 5c06c1e5e5732b..00000000000000 --- a/be/test/storage/segment/inverted_index_query_param_test.cpp +++ /dev/null @@ -1,25 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -// inverted_index_query_param.h has been removed; its functionality is now -// internal to BkdIndexReader. Tests are covered by inverted_index_reader_test.cpp. - -#include - -namespace doris::segment_v2 { -// No tests here; see inverted_index_reader_test.cpp. -} // namespace doris::segment_v2 From 41c771b13f71f11b65f26f7f22d4ceb81d00d10c Mon Sep 17 00:00:00 2001 From: csun5285 Date: Wed, 6 May 2026 17:51:00 +0800 Subject: [PATCH 04/10] [refactor](storage) unify Field key encoding between row_cursor and BKD Both RowCursor::_encode_field and BKD's encode_bkd_field_ascending did the same Field -> storage value -> KeyCoder dispatch with their own copy of the (FieldType, PrimitiveType) table. Extract the conversion helper and the dispatch X-macro into storage/field_key_encoder.h so both call sites share one source of truth. - field.h: expose StorageField::key_coder() for callers that already have a KeyCoder-shaped helper. - field_key_encoder.h: new header with full_encode_field_as_key / encode_field_as_key templates plus DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES X-macro. - row_cursor.cpp: 20 hand-written cases collapse into one macro expansion; encode_non_string_field wrapper removed. - inverted_index_reader.cpp: drops local bkd_encode_field and BKD_TYPE_CASES; the three encode_bkd_*_ascending functions reuse the shared macro. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- be/src/storage/field.h | 2 + be/src/storage/field_key_encoder.h | 71 +++++++++++++++ .../index/inverted/inverted_index_reader.cpp | 42 ++------- be/src/storage/row_cursor.cpp | 88 +++---------------- 4 files changed, 92 insertions(+), 111 deletions(-) create mode 100644 be/src/storage/field_key_encoder.h diff --git a/be/src/storage/field.h b/be/src/storage/field.h index 164dd382fc718f..ab006b3f72a06a 100644 --- a/be/src/storage/field.h +++ b/be/src/storage/field.h @@ -84,6 +84,8 @@ class StorageField { void full_encode_ascending(const void* value, std::string* buf) const { _key_coder->full_encode_ascending(value, buf); } + + const KeyCoder* key_coder() const { return _key_coder; } void add_sub_field(std::unique_ptr sub_field) { _sub_fields.emplace_back(std::move(sub_field)); } diff --git a/be/src/storage/field_key_encoder.h b/be/src/storage/field_key_encoder.h new file mode 100644 index 00000000000000..9dc0175a73da75 --- /dev/null +++ b/be/src/storage/field_key_encoder.h @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include + +#include "core/data_type/primitive_type.h" +#include "core/field.h" +#include "storage/key_coder.h" + +namespace doris { + +// Convert a Field value to its storage representation (via PrimitiveTypeConvertor) +// and full-encode it as a byte-comparable ascending key via KeyCoder. +template +inline void full_encode_field_as_key(const Field& f, const KeyCoder* coder, std::string* buf) { + auto v = PrimitiveTypeConvertor::to_storage_field_type(f.get()); + coder->full_encode_ascending(&v, buf); +} + +// Same as full_encode_field_as_key but truncates string keys to `index_size` bytes. +// For fixed-width types KeyCoder ignores `index_size`, so the output is identical +// to full_encode_field_as_key. +template +inline void encode_field_as_key(const Field& f, const KeyCoder* coder, size_t index_size, + std::string* buf) { + auto v = PrimitiveTypeConvertor::to_storage_field_type(f.get()); + coder->encode_ascending(&v, index_size, buf); +} + +// X-macro listing every (FieldType, PrimitiveType) pair that goes through KeyCoder +// as a non-string scalar key. Strings are handled separately because they need +// length / padding logic outside KeyCoder. Each entry: M(FT_suffix, PT_suffix). 
+#define DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(M) \ + M(OLAP_FIELD_TYPE_BOOL, TYPE_BOOLEAN) \ + M(OLAP_FIELD_TYPE_TINYINT, TYPE_TINYINT) \ + M(OLAP_FIELD_TYPE_SMALLINT, TYPE_SMALLINT) \ + M(OLAP_FIELD_TYPE_INT, TYPE_INT) \ + M(OLAP_FIELD_TYPE_BIGINT, TYPE_BIGINT) \ + M(OLAP_FIELD_TYPE_LARGEINT, TYPE_LARGEINT) \ + M(OLAP_FIELD_TYPE_FLOAT, TYPE_FLOAT) \ + M(OLAP_FIELD_TYPE_DOUBLE, TYPE_DOUBLE) \ + M(OLAP_FIELD_TYPE_DECIMAL, TYPE_DECIMALV2) \ + M(OLAP_FIELD_TYPE_DECIMAL32, TYPE_DECIMAL32) \ + M(OLAP_FIELD_TYPE_DECIMAL64, TYPE_DECIMAL64) \ + M(OLAP_FIELD_TYPE_DECIMAL128I, TYPE_DECIMAL128I) \ + M(OLAP_FIELD_TYPE_DECIMAL256, TYPE_DECIMAL256) \ + M(OLAP_FIELD_TYPE_DATE, TYPE_DATE) \ + M(OLAP_FIELD_TYPE_DATETIME, TYPE_DATETIME) \ + M(OLAP_FIELD_TYPE_DATEV2, TYPE_DATEV2) \ + M(OLAP_FIELD_TYPE_DATETIMEV2, TYPE_DATETIMEV2) \ + M(OLAP_FIELD_TYPE_TIMESTAMPTZ, TYPE_TIMESTAMPTZ) \ + M(OLAP_FIELD_TYPE_IPV4, TYPE_IPV4) \ + M(OLAP_FIELD_TYPE_IPV6, TYPE_IPV6) + +} // namespace doris diff --git a/be/src/storage/index/inverted/inverted_index_reader.cpp b/be/src/storage/index/inverted/inverted_index_reader.cpp index 82c88c1caea2a2..51286ed911e629 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.cpp +++ b/be/src/storage/index/inverted/inverted_index_reader.cpp @@ -45,6 +45,7 @@ #include "runtime/runtime_profile.h" #include "runtime/runtime_state.h" #include "storage/field.h" +#include "storage/field_key_encoder.h" #include "storage/index/index_file_reader.h" #include "storage/index/index_reader_helper.h" #include "storage/index/inverted/analyzer/analyzer.h" @@ -63,13 +64,6 @@ namespace { -template -static void bkd_encode_field(const doris::Field& field, const doris::KeyCoder* coder, - std::string* out) { - auto kv = doris::PrimitiveTypeConvertor::to_storage_field_type(field.get()); - coder->full_encode_ascending(&kv, out); -} - template static void bkd_encode_min(const doris::KeyCoder* coder, std::string* out) { using key_t = typename 
doris::CppTypeTraits::CppType; @@ -84,36 +78,14 @@ static void bkd_encode_max(const doris::KeyCoder* coder, std::string* out) { coder->full_encode_ascending(&v, out); } -#define BKD_TYPE_CASES(MACRO) \ - MACRO(OLAP_FIELD_TYPE_BOOL, TYPE_BOOLEAN) \ - MACRO(OLAP_FIELD_TYPE_TINYINT, TYPE_TINYINT) \ - MACRO(OLAP_FIELD_TYPE_SMALLINT, TYPE_SMALLINT) \ - MACRO(OLAP_FIELD_TYPE_INT, TYPE_INT) \ - MACRO(OLAP_FIELD_TYPE_BIGINT, TYPE_BIGINT) \ - MACRO(OLAP_FIELD_TYPE_LARGEINT, TYPE_LARGEINT) \ - MACRO(OLAP_FIELD_TYPE_FLOAT, TYPE_FLOAT) \ - MACRO(OLAP_FIELD_TYPE_DOUBLE, TYPE_DOUBLE) \ - MACRO(OLAP_FIELD_TYPE_DECIMAL, TYPE_DECIMALV2) \ - MACRO(OLAP_FIELD_TYPE_DECIMAL32, TYPE_DECIMAL32) \ - MACRO(OLAP_FIELD_TYPE_DECIMAL64, TYPE_DECIMAL64) \ - MACRO(OLAP_FIELD_TYPE_DECIMAL128I, TYPE_DECIMAL128I) \ - MACRO(OLAP_FIELD_TYPE_DECIMAL256, TYPE_DECIMAL256) \ - MACRO(OLAP_FIELD_TYPE_DATE, TYPE_DATE) \ - MACRO(OLAP_FIELD_TYPE_DATETIME, TYPE_DATETIME) \ - MACRO(OLAP_FIELD_TYPE_DATEV2, TYPE_DATEV2) \ - MACRO(OLAP_FIELD_TYPE_DATETIMEV2, TYPE_DATETIMEV2) \ - MACRO(OLAP_FIELD_TYPE_TIMESTAMPTZ, TYPE_TIMESTAMPTZ) \ - MACRO(OLAP_FIELD_TYPE_IPV4, TYPE_IPV4) \ - MACRO(OLAP_FIELD_TYPE_IPV6, TYPE_IPV6) - static doris::Status encode_bkd_field_ascending(doris::FieldType ft, const doris::Field& field, const doris::KeyCoder* coder, std::string* out) { -#define CASE(FT, PT) \ - case doris::FieldType::FT: \ - bkd_encode_field(field, coder, out); \ +#define CASE(FT, PT) \ + case doris::FieldType::FT: \ + doris::full_encode_field_as_key(field, coder, out); \ return doris::Status::OK(); switch (ft) { - BKD_TYPE_CASES(CASE) + DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) default: break; } @@ -128,7 +100,7 @@ static doris::Status encode_bkd_min_ascending(doris::FieldType ft, const doris:: bkd_encode_min(coder, out); \ return doris::Status::OK(); switch (ft) { - BKD_TYPE_CASES(CASE) + DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) default: break; } @@ -143,7 +115,7 @@ static doris::Status 
encode_bkd_max_ascending(doris::FieldType ft, const doris:: bkd_encode_max(coder, out); \ return doris::Status::OK(); switch (ft) { - BKD_TYPE_CASES(CASE) + DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) default: break; } diff --git a/be/src/storage/row_cursor.cpp b/be/src/storage/row_cursor.cpp index 466ee083a1d902..53d7322be338f0 100644 --- a/be/src/storage/row_cursor.cpp +++ b/be/src/storage/row_cursor.cpp @@ -28,6 +28,7 @@ #include "core/data_type/primitive_type.h" #include "core/field.h" #include "storage/field.h" +#include "storage/field_key_encoder.h" #include "storage/olap_common.h" #include "storage/olap_define.h" #include "storage/tablet/tablet_schema.h" @@ -151,20 +152,6 @@ std::string RowCursor::to_string() const { return result; } -// Convert a Field value to its storage representation via PrimitiveTypeConvertor and encode. -// For most types this is an identity conversion; for DATE, DATETIME, DECIMALV2 it does -// actual conversion to the olap storage format. -template -static void encode_non_string_field(const StorageField* storage_field, const Field& f, - bool full_encode, std::string* buf) { - auto storage_val = PrimitiveTypeConvertor::to_storage_field_type(f.get()); - if (full_encode) { - storage_field->full_encode_ascending(&storage_val, buf); - } else { - storage_field->encode_ascending(&storage_val, buf); - } -} - void RowCursor::_encode_field(const StorageField* storage_field, const Field& f, bool full_encode, std::string* buf) const { FieldType ft = storage_field->type(); @@ -197,69 +184,18 @@ void RowCursor::_encode_field(const StorageField* storage_field, const Field& f, return; } - // Non-string types: convert Field value to storage format via PrimitiveTypeConvertor, - // then encode. For most types this is an identity conversion. 
+ const KeyCoder* coder = storage_field->key_coder(); switch (ft) { - case FieldType::OLAP_FIELD_TYPE_BOOL: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_TINYINT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_SMALLINT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_INT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_BIGINT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_LARGEINT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_FLOAT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DOUBLE: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DATE: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DATETIME: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DATEV2: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DECIMAL: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DECIMAL32: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DECIMAL64: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: - encode_non_string_field(storage_field, f, 
full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DECIMAL256: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_IPV4: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_IPV6: - encode_non_string_field(storage_field, f, full_encode, buf); - break; +#define CASE(FT, PT) \ + case FieldType::FT: \ + if (full_encode) { \ + full_encode_field_as_key(f, coder, buf); \ + } else { \ + encode_field_as_key(f, coder, storage_field->index_size(), buf); \ + } \ + break; + DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) +#undef CASE default: LOG(FATAL) << "unsupported field type for encoding: " << int(ft); break; From 12c5f17078dd641dd52cad58d2ef6b205b976b0e Mon Sep 17 00:00:00 2001 From: csun5285 Date: Wed, 6 May 2026 18:15:07 +0800 Subject: [PATCH 05/10] [refactor](storage) move encode_field_as_key helpers into key_coder.h The Field-to-key encoding helpers and the dispatch X-macro fit naturally next to KeyCoder rather than in a stand-alone header, since they are thin wrappers around KeyCoder calls. Inline them into storage/key_coder.h and remove storage/field_key_encoder.h. Co-Authored-By: Claude Opus 4.7 (1M context) --- be/src/storage/field_key_encoder.h | 71 ------------------- .../index/inverted/inverted_index_reader.cpp | 1 - be/src/storage/key_coder.h | 45 ++++++++++++ be/src/storage/row_cursor.cpp | 1 - 4 files changed, 45 insertions(+), 73 deletions(-) delete mode 100644 be/src/storage/field_key_encoder.h diff --git a/be/src/storage/field_key_encoder.h b/be/src/storage/field_key_encoder.h deleted file mode 100644 index 9dc0175a73da75..00000000000000 --- a/be/src/storage/field_key_encoder.h +++ /dev/null @@ -1,71 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "core/data_type/primitive_type.h" -#include "core/field.h" -#include "storage/key_coder.h" - -namespace doris { - -// Convert a Field value to its storage representation (via PrimitiveTypeConvertor) -// and full-encode it as a byte-comparable ascending key via KeyCoder. -template -inline void full_encode_field_as_key(const Field& f, const KeyCoder* coder, std::string* buf) { - auto v = PrimitiveTypeConvertor::to_storage_field_type(f.get()); - coder->full_encode_ascending(&v, buf); -} - -// Same as full_encode_field_as_key but truncates string keys to `index_size` bytes. -// For fixed-width types KeyCoder ignores `index_size`, so the output is identical -// to full_encode_field_as_key. -template -inline void encode_field_as_key(const Field& f, const KeyCoder* coder, size_t index_size, - std::string* buf) { - auto v = PrimitiveTypeConvertor::to_storage_field_type(f.get()); - coder->encode_ascending(&v, index_size, buf); -} - -// X-macro listing every (FieldType, PrimitiveType) pair that goes through KeyCoder -// as a non-string scalar key. Strings are handled separately because they need -// length / padding logic outside KeyCoder. Each entry: M(FT_suffix, PT_suffix). 
-#define DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(M) \ - M(OLAP_FIELD_TYPE_BOOL, TYPE_BOOLEAN) \ - M(OLAP_FIELD_TYPE_TINYINT, TYPE_TINYINT) \ - M(OLAP_FIELD_TYPE_SMALLINT, TYPE_SMALLINT) \ - M(OLAP_FIELD_TYPE_INT, TYPE_INT) \ - M(OLAP_FIELD_TYPE_BIGINT, TYPE_BIGINT) \ - M(OLAP_FIELD_TYPE_LARGEINT, TYPE_LARGEINT) \ - M(OLAP_FIELD_TYPE_FLOAT, TYPE_FLOAT) \ - M(OLAP_FIELD_TYPE_DOUBLE, TYPE_DOUBLE) \ - M(OLAP_FIELD_TYPE_DECIMAL, TYPE_DECIMALV2) \ - M(OLAP_FIELD_TYPE_DECIMAL32, TYPE_DECIMAL32) \ - M(OLAP_FIELD_TYPE_DECIMAL64, TYPE_DECIMAL64) \ - M(OLAP_FIELD_TYPE_DECIMAL128I, TYPE_DECIMAL128I) \ - M(OLAP_FIELD_TYPE_DECIMAL256, TYPE_DECIMAL256) \ - M(OLAP_FIELD_TYPE_DATE, TYPE_DATE) \ - M(OLAP_FIELD_TYPE_DATETIME, TYPE_DATETIME) \ - M(OLAP_FIELD_TYPE_DATEV2, TYPE_DATEV2) \ - M(OLAP_FIELD_TYPE_DATETIMEV2, TYPE_DATETIMEV2) \ - M(OLAP_FIELD_TYPE_TIMESTAMPTZ, TYPE_TIMESTAMPTZ) \ - M(OLAP_FIELD_TYPE_IPV4, TYPE_IPV4) \ - M(OLAP_FIELD_TYPE_IPV6, TYPE_IPV6) - -} // namespace doris diff --git a/be/src/storage/index/inverted/inverted_index_reader.cpp b/be/src/storage/index/inverted/inverted_index_reader.cpp index 51286ed911e629..5d4b1527719d7e 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.cpp +++ b/be/src/storage/index/inverted/inverted_index_reader.cpp @@ -45,7 +45,6 @@ #include "runtime/runtime_profile.h" #include "runtime/runtime_state.h" #include "storage/field.h" -#include "storage/field_key_encoder.h" #include "storage/index/index_file_reader.h" #include "storage/index/index_reader_helper.h" #include "storage/index/inverted/analyzer/analyzer.h" diff --git a/be/src/storage/key_coder.h b/be/src/storage/key_coder.h index 2952949c02d5d3..80724603c8ae59 100644 --- a/be/src/storage/key_coder.h +++ b/be/src/storage/key_coder.h @@ -29,8 +29,10 @@ #include "absl/strings/substitute.h" #include "common/status.h" +#include "core/data_type/primitive_type.h" #include "core/decimal12.h" #include "core/extended_types.h" +#include "core/field.h" #include 
"core/types.h" #include "exec/common/endian.h" #include "storage/olap_common.h" @@ -446,4 +448,47 @@ template <> class KeyCoderTraits : public KeyCoderTraitsForFloat {}; +// Convert a Field value to its storage representation (via PrimitiveTypeConvertor) +// and full-encode it as a byte-comparable ascending key via KeyCoder. +template +inline void full_encode_field_as_key(const Field& f, const KeyCoder* coder, std::string* buf) { + auto v = PrimitiveTypeConvertor::to_storage_field_type(f.get()); + coder->full_encode_ascending(&v, buf); +} + +// Same as full_encode_field_as_key but truncates string keys to `index_size` bytes. +// For fixed-width types KeyCoder ignores `index_size`, so the output is identical +// to full_encode_field_as_key. +template +inline void encode_field_as_key(const Field& f, const KeyCoder* coder, size_t index_size, + std::string* buf) { + auto v = PrimitiveTypeConvertor::to_storage_field_type(f.get()); + coder->encode_ascending(&v, index_size, buf); +} + +// X-macro listing every (FieldType, PrimitiveType) pair that goes through KeyCoder +// as a non-string scalar key. Strings are handled separately because they need +// length / padding logic outside KeyCoder. Each entry: M(FT_suffix, PT_suffix). 
+#define DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(M) \ + M(OLAP_FIELD_TYPE_BOOL, TYPE_BOOLEAN) \ + M(OLAP_FIELD_TYPE_TINYINT, TYPE_TINYINT) \ + M(OLAP_FIELD_TYPE_SMALLINT, TYPE_SMALLINT) \ + M(OLAP_FIELD_TYPE_INT, TYPE_INT) \ + M(OLAP_FIELD_TYPE_BIGINT, TYPE_BIGINT) \ + M(OLAP_FIELD_TYPE_LARGEINT, TYPE_LARGEINT) \ + M(OLAP_FIELD_TYPE_FLOAT, TYPE_FLOAT) \ + M(OLAP_FIELD_TYPE_DOUBLE, TYPE_DOUBLE) \ + M(OLAP_FIELD_TYPE_DECIMAL, TYPE_DECIMALV2) \ + M(OLAP_FIELD_TYPE_DECIMAL32, TYPE_DECIMAL32) \ + M(OLAP_FIELD_TYPE_DECIMAL64, TYPE_DECIMAL64) \ + M(OLAP_FIELD_TYPE_DECIMAL128I, TYPE_DECIMAL128I) \ + M(OLAP_FIELD_TYPE_DECIMAL256, TYPE_DECIMAL256) \ + M(OLAP_FIELD_TYPE_DATE, TYPE_DATE) \ + M(OLAP_FIELD_TYPE_DATETIME, TYPE_DATETIME) \ + M(OLAP_FIELD_TYPE_DATEV2, TYPE_DATEV2) \ + M(OLAP_FIELD_TYPE_DATETIMEV2, TYPE_DATETIMEV2) \ + M(OLAP_FIELD_TYPE_TIMESTAMPTZ, TYPE_TIMESTAMPTZ) \ + M(OLAP_FIELD_TYPE_IPV4, TYPE_IPV4) \ + M(OLAP_FIELD_TYPE_IPV6, TYPE_IPV6) + } // namespace doris diff --git a/be/src/storage/row_cursor.cpp b/be/src/storage/row_cursor.cpp index 53d7322be338f0..44838dcc2df3fa 100644 --- a/be/src/storage/row_cursor.cpp +++ b/be/src/storage/row_cursor.cpp @@ -28,7 +28,6 @@ #include "core/data_type/primitive_type.h" #include "core/field.h" #include "storage/field.h" -#include "storage/field_key_encoder.h" #include "storage/olap_common.h" #include "storage/olap_define.h" #include "storage/tablet/tablet_schema.h" From a5489892933713a23bfff9a68cffb7a688271be9 Mon Sep 17 00:00:00 2001 From: csun5285 Date: Fri, 8 May 2026 23:59:02 +0800 Subject: [PATCH 06/10] [fix](inverted-index) skip BKD encode when query Field type does not match indexed type `Field::get()` DCHECKs that the Field's primitive type tag equals `PT`, but predicates like `arr_col = []` reach `encode_bkd_field_ascending` via `FunctionComparison` with the entire const ARRAY literal as the query Field, so `actual = TYPE_ARRAY` while the BKD index records the inner scalar (e.g. IPV4). 
Under ASAN the assert aborts the BE with "requested IPV4, actual ARRAY" -- before the void*->Field refactor the old factory rejected non-scalar types via NotSupported and the engine fell back; this defense was lost when the typed dispatch moved into BKD. Validate the Field type before dispatching to `full_encode_field_as_key` and return INVERTED_INDEX_EVALUATE_SKIPPED on mismatch so `SegmentIterator::_apply_index_expr` downgrades to scalar evaluation instead of crashing on the assert. Scalar predicates (`int_col = 1`, `array_contains(int_arr, 2)`) keep matching as before. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../index/inverted/inverted_index_reader.cpp | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/be/src/storage/index/inverted/inverted_index_reader.cpp b/be/src/storage/index/inverted/inverted_index_reader.cpp index 5d4b1527719d7e..fbd1af40baa2ec 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.cpp +++ b/be/src/storage/index/inverted/inverted_index_reader.cpp @@ -79,10 +79,26 @@ static void bkd_encode_max(const doris::KeyCoder* coder, std::string* out) { static doris::Status encode_bkd_field_ascending(doris::FieldType ft, const doris::Field& field, const doris::KeyCoder* coder, std::string* out) { -#define CASE(FT, PT) \ - case doris::FieldType::FT: \ - doris::full_encode_field_as_key(field, coder, out); \ - return doris::Status::OK(); + // `actual` is the primitive type of the query Field from the caller; `PrimitiveType::PT` is the + // scalar type the BKD index stores (e.g. INT for an INT column or ARRAY index). + // Normally they match: `int_col = 1` -> both INT; `array_contains(int_arr, 2)` -> both INT. 
+ // Mismatch happens when the query Field carries a non-scalar while BKD records the inner scalar: + // `arr = []` reaches here via `FunctionComparison` with the entire const ARRAY literal + // as the query Field, so `actual = TYPE_ARRAY` while PT is the inner scalar -- the predicate + // cannot be answered by BKD. Return INVERTED_INDEX_EVALUATE_SKIPPED so `_apply_index_expr` + // downgrades to scalar evaluation instead of crashing on `Field::get()` DCHECK below. +#define CASE(FT, PT) \ + case doris::FieldType::FT: { \ + const auto actual = field.get_type(); \ + if (actual != doris::PrimitiveType::PT && actual != doris::PrimitiveType::TYPE_NULL && \ + !(doris::is_string_type(actual) && doris::is_string_type(doris::PrimitiveType::PT))) { \ + return doris::Status::Error( \ + "BKD query value type {} does not match index type {}", \ + static_cast(actual), static_cast(ft)); \ + } \ + doris::full_encode_field_as_key(field, coder, out); \ + return doris::Status::OK(); \ + } switch (ft) { DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) default: From 9c46cf4d0a14883dd578a59e9bec7832110b1fb0 Mon Sep 17 00:00:00 2001 From: csun5285 Date: Sun, 10 May 2026 13:28:24 +0800 Subject: [PATCH 07/10] [refactor](inverted-index) source BKD sentinels from compute-layer type_limit bkd_encode_min/max are now templated by PrimitiveType instead of FieldType. The +/- infinity sentinel is taken from type_limit in the compute layer (e.g. DecimalV2Value::get_min_decimal, VecDateTimeValue::datetime_min_value) and projected onto the storage POD via PrimitiveTypeConvertor::to_storage_field_type. This single-sources every limit constant: DecimalV2 bounds live only on DecimalV2Value, DATE bounds only on VecDateTimeValue. The two storage-layer type_limit<> specialisations added for decimal12_t and uint24_t in the previous PR are no longer required and are removed along with their includes and the half-bounded-BKD comment block. core/type_limit.h is now exclusively a compute-layer header. 
Tests: the two sanity-probe tests that asserted on the deleted specialisations are removed; verify_bkd_range_queries (one TEST_F per BKD-supported PT) still exercises the same +/- infinity codepath end-to-end. Co-Authored-By: Claude Opus 4.7 (1M context) --- be/src/core/type_limit.h | 20 --------- .../index/inverted/inverted_index_reader.cpp | 33 +++++++++------ .../segment/inverted_index_reader_test.cpp | 42 +++---------------- 3 files changed, 26 insertions(+), 69 deletions(-) diff --git a/be/src/core/type_limit.h b/be/src/core/type_limit.h index 218f24d3961b47..824433a03191dc 100644 --- a/be/src/core/type_limit.h +++ b/be/src/core/type_limit.h @@ -17,10 +17,8 @@ #pragma once -#include "core/decimal12.h" #include "core/extended_types.h" #include "core/string_ref.h" -#include "core/uint24.h" #include "core/value/decimalv2_value.h" #include "core/value/timestamptz_value.h" @@ -56,24 +54,6 @@ struct type_limit { static DecimalV2Value max() { return DecimalV2Value::get_max_decimal(); } }; -// std::numeric_limits is not specialised for these custom storage types, so -// the generic type_limit would return T() = zero for both min and max, -// silently breaking BKD half-bounded range queries. - -// DECIMALV2 storage. Largest representable DecimalV2 value (18 digits . 9 digits). -template <> -struct type_limit { - static decimal12_t min() { return decimal12_t {-999999999999999999LL, -999999999}; } - static decimal12_t max() { return decimal12_t {+999999999999999999LL, +999999999}; } -}; - -// DATE storage. Packed as `year<<9 | month<<5 | day`: 33=0001-01-01, 5119903=9999-12-31. 
-template <> -struct type_limit { - static uint24_t min() { return uint24_t(33); } - static uint24_t max() { return uint24_t(5119903); } -}; - template <> struct type_limit { static Decimal32 max() { return 999999999; } diff --git a/be/src/storage/index/inverted/inverted_index_reader.cpp b/be/src/storage/index/inverted/inverted_index_reader.cpp index fbd1af40baa2ec..80de3b0aac8744 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.cpp +++ b/be/src/storage/index/inverted/inverted_index_reader.cpp @@ -63,17 +63,26 @@ namespace { -template +// Sentinel values are sourced from the compute-layer `type_limit` and +// then projected onto the storage-layer POD via `PrimitiveTypeConvertor`. +// Routing through the compute layer keeps the +/- infinity constants +// single-sourced (e.g. DecimalV2 max lives only in DecimalV2Value::get_max_decimal, +// DATE bounds only in VecDateTimeValue::datetime_min/max_value), so types like +// decimal12_t and uint24_t — which have no std::numeric_limits specialisation — +// no longer need their own type_limit<> entries. 
+template static void bkd_encode_min(const doris::KeyCoder* coder, std::string* out) { - using key_t = typename doris::CppTypeTraits::CppType; - key_t v = doris::type_limit::min(); + using compute_t = typename doris::PrimitiveTypeTraits::CppType; + auto compute_v = doris::type_limit::min(); + auto v = doris::PrimitiveTypeConvertor::to_storage_field_type(compute_v); coder->full_encode_ascending(&v, out); } -template +template static void bkd_encode_max(const doris::KeyCoder* coder, std::string* out) { - using key_t = typename doris::CppTypeTraits::CppType; - key_t v = doris::type_limit::max(); + using compute_t = typename doris::PrimitiveTypeTraits::CppType; + auto compute_v = doris::type_limit::max(); + auto v = doris::PrimitiveTypeConvertor::to_storage_field_type(compute_v); coder->full_encode_ascending(&v, out); } @@ -110,9 +119,9 @@ static doris::Status encode_bkd_field_ascending(doris::FieldType ft, const doris static doris::Status encode_bkd_min_ascending(doris::FieldType ft, const doris::KeyCoder* coder, std::string* out) { -#define CASE(FT, PT) \ - case doris::FieldType::FT: \ - bkd_encode_min(coder, out); \ +#define CASE(FT, PT) \ + case doris::FieldType::FT: \ + bkd_encode_min(coder, out); \ return doris::Status::OK(); switch (ft) { DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) @@ -125,9 +134,9 @@ static doris::Status encode_bkd_min_ascending(doris::FieldType ft, const doris:: static doris::Status encode_bkd_max_ascending(doris::FieldType ft, const doris::KeyCoder* coder, std::string* out) { -#define CASE(FT, PT) \ - case doris::FieldType::FT: \ - bkd_encode_max(coder, out); \ +#define CASE(FT, PT) \ + case doris::FieldType::FT: \ + bkd_encode_max(coder, out); \ return doris::Status::OK(); switch (ft) { DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) diff --git a/be/test/storage/segment/inverted_index_reader_test.cpp b/be/test/storage/segment/inverted_index_reader_test.cpp index bac61e551201cd..2305833a402778 100644 --- 
a/be/test/storage/segment/inverted_index_reader_test.cpp +++ b/be/test/storage/segment/inverted_index_reader_test.cpp @@ -2989,29 +2989,6 @@ class InvertedIndexReaderTest : public testing::Test { } } - // Sanity probe: confirm type_limit is specialised and not - // falling through to the zero-init default of an unspecialised - // numeric_limits. TypedInvertedIndexQueryParam::encode_min/ - // max_ascending depend on this specialisation for valid +/-infinity - // sentinels — without it both bounds collapse to encoded({0,0}) and BKD - // half-bounded range queries on DecimalV2 columns silently miss negative - // (for LESS_THAN) or positive (for GREATER_THAN) values. - void test_type_limit_decimal12_specialisation() { - auto lo = type_limit::min(); - auto hi = type_limit::max(); - EXPECT_EQ(lo.integer, -999999999999999999LL); - EXPECT_EQ(lo.fraction, -999999999); - EXPECT_EQ(hi.integer, +999999999999999999LL); - EXPECT_EQ(hi.fraction, +999999999); - } - - // Same regression as decimal12_t but for uint24_t (TYPE_DATE storage). - // Values match OLAP DATE packing: 0001-01-01 / 9999-12-31. - void test_type_limit_uint24_specialisation() { - EXPECT_EQ(static_cast(type_limit::min()), 33u); - EXPECT_EQ(static_cast(type_limit::max()), 5119903u); - } - // Generic BKD range-query verifier. Writes `values` into the BKD index // for `column_name`, then runs EQUAL / LESS_THAN / LESS_EQUAL / // GREATER_THAN / GREATER_EQUAL queries against `threshold`. 
Expected @@ -3020,7 +2997,8 @@ class InvertedIndexReaderTest : public testing::Test { // // Locks in: // * the typed-param interface (TypedInvertedIndexQueryParam) - // * the +/-infinity sentinels from type_limit + // * the +/-infinity sentinels routed through type_limit + + // PrimitiveTypeConvertor // * BKD's writer/reader/visitor agreement on KeyCoder-encoded bytes template void verify_bkd_range_queries(int col_id, std::string_view rowset_id, @@ -4083,25 +4061,15 @@ TEST_F(InvertedIndexReaderTest, BkdIndexRead) { test_bkd_index_read(); } -// Regression: type_limit must be specialised, otherwise both -// min() and max() collapse to decimal12_t{0, 0} (zero-init default of an -// unspecialised numeric_limits) and TypedInvertedIndexQueryParam -// produces invalid +/-infinity sentinels. -TEST_F(InvertedIndexReaderTest, TypeLimitDecimal12Specialisation) { - test_type_limit_decimal12_specialisation(); -} - // BKD half-bounded range query regression suite, one TEST_F per BKD-supported // PrimitiveType. They all share `verify_bkd_range_queries`, which: // - writes 6 sorted values into a fresh BKD index // - asserts EQUAL / LESS_THAN / LESS_EQUAL / GREATER_THAN / GREATER_EQUAL // cardinalities derived from the values via std::count_if. // -// Locks in the typed-param interface, the +/-infinity sentinels from -// type_limit, and BKD writer/reader/visitor agreement. -TEST_F(InvertedIndexReaderTest, TypeLimitUint24Specialisation) { - test_type_limit_uint24_specialisation(); -} +// Locks in the typed-param interface, the +/-infinity sentinels routed +// through type_limit + PrimitiveTypeConvertor, and BKD +// writer/reader/visitor agreement. 
TEST_F(InvertedIndexReaderTest, BkdRangeIntRangeQuery) { test_bkd_range_int(); } From a355a8df558d90cf2f06d570195951ed3ae320c0 Mon Sep 17 00:00:00 2001 From: csun5285 Date: Sun, 10 May 2026 13:53:39 +0800 Subject: [PATCH 08/10] [fix](field) throw on type mismatch in Field::get() instead of DCHECK The DCHECK in both Field::get() overloads was debug-only, so a release build silently returned a reinterpret_cast of unrelated storage bytes when T disagreed with the stored type. Replace it with a runtime check that throws Exception(FatalError(...)), matching the rest of field.cpp's error style and surfacing the bug in production. The existing INVERTED_INDEX_EVALUATE_SKIPPED gate in encode_bkd_field_ascending still fires first for the legitimate ARRAY-query-vs-scalar-BKD-index case so the predicate falls back to scalar evaluation; the throw inside Field::get is defense-in-depth for any future caller that forgets to gate. Co-Authored-By: Claude Opus 4.7 (1M context) --- be/src/core/field.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/be/src/core/field.cpp b/be/src/core/field.cpp index 065be343962dd9..f63aaf8227a7c6 100644 --- a/be/src/core/field.cpp +++ b/be/src/core/field.cpp @@ -649,16 +649,20 @@ std::string Field::get_type_name() const { template typename PrimitiveTypeTraits::CppType& Field::get() { - DCHECK(T == type || (is_string_type(type) && is_string_type(T)) || type == TYPE_NULL) - << "Type mismatch: requested " << type_to_string(T) << ", actual " << get_type_name(); + if (T != type && !(is_string_type(type) && is_string_type(T)) && type != TYPE_NULL) { + throw Exception(Status::FatalError("Field::get type mismatch: requested {}, actual {}", + type_to_string(T), get_type_name())); + } auto* MAY_ALIAS ptr = reinterpret_cast::CppType*>(&storage); return *ptr; } template const typename PrimitiveTypeTraits::CppType& Field::get() const { - DCHECK(T == type || (is_string_type(type) && is_string_type(T)) || type == TYPE_NULL) - << 
"Type mismatch: requested " << type_to_string(T) << ", actual " << get_type_name(); + if (T != type && !(is_string_type(type) && is_string_type(T)) && type != TYPE_NULL) { + throw Exception(Status::FatalError("Field::get type mismatch: requested {}, actual {}", + type_to_string(T), get_type_name())); + } const auto* MAY_ALIAS ptr = reinterpret_cast::CppType*>(&storage); return *ptr; From 0cf742b86a583a845c651c4e47778de2a2299b7a Mon Sep 17 00:00:00 2001 From: csun5285 Date: Mon, 11 May 2026 11:34:23 +0800 Subject: [PATCH 09/10] [refactor](storage) drop unused encode_field_as_key helper and assert non-string keys `encode_field_as_key` was a near-duplicate of `full_encode_field_as_key`. After `DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES` excluded strings from its dispatch, the `index_size` argument became dead: every non-string `KeyCoderTraits::encode_ascending` just delegates to `full_encode_ascending` and ignores `index_size`. Drop the helper. Also tighten `full_encode_field_as_key` with a compile-time assert via a new `is_key_encodable_non_string_type(PrimitiveType)` constexpr derived from the existing macro, so future callers can't accidentally feed it a string or nested/aggregate type. `RowCursor::_encode_field` simplifies accordingly: its non-string branch no longer needs to distinguish `full_encode`, since both paths produce byte-identical output for fixed-width keys. The flag still matters for the string branch and is preserved there. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- be/src/storage/key_coder.h | 49 ++++++++++++++++++++++------------- be/src/storage/row_cursor.cpp | 13 +++++----- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/be/src/storage/key_coder.h b/be/src/storage/key_coder.h index 80724603c8ae59..0c4bcf08d171e5 100644 --- a/be/src/storage/key_coder.h +++ b/be/src/storage/key_coder.h @@ -448,24 +448,6 @@ template <> class KeyCoderTraits : public KeyCoderTraitsForFloat {}; -// Convert a Field value to its storage representation (via PrimitiveTypeConvertor) -// and full-encode it as a byte-comparable ascending key via KeyCoder. -template -inline void full_encode_field_as_key(const Field& f, const KeyCoder* coder, std::string* buf) { - auto v = PrimitiveTypeConvertor::to_storage_field_type(f.get()); - coder->full_encode_ascending(&v, buf); -} - -// Same as full_encode_field_as_key but truncates string keys to `index_size` bytes. -// For fixed-width types KeyCoder ignores `index_size`, so the output is identical -// to full_encode_field_as_key. -template -inline void encode_field_as_key(const Field& f, const KeyCoder* coder, size_t index_size, - std::string* buf) { - auto v = PrimitiveTypeConvertor::to_storage_field_type(f.get()); - coder->encode_ascending(&v, index_size, buf); -} - // X-macro listing every (FieldType, PrimitiveType) pair that goes through KeyCoder // as a non-string scalar key. Strings are handled separately because they need // length / padding logic outside KeyCoder. Each entry: M(FT_suffix, PT_suffix). @@ -491,4 +473,35 @@ inline void encode_field_as_key(const Field& f, const KeyCoder* coder, size_t in M(OLAP_FIELD_TYPE_IPV4, TYPE_IPV4) \ M(OLAP_FIELD_TYPE_IPV6, TYPE_IPV6) +// True for exactly the PrimitiveTypes listed in +// DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES. 
Strings (CHAR/VARCHAR/STRING/ +// VARBINARY) have their own short-key code path in row_cursor.cpp that calls +// storage_field->full_encode_ascending directly, and nested/aggregate types +// (ARRAY/MAP/STRUCT/VARIANT/HLL/BITMAP/JSONB/QUANTILE_STATE/AGG_STATE) are not +// key-encodable at all -- both groups must never reach the helpers below. +constexpr bool is_key_encodable_non_string_type(PrimitiveType pt) { + switch (pt) { +#define DORIS_KEY_ENCODABLE_CASE(FT, PT) \ + case PrimitiveType::PT: \ + return true; + DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(DORIS_KEY_ENCODABLE_CASE) +#undef DORIS_KEY_ENCODABLE_CASE + default: + return false; + } +} + +// Convert a Field value to its storage representation (via PrimitiveTypeConvertor) +// and full-encode it as a byte-comparable ascending key via KeyCoder. +template +inline void full_encode_field_as_key(const Field& f, const KeyCoder* coder, std::string* buf) { + static_assert(is_key_encodable_non_string_type(PT), + "full_encode_field_as_key is for non-string scalar keys only; " + "strings have their own path in RowCursor that calls " + "storage_field->full_encode_ascending directly, and nested / " + "aggregate types are not key-encodable"); + auto v = PrimitiveTypeConvertor::to_storage_field_type(f.get()); + coder->full_encode_ascending(&v, buf); +} + } // namespace doris diff --git a/be/src/storage/row_cursor.cpp b/be/src/storage/row_cursor.cpp index 44838dcc2df3fa..ef649a6a092979 100644 --- a/be/src/storage/row_cursor.cpp +++ b/be/src/storage/row_cursor.cpp @@ -183,15 +183,14 @@ void RowCursor::_encode_field(const StorageField* storage_field, const Field& f, return; } + // Non-string scalar keys are fixed-width; their KeyCoder::encode_ascending + // ignores `index_size` and delegates to full_encode_ascending, so the + // `full_encode` flag here is a no-op and we always call the full helper. 
const KeyCoder* coder = storage_field->key_coder(); switch (ft) { -#define CASE(FT, PT) \ - case FieldType::FT: \ - if (full_encode) { \ - full_encode_field_as_key(f, coder, buf); \ - } else { \ - encode_field_as_key(f, coder, storage_field->index_size(), buf); \ - } \ +#define CASE(FT, PT) \ + case FieldType::FT: \ + full_encode_field_as_key(f, coder, buf); \ break; DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) #undef CASE From 56f5489c3e285c48e470a78059a6604e38f2dab0 Mon Sep 17 00:00:00 2001 From: csun5285 Date: Mon, 11 May 2026 11:34:36 +0800 Subject: [PATCH 10/10] [test](key-coder) lock compute-layer ordering through full_encode_field_as_key Adds a TEST_F that walks an ascending sequence of compute-layer values for every (PrimitiveType, FieldType) pair that goes through `PrimitiveTypeConvertor` + `KeyCoder` and asserts the encoded byte order matches. Coverage (21 key-encodable non-string types): - BOOLEAN, TINYINT/SMALLINT/INT/BIGINT/LARGEINT (incl. numeric_limits) - FLOAT, DOUBLE (finite values + +/-infinity, skipping NaN/-0 covered separately by FloatComprehensiveOrdering) - DECIMAL32/64/128I/256: 2-3 scale interpretations each (scale=0 boundary + a representative mid scale + max scale where applicable) - DECIMALV2: full int128 split via decimal12_t {int_value, frac_value} - DATE V1, DATETIME V1, DATEV2, DATETIMEV2 (scales 0/3/6 incl. 
microsecond=999999 -> next-second carry), TIMESTAMPTZ - IPV4, IPV6 Locks in: - the compute-layer < <=> encoded byte < contract for the `full_encode_field_as_key` path used by both `RowCursor::encode_key` (short-key index, MOW PK) and `encode_bkd_field_ascending` (BKD inverted index) - the implicit "scale does not enter the encode path" property, documented per scale-labelled subgroup -- scale/frac live on TabletColumn metadata above this layer and never reach PrimitiveTypeConvertor or KeyCoder Raw decimal values are constructed as `whole * pow10_iN(scale) + frac` so they read like `whole.frac` at the labelled scale; IPv4 values use an `ip4(a, b, c, d)` lambda; date/time use existing `pack_d` / `pack_dt` or `create_from_olap_date/datetime` helpers. Co-Authored-By: Claude Opus 4.7 (1M context) --- be/test/storage/key_coder_test.cpp | 352 +++++++++++++++++++++++++++++ 1 file changed, 352 insertions(+) diff --git a/be/test/storage/key_coder_test.cpp b/be/test/storage/key_coder_test.cpp index 580df61a53f1d2..5362645078677c 100644 --- a/be/test/storage/key_coder_test.cpp +++ b/be/test/storage/key_coder_test.cpp @@ -25,7 +25,12 @@ #include #include +#include "core/field.h" +#include "core/types.h" #include "core/uint24.h" +#include "core/value/decimalv2_value.h" +#include "core/value/timestamptz_value.h" +#include "core/value/vdatetime_value.h" #include "gtest/gtest_pred_impl.h" #include "testutil/test_util.h" #include "util/debug_util.h" @@ -278,6 +283,353 @@ TEST_F(KeyCoderTest, test_decimal) { } } +// Encode an ascending sequence of compute-layer values via +// full_encode_field_as_key -- the same helper RowCursor::encode_key and +// the BKD inverted-index reader use -- and assert the byte order matches. +// +// Locks in the contract that PrimitiveTypeConvertor + KeyCoder together +// preserve the compute-layer ordering for every (PrimitiveType, FieldType) +// pair used as a sortable key. +// +// Why no `scale` or `frac` parameter? 
The contract under test is exactly +// "the encode path does not read scale/frac". They live on TabletColumn +// metadata (`_precision`, `_frac`) one layer above and never reach +// `Field::create_field`, `PrimitiveTypeConvertor`, or `KeyCoder`. +// So each subgroup below picks raw ints that *would* arise from a column at +// some hypothetical scale, labels the subgroup `scale=N` to make the human +// interpretation explicit, and asserts ordering -- the encode result is by +// construction identical regardless of which scale the column declared. +template +static void check_full_encode_preserves_order(FieldType ft, const std::vector& ascending, + const char* label) { + const KeyCoder* coder = get_key_coder(ft); + ASSERT_NE(coder, nullptr) << label; + std::vector encoded; + encoded.reserve(ascending.size()); + for (const auto& v : ascending) { + Field f = Field::create_field(v); + std::string buf; + full_encode_field_as_key(f, coder, &buf); + encoded.push_back(std::move(buf)); + } + for (size_t i = 0; i + 1 < encoded.size(); ++i) { + EXPECT_LT(encoded[i], encoded[i + 1]) << label << " idx=" << i; + } +} + +TEST_F(KeyCoderTest, full_encode_field_as_key_preserves_compute_layer_ordering) { + // pow10 = 10^scale as the underlying raw int type. Used to turn a + // human-written decimal literal "whole.frac" into the raw int the column + // would actually store at the labelled scale, so the test reads like + // raw = whole * pow10 + frac (signs aligned by the caller) + // rather than as opaque magic numbers. 
+ auto pow10_i32 = [](int scale) { + int32_t r = 1; + for (int i = 0; i < scale; ++i) r *= 10; + return r; + }; + auto pow10_i64 = [](int scale) { + int64_t r = 1; + for (int i = 0; i < scale; ++i) r *= 10; + return r; + }; + auto pow10_i128 = [](int scale) { + int128_t r = 1; + for (int i = 0; i < scale; ++i) r *= 10; + return r; + }; + auto pow10_i256 = [](int scale) { + wide::Int256 r {1}; + for (int i = 0; i < scale; ++i) r *= wide::Int256 {10}; + return r; + }; + + // -------- DECIMAL32 (compute=Decimal32, storage=Int32) -------- + // scale=0 (DECIMAL(9,0)): whole int32 range + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL32, + {Decimal32(std::numeric_limits::min()), // INT32_MIN + Decimal32(int32_t(-99999)), // -99999 + Decimal32(int32_t(-1)), // -1 + Decimal32(int32_t(0)), // 0 + Decimal32(int32_t(1)), // 1 + Decimal32(int32_t(99999)), // 99999 + Decimal32(std::numeric_limits::max())}, // INT32_MAX + "DECIMAL32 scale=0"); + // scale=2 (DECIMAL(9,2)): raw = whole * 100 + frac + { + const int32_t s = pow10_i32(2); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL32, + {Decimal32(-123456 * s), // -123456.00 + Decimal32(-12 * s - 34), // -12.34 + Decimal32(-1), // -0.01 + Decimal32(0), // 0.00 + Decimal32(1), // 0.01 + Decimal32(12 * s + 34), // 12.34 + Decimal32(999999 * s + 99)}, // 999999.99 + "DECIMAL32 scale=2"); + } + // scale=9 (DECIMAL(9,9)): -0.999999999 .. 
+0.999999999 (whole always 0) + { + const int32_t s = pow10_i32(9); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL32, + {Decimal32(-(s - 1)), // -0.999999999 + Decimal32(-1), // -0.000000001 + Decimal32(0), // 0 + Decimal32(1), // 0.000000001 + Decimal32(s - 1)}, // 0.999999999 + "DECIMAL32 scale=9"); + } + + // -------- DECIMAL64 (compute=Decimal64, storage=Int64) -------- + // scale=0: whole int64 range + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL64, + {Decimal64(std::numeric_limits::min()), // INT64_MIN + Decimal64(int64_t(-1'000'000'000LL)), // -1 000 000 000 + Decimal64(int64_t(-1)), Decimal64(int64_t(0)), Decimal64(int64_t(1)), + Decimal64(int64_t(1'000'000'000LL)), // 1 000 000 000 + Decimal64(std::numeric_limits::max())}, // INT64_MAX + "DECIMAL64 scale=0"); + // scale=4 (DECIMAL(18,4)) + { + const int64_t s = pow10_i64(4); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL64, + {Decimal64(-123456 * s - 7890), // -123456.7890 + Decimal64(-int64_t(1)), // -0.0001 + Decimal64(int64_t(0)), // 0.0000 + Decimal64(int64_t(1)), // 0.0001 + Decimal64(int64_t(99'999'999'999) * s + 9999)}, // 99999999999.9999 + "DECIMAL64 scale=4"); + } + // scale=18 (DECIMAL(18,18)): whole always 0, full fractional range + { + const int64_t s = pow10_i64(18); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL64, + {Decimal64(-(s - 1)), // -0.999999999999999999 + Decimal64(int64_t(-1)), Decimal64(int64_t(0)), Decimal64(int64_t(1)), + Decimal64(s - 1)}, // 0.999999999999999999 + "DECIMAL64 scale=18"); + } + + // -------- DECIMAL128I (compute=Decimal128V3, storage=Int128) -------- + // scale=0: span ±2^100 to exercise both halves of int128. 
+ check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL128I, + {Decimal128V3(-(static_cast(1) << 100)), // -2^100 + Decimal128V3(int128_t(-1)), Decimal128V3(int128_t(0)), Decimal128V3(int128_t(1)), + Decimal128V3(static_cast(1) << 100)}, // 2^100 + "DECIMAL128I scale=0"); + // scale=10 (DECIMAL(38,10)) + { + const int128_t s = pow10_i128(10); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL128I, + {Decimal128V3(-int128_t(100) * s), // -100.0000000000 + Decimal128V3(int128_t(-1)), // -0.0000000001 + Decimal128V3(int128_t(0)), + Decimal128V3(int128_t(1)), // 0.0000000001 + Decimal128V3(int128_t(12345) * s + 6789), // 12345.0000006789 + Decimal128V3(static_cast(1'000'000'000'000'000LL) * + static_cast(1'000'000'000'000'000LL))}, // 10^30 + "DECIMAL128I scale=10"); + } + + // -------- DECIMAL256 (compute=Decimal256, storage=wide::Int256) -------- + // scale=0: int64-range magnitudes on both sides of zero, widened to int256. + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL256, + {Decimal256(wide::Int256(-1'000'000'000'000LL)), // -10^12 + Decimal256(wide::Int256(-1)), Decimal256(wide::Int256(0)), Decimal256(wide::Int256(1)), + Decimal256(wide::Int256(1'000'000'000'000'000'000LL))}, // 10^18 + "DECIMAL256 scale=0"); + // scale=20 (DECIMAL(76,20)) + { + const wide::Int256 s = pow10_i256(20); + const wide::Int256 big = pow10_i256(36); // beyond int64, still within int128 (max ~1.7e38) + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL256, + {Decimal256(-big), // -10^36 + Decimal256(-s), // -1.0 + Decimal256(wide::Int256(-1)), // -1e-20 + Decimal256(wide::Int256(0)), + Decimal256(wide::Int256(1)), // 1e-20 + Decimal256(s), // 1.0 + Decimal256(big)}, // 10^36 + "DECIMAL256 scale=20"); + } + + // -------- DECIMALV2 (compute=DecimalV2Value, storage=decimal12_t) -------- + // DECIMALV2 is fixed at DECIMAL(27,9).
The compute->storage conversion + // splits the int128 into {int_part, frac_part} where frac is the lower + // 9 digits (DecimalV2Value's `frac_value()` * 10^-9). Caller passes the + // two parts with matching sign. + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL, + {DecimalV2Value::get_min_decimal(), // -999999999999999999.999999999 + DecimalV2Value(int64_t(-100), int64_t(0)), // -100.000000000 + DecimalV2Value(int64_t(-1), int64_t(-500'000'000)), // -1.500000000 + DecimalV2Value(int64_t(0), int64_t(0)), // 0 + DecimalV2Value(int64_t(1), int64_t(500'000'000)), // 1.500000000 + DecimalV2Value(int64_t(100), int64_t(0)), // 100.000000000 + DecimalV2Value::get_max_decimal()}, // 999999999999999999.999999999 + "DECIMALV2"); + + // -------- DATEV2 (compute=DateV2Value, storage=uint32) -------- + auto pack_d = [](int y, int m, int d) -> uint32_t { return uint32_t((y << 9) | (m << 5) | d); }; + check_full_encode_preserves_order>( + FieldType::OLAP_FIELD_TYPE_DATEV2, + {DateV2Value(pack_d(0001, 1, 1)), + DateV2Value(pack_d(1970, 1, 1)), + DateV2Value(pack_d(2024, 12, 31)), + DateV2Value(pack_d(9999, 12, 31))}, + "DATEV2"); + + // -------- DATETIMEV2 (compute=DateV2Value, storage=uint64) -------- + // Per vdatetime_value.h: bits = year(18)|month(4)|day(5)|hour(5)|minute(6)|second(6)|microsec(20). + // The microsecond field is always 20-bit regardless of the column's + // declared scale (0..6); it's just zero-padded for lower scales. So + // values across all scales coexist in the same uint64 address space and + // KeyCoder must keep them chronologically ordered.
+ auto pack_dt = [](int y, int mo, int d, int h, int mi, int s, uint32_t us = 0) -> uint64_t { + uint64_t date = (uint64_t(y) << 9) | (uint64_t(mo) << 5) | uint64_t(d); + return (date << 37) | (uint64_t(h) << 32) | (uint64_t(mi) << 26) | (uint64_t(s) << 20) | + uint64_t(us); + }; + using DTV2 = DateV2Value; + // scale=0: microsecond always zero + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DATETIMEV2, + {DTV2(pack_dt(2020, 1, 1, 12, 0, 0)), DTV2(pack_dt(2024, 3, 10, 9, 30, 0)), + DTV2(pack_dt(2024, 12, 31, 23, 59, 59))}, + "DATETIMEV2 scale=0"); + // scale=3: microsecond multiples of 1000 + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DATETIMEV2, + {DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 1'000)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 123'000)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 999'000)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 1, 0))}, + "DATETIMEV2 scale=3"); + // scale=6: full microsecond resolution; verifies ordering is byte-stable + // even at the boundary between us=999999 and the next-second carry. 
+ check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DATETIMEV2, + {DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 0)), DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 1)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 999'998)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 999'999)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 1, 0))}, + "DATETIMEV2 scale=6"); + + // -------- TIMESTAMPTZ (same packing as DATETIMEV2) -------- + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ, + {TimestampTzValue(pack_dt(2020, 1, 1, 12, 0, 0)), + TimestampTzValue(pack_dt(2024, 3, 10, 9, 30, 0, 123'456)), + TimestampTzValue(pack_dt(2024, 12, 31, 23, 59, 59, 999'999))}, + "TIMESTAMPTZ"); + + // -------- BOOLEAN (compute=storage=UInt8, only {0, 1}) -------- + check_full_encode_preserves_order(FieldType::OLAP_FIELD_TYPE_BOOL, + {UInt8(0), UInt8(1)}, "BOOLEAN"); + + // -------- Plain integer keys (compute=storage) -------- + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_TINYINT, + {std::numeric_limits::min(), int8_t(-1), int8_t(0), int8_t(1), + std::numeric_limits::max()}, + "TINYINT"); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_SMALLINT, + {std::numeric_limits::min(), int16_t(-1), int16_t(0), int16_t(1), + std::numeric_limits::max()}, + "SMALLINT"); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_INT, + {std::numeric_limits::min(), int32_t(-1), int32_t(0), int32_t(1), + std::numeric_limits::max()}, + "INT"); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_BIGINT, + {std::numeric_limits::min(), int64_t(-1), int64_t(0), int64_t(1), + std::numeric_limits::max()}, + "BIGINT"); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_LARGEINT, + {-(static_cast(1) << 100), int128_t(-1), int128_t(0), int128_t(1), + static_cast(1) << 100}, + "LARGEINT"); + + // -------- FLOAT / DOUBLE (sign-magnitude flip in KeyCoder) -------- + // KeyCoderTraitsForFloat byte-encodes finite values byte-comparably; NaN and + // 
signed-zero ambiguity have their own dedicated tests above (FloatOrdering / + // FloatComprehensiveOrdering), so here we just exercise the typical path. + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_FLOAT, + {-std::numeric_limits::infinity(), -1e10f, -1.0f, -1e-10f, 0.0f, 1e-10f, 1.0f, + 1e10f, std::numeric_limits::infinity()}, + "FLOAT"); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DOUBLE, + {-std::numeric_limits::infinity(), -1e100, -1.0, -1e-100, 0.0, 1e-100, 1.0, + 1e100, std::numeric_limits::infinity()}, + "DOUBLE"); + + // -------- DATE V1 (compute=VecDateTimeValue, storage=uint24_t packed) -------- + // Storage = (year << 9) | (month << 5) | day, same packing as DATEV2 just + // narrower. The PrimitiveTypeConvertor specialisation runs to_olap_date() + // to project compute -> storage. + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DATE, + {VecDateTimeValue::create_from_olap_date(pack_d(1900, 1, 1)), + VecDateTimeValue::create_from_olap_date(pack_d(1970, 1, 1)), + VecDateTimeValue::create_from_olap_date(pack_d(2024, 12, 31)), + VecDateTimeValue::create_from_olap_date(pack_d(9999, 12, 31))}, + "DATE V1"); + + // -------- DATETIME V1 (compute=VecDateTimeValue, storage=int64 decimal-packed) -------- + // Storage = YYYYMMDDhhmmss interpreted as int64 (sparse compared to V2 bit + // packing but still chronologically ordered as integers). 
+ check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DATETIME, + {VecDateTimeValue::create_from_olap_datetime(uint64_t(19700101000000ULL)), + VecDateTimeValue::create_from_olap_datetime(uint64_t(20240310093000ULL)), + VecDateTimeValue::create_from_olap_datetime(uint64_t(20241231235959ULL)), + VecDateTimeValue::create_from_olap_datetime(uint64_t(99991231235959ULL))}, + "DATETIME V1"); + + // -------- IPV4 (uint32_t, dotted-quad big-endian view) -------- + auto ip4 = [](uint8_t a, uint8_t b, uint8_t c, uint8_t d) -> uint32_t { + return (uint32_t(a) << 24) | (uint32_t(b) << 16) | (uint32_t(c) << 8) | uint32_t(d); + }; + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_IPV4, + {ip4(0, 0, 0, 1), // 0.0.0.1 + ip4(10, 0, 0, 1), // 10.0.0.1 + ip4(127, 0, 0, 1), // 127.0.0.1 + ip4(192, 168, 0, 1), // 192.168.0.1 + ip4(255, 255, 255, 254)}, // 255.255.255.254 + "IPV4"); + + // -------- IPV6 (uint128_t, 16-byte big-endian view) -------- + auto ip6 = [](uint64_t hi, uint64_t lo) -> uint128_t { + return (static_cast(hi) << 64) | lo; + }; + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_IPV6, + {ip6(0, 1), // ::1 + ip6(0, 0x0000FFFF7F000001ULL), // ::ffff:127.0.0.1 + ip6(0x20010DB800000000ULL, 1), // 2001:db8::1 + ip6(0xFE80000000000000ULL, 1), // fe80::1 + ip6(0xFFFFFFFFFFFFFFFFULL, + 0xFFFFFFFFFFFFFFFEULL)}, // ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe + "IPV6"); +} + TEST_F(KeyCoderTest, test_char) { auto key_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_CHAR);