diff --git a/be/src/core/field.cpp b/be/src/core/field.cpp index 065be343962dd9..f63aaf8227a7c6 100644 --- a/be/src/core/field.cpp +++ b/be/src/core/field.cpp @@ -649,16 +649,20 @@ std::string Field::get_type_name() const { template typename PrimitiveTypeTraits::CppType& Field::get() { - DCHECK(T == type || (is_string_type(type) && is_string_type(T)) || type == TYPE_NULL) - << "Type mismatch: requested " << type_to_string(T) << ", actual " << get_type_name(); + if (T != type && !(is_string_type(type) && is_string_type(T)) && type != TYPE_NULL) { + throw Exception(Status::FatalError("Field::get type mismatch: requested {}, actual {}", + type_to_string(T), get_type_name())); + } auto* MAY_ALIAS ptr = reinterpret_cast::CppType*>(&storage); return *ptr; } template const typename PrimitiveTypeTraits::CppType& Field::get() const { - DCHECK(T == type || (is_string_type(type) && is_string_type(T)) || type == TYPE_NULL) - << "Type mismatch: requested " << type_to_string(T) << ", actual " << get_type_name(); + if (T != type && !(is_string_type(type) && is_string_type(T)) && type != TYPE_NULL) { + throw Exception(Status::FatalError("Field::get type mismatch: requested {}, actual {}", + type_to_string(T), get_type_name())); + } const auto* MAY_ALIAS ptr = reinterpret_cast::CppType*>(&storage); return *ptr; diff --git a/be/src/exprs/function/array/function_array_index.h b/be/src/exprs/function/array/function_array_index.h index 5ab6ca68c3c3f5..3f39fc627b4887 100644 --- a/be/src/exprs/function/array/function_array_index.h +++ b/be/src/exprs/function/array/function_array_index.h @@ -41,6 +41,7 @@ #include "core/data_type/data_type_number.h" // IWYU pragma: keep #include "core/data_type/define_primitive_type.h" #include "core/data_type/primitive_type.h" +#include "core/field.h" #include "core/string_ref.h" #include "core/types.h" #include "exprs/function/function.h" @@ -151,7 +152,6 @@ class FunctionArrayIndex : public IFunction { } Field param_value; arguments[0].column->get(0, 
param_value); - auto param_type = arguments[0].type->get_primitive_type(); // The current implementation for the inverted index of arrays cannot handle cases where the array contains null values, // meaning an item in the array is null. if (param_value.is_null()) { @@ -164,13 +164,10 @@ class FunctionArrayIndex : public IFunction { RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); null_bitmap = null_bitmap_cache_handle.get_bitmap(); } - std::unique_ptr query_param = nullptr; - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type, ¶m_value, - query_param)); InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = query_param->get_value(); + param.query_value = param_value; param.query_type = segment_v2::InvertedIndexQueryType::EQUAL_QUERY; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/exprs/function/array/function_arrays_overlap.h b/be/src/exprs/function/array/function_arrays_overlap.h index c1acded2e3170b..d71643945ca796 100644 --- a/be/src/exprs/function/array/function_arrays_overlap.h +++ b/be/src/exprs/function/array/function_arrays_overlap.h @@ -230,10 +230,6 @@ class FunctionArraysOverlap : public IFunction { Field param_value; arguments[0].column->get(0, param_value); DCHECK(arguments[0].type->get_primitive_type() == TYPE_ARRAY); - auto nested_param_type = - check_and_get_data_type(remove_nullable(arguments[0].type).get()) - ->get_nested_type() - ->get_primitive_type(); // The current implementation for the inverted index of arrays cannot handle cases where the array contains null values, // meaning an item in the array is null. 
if (param_value.is_null()) { @@ -246,7 +242,6 @@ class FunctionArraysOverlap : public IFunction { RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle)); null_bitmap = null_bitmap_cache_handle.get_bitmap(); } - std::unique_ptr query_param = nullptr; const Array& query_val = param_value.get(); InvertedIndexParam param; @@ -260,9 +255,7 @@ class FunctionArraysOverlap : public IFunction { if (nested_query_val.is_null()) { return Status::OK(); } - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( - nested_param_type, &nested_query_val, query_param)); - param.query_value = query_param->get_value(); + param.query_value = nested_query_val; param.roaring = std::make_shared(); param.analyzer_ctx = analyzer_ctx; RETURN_IF_ERROR(iter->read_from_index(segment_v2::IndexParam {¶m})); diff --git a/be/src/exprs/function/function_ip.h b/be/src/exprs/function/function_ip.h index 48906f440ec87d..cb176a081efd95 100644 --- a/be/src/exprs/function/function_ip.h +++ b/be/src/exprs/function/function_ip.h @@ -40,6 +40,7 @@ #include "core/data_type/data_type_number.h" #include "core/data_type/data_type_string.h" #include "core/data_type/data_type_struct.h" +#include "core/field.h" #include "core/types.h" #include "core/value/ip_address_cidr.h" #include "exec/common/endian.h" @@ -707,29 +708,22 @@ class FunctionIsIPAddressInRange : public IFunction { // apply for inverted index std::shared_ptr null_bitmap = std::make_shared(); - auto param_type = data_type_with_name.second->get_primitive_type(); - std::unique_ptr query_param = nullptr; - // >= min ip - RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( - param_type, &min_ip, query_param)); segment_v2::InvertedIndexParam min_param; min_param.column_name = data_type_with_name.first; min_param.column_type = data_type_with_name.second; min_param.query_type = segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY; - min_param.query_value = query_param->get_value(); + 
min_param.query_value = min_ip; min_param.num_rows = num_rows; min_param.roaring = std::make_shared(); RETURN_IF_ERROR(iter->read_from_index(&min_param)); // <= max ip - RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( - param_type, &max_ip, query_param)); segment_v2::InvertedIndexParam max_param; max_param.column_name = data_type_with_name.first; max_param.column_type = data_type_with_name.second; max_param.query_type = segment_v2::InvertedIndexQueryType::LESS_EQUAL_QUERY; - max_param.query_value = query_param->get_value(); + max_param.query_value = max_ip; max_param.num_rows = num_rows; max_param.roaring = std::make_shared(); RETURN_IF_ERROR(iter->read_from_index(&max_param)); diff --git a/be/src/exprs/function/function_multi_match.cpp b/be/src/exprs/function/function_multi_match.cpp index 2ba2a42b08d04b..ecc10e593ef4a8 100644 --- a/be/src/exprs/function/function_multi_match.cpp +++ b/be/src/exprs/function/function_multi_match.cpp @@ -25,6 +25,7 @@ #include #include "core/column/column.h" +#include "core/field.h" #include "exprs/function/simple_function_factory.h" #include "exprs/vslot_ref.h" #include "io/fs/file_reader.h" @@ -79,13 +80,9 @@ Status FunctionMultiMatch::evaluate_inverted_index( return Status::Error( "arguments for multi_match must be string"); } - // Must convert StringRef to std::string because downstream readers - // (e.g. FullTextIndexReader::query) reinterpret_cast query_value as std::string*. 
- std::string query_str(query_str_ref.data, query_str_ref.size); - // search InvertedIndexParam param; - param.query_value = &query_str; + param.query_value = Field::create_field(query_str_ref.to_string()); param.query_type = query_type; param.num_rows = num_rows; for (size_t i = 0; i < data_type_with_names.size(); i++) { diff --git a/be/src/exprs/function/functions_comparison.h b/be/src/exprs/function/functions_comparison.h index 99ad8e87b0b5ba..5d891248e08c11 100644 --- a/be/src/exprs/function/functions_comparison.h +++ b/be/src/exprs/function/functions_comparison.h @@ -34,6 +34,7 @@ #include "core/data_type/data_type_string.h" #include "core/data_type/define_primitive_type.h" #include "core/decimal_comparison.h" +#include "core/field.h" #include "core/memcmp_small.h" #include "core/value/vdatetime_value.h" #include "exprs/function/function.h" @@ -486,15 +487,10 @@ class FunctionComparison : public IFunction { if (param_value.is_null()) { return Status::OK(); } - auto param_type = arguments[0].type->get_primitive_type(); - std::unique_ptr query_param = nullptr; - RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value( - param_type, ¶m_value, query_param)); - segment_v2::InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = query_param->get_value(); + param.query_value = param_value; param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/exprs/function/in.h b/be/src/exprs/function/in.h index edcf069e6313da..78435dd27640a0 100644 --- a/be/src/exprs/function/in.h +++ b/be/src/exprs/function/in.h @@ -38,6 +38,7 @@ #include "core/data_type/data_type_nullable.h" #include "core/data_type/data_type_number.h" #include "core/data_type/define_primitive_type.h" +#include "core/field.h" #include "core/string_ref.h" #include "core/types.h" #include "exprs/aggregate/aggregate_function.h" @@ -161,7 +162,6 @@ 
class FunctionIn : public IFunction { for (const auto& arg : arguments) { Field param_value; arg.column->get(0, param_value); - auto param_type = arg.type->get_primitive_type(); if (param_value.is_null()) { // predicate like column NOT IN (NULL, '') should not push down to index. if (negative) { @@ -170,14 +170,11 @@ class FunctionIn : public IFunction { *roaring |= *null_bitmap; continue; } - std::unique_ptr query_param = nullptr; - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( - param_type, ¶m_value, query_param)); InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; segment_v2::InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = query_param->get_value(); + param.query_value = param_value; param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/exprs/function/match.cpp b/be/src/exprs/function/match.cpp index d2814d1140aac4..adab0ba82c7bb8 100644 --- a/be/src/exprs/function/match.cpp +++ b/be/src/exprs/function/match.cpp @@ -19,6 +19,7 @@ #include +#include "core/field.h" #include "runtime/query_context.h" #include "runtime/runtime_state.h" #include "storage/index/index_reader_helper.h" @@ -79,14 +80,10 @@ Status FunctionMatchBase::evaluate_inverted_index( return Status::Error( "arguments for match must be string"); } - std::unique_ptr query_param = nullptr; - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value(param_type, ¶m_value, - query_param)); - InvertedIndexParam param; param.column_name = data_type_with_name.first; param.column_type = data_type_with_name.second; - param.query_value = query_param->get_value(); + param.query_value = param_value; param.query_type = get_query_type_from_fn_name(); param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/storage/field.h b/be/src/storage/field.h index 1658248effd33a..ab006b3f72a06a 
100644 --- a/be/src/storage/field.h +++ b/be/src/storage/field.h @@ -64,10 +64,6 @@ class StorageField { const std::string& name() const { return _name; } const PathInDataPtr& path() const { return _path; } - virtual void set_to_max(char* buf) const { return _type_info->set_to_max(buf); } - - virtual void set_to_min(char* buf) const { return _type_info->set_to_min(buf); } - virtual StorageField* clone() const { auto* local = new StorageField(_desc); this->clone(local); @@ -88,6 +84,8 @@ class StorageField { void full_encode_ascending(const void* value, std::string* buf) const { _key_coder->full_encode_ascending(value, buf); } + + const KeyCoder* key_coder() const { return _key_coder; } void add_sub_field(std::unique_ptr sub_field) { _sub_fields.emplace_back(std::move(sub_field)); } @@ -172,12 +170,6 @@ class CharField : public StorageField { StorageField::clone(local); return local; } - - void set_to_max(char* ch) const override { - auto slice = reinterpret_cast(ch); - slice->size = _length; - memset(slice->data, 0xFF, slice->size); - } }; class VarcharField : public StorageField { @@ -189,12 +181,6 @@ class VarcharField : public StorageField { StorageField::clone(local); return local; } - - void set_to_max(char* ch) const override { - auto slice = reinterpret_cast(ch); - slice->size = _length - OLAP_VARCHAR_MAX_BYTES; - memset(slice->data, 0xFF, slice->size); - } }; class StringField : public StorageField { public: @@ -205,11 +191,6 @@ class StringField : public StorageField { StorageField::clone(local); return local; } - - void set_to_max(char* ch) const override { - auto slice = reinterpret_cast(ch); - memset(slice->data, 0xFF, slice->size); - } }; class BitmapAggField : public StorageField { diff --git a/be/src/storage/index/inverted/inverted_index_iterator.cpp b/be/src/storage/index/inverted/inverted_index_iterator.cpp index fbe06c054e51fb..936b82d5d56abe 100644 --- a/be/src/storage/index/inverted/inverted_index_iterator.cpp +++ 
b/be/src/storage/index/inverted/inverted_index_iterator.cpp @@ -135,7 +135,7 @@ Result InvertedIndexIterator::has_null() { Status InvertedIndexIterator::try_read_from_inverted_index(const InvertedIndexReaderPtr& reader, const std::string& column_name, - const void* query_value, + const Field& query_value, InvertedIndexQueryType query_type, size_t* count) { // NOTE: only bkd index support try read now. diff --git a/be/src/storage/index/inverted/inverted_index_iterator.h b/be/src/storage/index/inverted/inverted_index_iterator.h index e7418535d3cd48..afc4a663670633 100644 --- a/be/src/storage/index/inverted/inverted_index_iterator.h +++ b/be/src/storage/index/inverted/inverted_index_iterator.h @@ -19,6 +19,7 @@ #include #include +#include "core/field.h" #include "storage/index/analyzer_key_matcher.h" #include "storage/index/index_iterator.h" #include "storage/index/inverted/inverted_index_parser.h" @@ -29,7 +30,7 @@ namespace doris::segment_v2 { struct InvertedIndexParam { std::string column_name; DataTypePtr column_type; - const void* query_value; + Field query_value; InvertedIndexQueryType query_type; uint32_t num_rows; std::shared_ptr roaring; @@ -73,7 +74,7 @@ class InvertedIndexIterator : public IndexIterator { ENABLE_FACTORY_CREATOR(InvertedIndexIterator); Status try_read_from_inverted_index(const InvertedIndexReaderPtr& reader, - const std::string& column_name, const void* query_value, + const std::string& column_name, const Field& query_value, InvertedIndexQueryType query_type, size_t* count); // Normalize analyzer_key to lowercase. 
diff --git a/be/src/storage/index/inverted/inverted_index_reader.cpp b/be/src/storage/index/inverted/inverted_index_reader.cpp index f7b1b76dc6db0d..80de3b0aac8744 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.cpp +++ b/be/src/storage/index/inverted/inverted_index_reader.cpp @@ -33,12 +33,15 @@ #include #include #include +#include #include "common/config.h" #include "common/exception.h" #include "common/logging.h" #include "common/status.h" +#include "core/data_type/primitive_type.h" #include "core/string_ref.h" +#include "core/type_limit.h" #include "runtime/runtime_profile.h" #include "runtime/runtime_state.h" #include "storage/field.h" @@ -58,6 +61,94 @@ #include "storage/types.h" #include "util/faststring.h" +namespace { + +// Sentinel values are sourced from the compute-layer `type_limit` and +// then projected onto the storage-layer POD via `PrimitiveTypeConvertor`. +// Routing through the compute layer keeps the +/- infinity constants +// single-sourced (e.g. DecimalV2 max lives only in DecimalV2Value::get_max_decimal, +// DATE bounds only in VecDateTimeValue::datetime_min/max_value), so types like +// decimal12_t and uint24_t — which have no std::numeric_limits specialisation — +// no longer need their own type_limit<> entries. 
+template +static void bkd_encode_min(const doris::KeyCoder* coder, std::string* out) { + using compute_t = typename doris::PrimitiveTypeTraits::CppType; + auto compute_v = doris::type_limit::min(); + auto v = doris::PrimitiveTypeConvertor::to_storage_field_type(compute_v); + coder->full_encode_ascending(&v, out); +} + +template +static void bkd_encode_max(const doris::KeyCoder* coder, std::string* out) { + using compute_t = typename doris::PrimitiveTypeTraits::CppType; + auto compute_v = doris::type_limit::max(); + auto v = doris::PrimitiveTypeConvertor::to_storage_field_type(compute_v); + coder->full_encode_ascending(&v, out); +} + +static doris::Status encode_bkd_field_ascending(doris::FieldType ft, const doris::Field& field, + const doris::KeyCoder* coder, std::string* out) { + // `actual` is the primitive type of the query Field from the caller; `PrimitiveType::PT` is the + // scalar type the BKD index stores (e.g. INT for an INT column or ARRAY index). + // Normally they match: `int_col = 1` -> both INT; `array_contains(int_arr, 2)` -> both INT. + // Mismatch happens when the query Field carries a non-scalar while BKD records the inner scalar: + // `arr = []` reaches here via `FunctionComparison` with the entire const ARRAY literal + // as the query Field, so `actual = TYPE_ARRAY` while PT is the inner scalar -- the predicate + // cannot be answered by BKD. Return INVERTED_INDEX_EVALUATE_SKIPPED so `_apply_index_expr` + // downgrades to scalar evaluation instead of crashing on `Field::get()` DCHECK below. 
+#define CASE(FT, PT) \ + case doris::FieldType::FT: { \ + const auto actual = field.get_type(); \ + if (actual != doris::PrimitiveType::PT && actual != doris::PrimitiveType::TYPE_NULL && \ + !(doris::is_string_type(actual) && doris::is_string_type(doris::PrimitiveType::PT))) { \ + return doris::Status::Error( \ + "BKD query value type {} does not match index type {}", \ + static_cast(actual), static_cast(ft)); \ + } \ + doris::full_encode_field_as_key(field, coder, out); \ + return doris::Status::OK(); \ + } + switch (ft) { + DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) + default: + break; + } +#undef CASE + return doris::Status::InternalError("unsupported BKD field type {}", static_cast(ft)); +} + +static doris::Status encode_bkd_min_ascending(doris::FieldType ft, const doris::KeyCoder* coder, + std::string* out) { +#define CASE(FT, PT) \ + case doris::FieldType::FT: \ + bkd_encode_min(coder, out); \ + return doris::Status::OK(); + switch (ft) { + DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) + default: + break; + } +#undef CASE + return doris::Status::InternalError("unsupported BKD field type {}", static_cast(ft)); +} + +static doris::Status encode_bkd_max_ascending(doris::FieldType ft, const doris::KeyCoder* coder, + std::string* out) { +#define CASE(FT, PT) \ + case doris::FieldType::FT: \ + bkd_encode_max(coder, out); \ + return doris::Status::OK(); + switch (ft) { + DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) + default: + break; + } +#undef CASE + return doris::Status::InternalError("unsupported BKD field type {}", static_cast(ft)); +} + +} // anonymous namespace + namespace doris::segment_v2 { std::string InvertedIndexReader::get_index_file_path() { @@ -289,13 +380,13 @@ Status FullTextIndexReader::new_iterator(std::unique_ptr* iterato } Status FullTextIndexReader::query(const IndexQueryContextPtr& context, - const std::string& column_name, const void* query_value, + const std::string& column_name, const Field& query_value, 
InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx) { SCOPED_RAW_TIMER(&context->stats->inverted_index_query_timer); - std::string search_str = *reinterpret_cast(query_value); + std::string search_str = query_value.get(); VLOG_DEBUG << column_name << " begin to search the fulltext index from clucene, query_str [" << search_str << "]"; @@ -408,13 +499,14 @@ Status StringTypeInvertedIndexReader::new_iterator(std::unique_ptr& bit_map, const InvertedIndexAnalyzerCtx* /*analyzer_ctx*/) { SCOPED_RAW_TIMER(&context->stats->inverted_index_query_timer); - std::string search_str = *reinterpret_cast(query_value); + std::string search_str = query_value.get(); // If the written value exceeds ignore_above, it will be written as null. // The queried value exceeds ignore_above means the written value cannot be found. @@ -541,23 +633,26 @@ Status BkdIndexReader::new_iterator(std::unique_ptr* iterator) { } template -Status BkdIndexReader::construct_bkd_query_value(const void* query_value, +Status BkdIndexReader::construct_bkd_query_value(const Field& query_value, std::shared_ptr r, InvertedIndexVisitor* visitor) { - std::vector tmp(r->bytes_per_dim_); if constexpr (QT == InvertedIndexQueryType::EQUAL_QUERY) { - _value_key_coder->full_encode_ascending(query_value, &visitor->query_max); - _value_key_coder->full_encode_ascending(query_value, &visitor->query_min); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &visitor->query_max)); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &visitor->query_min)); } else if constexpr (QT == InvertedIndexQueryType::LESS_THAN_QUERY || QT == InvertedIndexQueryType::LESS_EQUAL_QUERY) { - _value_key_coder->full_encode_ascending(query_value, &visitor->query_max); - _type_info->set_to_min(tmp.data()); - _value_key_coder->full_encode_ascending(tmp.data(), &visitor->query_min); + 
RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &visitor->query_max)); + RETURN_IF_ERROR(encode_bkd_min_ascending(_type_info->type(), _value_key_coder, + &visitor->query_min)); } else if constexpr (QT == InvertedIndexQueryType::GREATER_THAN_QUERY || QT == InvertedIndexQueryType::GREATER_EQUAL_QUERY) { - _value_key_coder->full_encode_ascending(query_value, &visitor->query_min); - _type_info->set_to_max(tmp.data()); - _value_key_coder->full_encode_ascending(tmp.data(), &visitor->query_max); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &visitor->query_min)); + RETURN_IF_ERROR(encode_bkd_max_ascending(_type_info->type(), _value_key_coder, + &visitor->query_max)); } else { return Status::Error( "invalid query type when query bkd index"); @@ -566,7 +661,7 @@ Status BkdIndexReader::construct_bkd_query_value(const void* query_value, } Status BkdIndexReader::invoke_bkd_try_query(const IndexQueryContextPtr& context, - const void* query_value, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr r, size_t* count) { @@ -617,7 +712,7 @@ Status BkdIndexReader::invoke_bkd_try_query(const IndexQueryContextPtr& context, } Status BkdIndexReader::invoke_bkd_query(const IndexQueryContextPtr& context, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr r, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&context->stats->inverted_index_searcher_search_timer); @@ -668,7 +763,7 @@ Status BkdIndexReader::invoke_bkd_query(const IndexQueryContextPtr& context, } Status BkdIndexReader::try_query(const IndexQueryContextPtr& context, - const std::string& column_name, const void* query_value, + const std::string& column_name, const Field& query_value, InvertedIndexQueryType query_type, size_t* count) { try { std::shared_ptr r; @@ -680,7 +775,8 @@ Status BkdIndexReader::try_query(const 
IndexQueryContextPtr& context, return st; } std::string query_str; - _value_key_coder->full_encode_ascending(query_value, &query_str); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &query_str)); auto index_file_key = _index_file_reader->get_index_file_cache_key(&_index_meta); InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type, @@ -704,7 +800,7 @@ Status BkdIndexReader::try_query(const IndexQueryContextPtr& context, } Status BkdIndexReader::query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* /*analyzer_ctx*/) { SCOPED_RAW_TIMER(&context->stats->inverted_index_query_timer); @@ -719,7 +815,8 @@ Status BkdIndexReader::query(const IndexQueryContextPtr& context, const std::str return st; } std::string query_str; - _value_key_coder->full_encode_ascending(query_value, &query_str); + RETURN_IF_ERROR(encode_bkd_field_ascending(_type_info->type(), query_value, + _value_key_coder, &query_str)); auto index_file_key = _index_file_reader->get_index_file_cache_key(&_index_meta); InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type, diff --git a/be/src/storage/index/inverted/inverted_index_reader.h b/be/src/storage/index/inverted/inverted_index_reader.h index 906c1f512a15f1..38fd2e7cda40d6 100644 --- a/be/src/storage/index/inverted/inverted_index_reader.h +++ b/be/src/storage/index/inverted/inverted_index_reader.h @@ -26,6 +26,7 @@ #include "common/status.h" #include "core/data_type/primitive_type.h" +#include "core/field.h" #include "io/fs/file_system.h" #include "io/fs/path.h" #include "storage/index/index_query_context.h" @@ -223,11 +224,11 @@ class InvertedIndexReader : public IndexReader { IndexType index_type() override { return IndexType::INVERTED; } 
virtual Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) = 0; virtual Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, size_t* count) = 0; Status read_null_bitmap(const IndexQueryContextPtr& context, @@ -285,11 +286,11 @@ class FullTextIndexReader : public InvertedIndexReader { Status new_iterator(std::unique_ptr* iterator) override; Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override; Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, size_t* count) override { return Status::Error( "FullTextIndexReader not support try_query"); @@ -310,11 +311,11 @@ class StringTypeInvertedIndexReader : public InvertedIndexReader { Status new_iterator(std::unique_ptr* iterator) override; Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override; Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, 
size_t* count) override { return Status::Error( "StringTypeInvertedIndexReader not support try_query"); @@ -370,21 +371,21 @@ class BkdIndexReader : public InvertedIndexReader { Status new_iterator(std::unique_ptr* iterator) override; Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override; Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, size_t* count) override; - Status invoke_bkd_try_query(const IndexQueryContextPtr& context, const void* query_value, + Status invoke_bkd_try_query(const IndexQueryContextPtr& context, const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr r, size_t* count); - Status invoke_bkd_query(const IndexQueryContextPtr& context, const void* query_value, + Status invoke_bkd_query(const IndexQueryContextPtr& context, const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr r, std::shared_ptr& bit_map); template - Status construct_bkd_query_value(const void* query_value, + Status construct_bkd_query_value(const Field& query_value, std::shared_ptr r, InvertedIndexVisitor* visitor); @@ -396,131 +397,5 @@ class BkdIndexReader : public InvertedIndexReader { const KeyCoder* _value_key_coder {}; }; -template -class InvertedIndexQueryParam; - -/** - * @brief InvertedIndexQueryParamFactory is a factory class to create QueryValue object. - * we need a template function to make predict class like in_list_predict template class to use. - * also need a function with primitive type parameter to create inverted index query value. 
like some function expr: function_array_index - * Now we just mapping field value in query engine to storage field value - */ -class InvertedIndexQueryParamFactory { - ENABLE_FACTORY_CREATOR(InvertedIndexQueryParamFactory); - -public: - virtual ~InvertedIndexQueryParamFactory() = default; - - template - static Status create_query_value( - const ValueType* value, std::unique_ptr& result_param) { - static_assert(!std::is_same_v, - "ValueType cannot be void, as it is unsupported and dangerous."); - - using CPP_TYPE = typename PrimitiveTypeTraits::CppType; - std::unique_ptr> param = - InvertedIndexQueryParam::create_unique(); - - if constexpr (is_string_type(PT)) { - if constexpr (std::is_same_v) { - const auto& str = value->template get(); - param->set_value(str); - } else if constexpr (std::is_same_v) { - param->set_value(value); - } else { - static_assert(std::is_convertible_v, - "ValueType must be convertible to std::string for string types"); - param->set_value(std::string(*value)); - } - } else { - CPP_TYPE cpp_val; - if constexpr (std::is_same_v) { - auto field_val = value->template get(); - cpp_val = static_cast(field_val); - } else { - cpp_val = static_cast(*value); - } - - auto storage_val = PrimitiveTypeConvertor::to_storage_field_type(cpp_val); - param->set_value(&storage_val); - } - result_param = std::move(param); - return Status::OK(); - } - - static Status create_query_value( - const PrimitiveType& primitiveType, const doris::Field* value, - std::unique_ptr& result_param) { - switch (primitiveType) { -#define M(TYPE) \ - case TYPE: { \ - return create_query_value(value, result_param); \ - } - M(PrimitiveType::TYPE_BOOLEAN) - M(PrimitiveType::TYPE_TINYINT) - M(PrimitiveType::TYPE_SMALLINT) - M(PrimitiveType::TYPE_INT) - M(PrimitiveType::TYPE_BIGINT) - M(PrimitiveType::TYPE_LARGEINT) - M(PrimitiveType::TYPE_FLOAT) - M(PrimitiveType::TYPE_DOUBLE) - M(PrimitiveType::TYPE_DECIMALV2) - M(PrimitiveType::TYPE_DECIMAL32) - M(PrimitiveType::TYPE_DECIMAL64) - 
M(PrimitiveType::TYPE_DECIMAL128I) - M(PrimitiveType::TYPE_DECIMAL256) - M(PrimitiveType::TYPE_DATE) - M(PrimitiveType::TYPE_DATETIME) - M(PrimitiveType::TYPE_CHAR) - M(PrimitiveType::TYPE_VARCHAR) - M(PrimitiveType::TYPE_STRING) - M(PrimitiveType::TYPE_DATEV2) - M(PrimitiveType::TYPE_DATETIMEV2) - M(PrimitiveType::TYPE_IPV4) - M(PrimitiveType::TYPE_IPV6) -#undef M - default: - return Status::NotSupported("Unsupported primitive type {} for inverted index reader", - primitiveType); - } - }; - - virtual const void* get_value() const { - LOG_FATAL( - "Execution reached an undefined behavior code path in " - "InvertedIndexQueryParamFactory"); - __builtin_unreachable(); - }; -}; - -template -class InvertedIndexQueryParam : public InvertedIndexQueryParamFactory { - ENABLE_FACTORY_CREATOR(InvertedIndexQueryParam); - using storage_val = typename PrimitiveTypeTraits::StorageFieldType; - -public: - void set_value(const storage_val* value) { _value = *value; } - - const void* get_value() const override { return &_value; } - -private: - storage_val _value; -}; - -template - requires(is_string_type(PT)) -class InvertedIndexQueryParam : public InvertedIndexQueryParamFactory { - ENABLE_FACTORY_CREATOR(InvertedIndexQueryParam); - -public: - void set_value(const std::string& value) { _value = value; } - void set_value(const StringRef* value) { _value.assign(value->data, value->size); } - - const void* get_value() const override { return &_value; } - -private: - std::string _value; -}; - } // namespace segment_v2 } // namespace doris diff --git a/be/src/storage/key_coder.h b/be/src/storage/key_coder.h index 2952949c02d5d3..0c4bcf08d171e5 100644 --- a/be/src/storage/key_coder.h +++ b/be/src/storage/key_coder.h @@ -29,8 +29,10 @@ #include "absl/strings/substitute.h" #include "common/status.h" +#include "core/data_type/primitive_type.h" #include "core/decimal12.h" #include "core/extended_types.h" +#include "core/field.h" #include "core/types.h" #include "exec/common/endian.h" #include 
"storage/olap_common.h" @@ -446,4 +448,60 @@ template <> class KeyCoderTraits : public KeyCoderTraitsForFloat {}; +// X-macro listing every (FieldType, PrimitiveType) pair that goes through KeyCoder +// as a non-string scalar key. Strings are handled separately because they need +// length / padding logic outside KeyCoder. Each entry: M(FT_suffix, PT_suffix). +#define DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(M) \ + M(OLAP_FIELD_TYPE_BOOL, TYPE_BOOLEAN) \ + M(OLAP_FIELD_TYPE_TINYINT, TYPE_TINYINT) \ + M(OLAP_FIELD_TYPE_SMALLINT, TYPE_SMALLINT) \ + M(OLAP_FIELD_TYPE_INT, TYPE_INT) \ + M(OLAP_FIELD_TYPE_BIGINT, TYPE_BIGINT) \ + M(OLAP_FIELD_TYPE_LARGEINT, TYPE_LARGEINT) \ + M(OLAP_FIELD_TYPE_FLOAT, TYPE_FLOAT) \ + M(OLAP_FIELD_TYPE_DOUBLE, TYPE_DOUBLE) \ + M(OLAP_FIELD_TYPE_DECIMAL, TYPE_DECIMALV2) \ + M(OLAP_FIELD_TYPE_DECIMAL32, TYPE_DECIMAL32) \ + M(OLAP_FIELD_TYPE_DECIMAL64, TYPE_DECIMAL64) \ + M(OLAP_FIELD_TYPE_DECIMAL128I, TYPE_DECIMAL128I) \ + M(OLAP_FIELD_TYPE_DECIMAL256, TYPE_DECIMAL256) \ + M(OLAP_FIELD_TYPE_DATE, TYPE_DATE) \ + M(OLAP_FIELD_TYPE_DATETIME, TYPE_DATETIME) \ + M(OLAP_FIELD_TYPE_DATEV2, TYPE_DATEV2) \ + M(OLAP_FIELD_TYPE_DATETIMEV2, TYPE_DATETIMEV2) \ + M(OLAP_FIELD_TYPE_TIMESTAMPTZ, TYPE_TIMESTAMPTZ) \ + M(OLAP_FIELD_TYPE_IPV4, TYPE_IPV4) \ + M(OLAP_FIELD_TYPE_IPV6, TYPE_IPV6) + +// True for exactly the PrimitiveTypes listed in +// DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES. Strings (CHAR/VARCHAR/STRING/ +// VARBINARY) have their own short-key code path in row_cursor.cpp that calls +// storage_field->full_encode_ascending directly, and nested/aggregate types +// (ARRAY/MAP/STRUCT/VARIANT/HLL/BITMAP/JSONB/QUANTILE_STATE/AGG_STATE) are not +// key-encodable at all -- both groups must never reach the helpers below. 
+constexpr bool is_key_encodable_non_string_type(PrimitiveType pt) { + switch (pt) { +#define DORIS_KEY_ENCODABLE_CASE(FT, PT) \ + case PrimitiveType::PT: \ + return true; + DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(DORIS_KEY_ENCODABLE_CASE) +#undef DORIS_KEY_ENCODABLE_CASE + default: + return false; + } +} + +// Convert a Field value to its storage representation (via PrimitiveTypeConvertor) +// and full-encode it as a byte-comparable ascending key via KeyCoder. +template +inline void full_encode_field_as_key(const Field& f, const KeyCoder* coder, std::string* buf) { + static_assert(is_key_encodable_non_string_type(PT), + "full_encode_field_as_key is for non-string scalar keys only; " + "strings have their own path in RowCursor that calls " + "storage_field->full_encode_ascending directly, and nested / " + "aggregate types are not key-encodable"); + auto v = PrimitiveTypeConvertor::to_storage_field_type(f.get()); + coder->full_encode_ascending(&v, buf); +} + } // namespace doris diff --git a/be/src/storage/predicate/comparison_predicate.h b/be/src/storage/predicate/comparison_predicate.h index c195b15fefacda..12f57237e7edfa 100644 --- a/be/src/storage/predicate/comparison_predicate.h +++ b/be/src/storage/predicate/comparison_predicate.h @@ -22,6 +22,7 @@ #include "common/compare.h" #include "core/column/column_dictionary.h" +#include "core/field.h" #include "storage/index/bloom_filter/bloom_filter.h" #include "storage/index/inverted/inverted_index_cache.h" // IWYU pragma: keep #include "storage/index/inverted/inverted_index_reader.h" @@ -92,14 +93,10 @@ class ComparisonPredicateBase final : public ColumnPredicate { return Status::InvalidArgument("invalid comparison predicate type {}", PT); } - std::unique_ptr query_param = nullptr; - RETURN_IF_ERROR( - InvertedIndexQueryParamFactory::create_query_value(&_value, query_param)); - InvertedIndexParam param; param.column_name = name_with_type.first; param.column_type = name_with_type.second; - param.query_value = 
query_param->get_value(); + param.query_value = Field::create_field(_value); param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/storage/predicate/in_list_predicate.h b/be/src/storage/predicate/in_list_predicate.h index 56463879185ee0..f7529b7ac69820 100644 --- a/be/src/storage/predicate/in_list_predicate.h +++ b/be/src/storage/predicate/in_list_predicate.h @@ -27,6 +27,7 @@ #include "core/data_type/define_primitive_type.h" #include "core/data_type/primitive_type.h" #include "core/decimal12.h" +#include "core/field.h" #include "core/string_ref.h" #include "core/type_limit.h" #include "core/types.h" @@ -161,23 +162,20 @@ class InListPredicateBase final : public ColumnPredicate { roaring::Roaring indices; HybridSetBase::IteratorBase* iter = _values->begin(); while (iter->has_next()) { - std::unique_ptr query_param = nullptr; + Field field_value; if constexpr (is_string_type(Type)) { - // get_value() returns StringRef*, not std::string* + // HybridSet's iter->get_value() yields StringRef*, not std::string*. 
const auto* ref = (const StringRef*)(iter->get_value()); - T str(ref->data, ref->size); - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( - &str, query_param)); + field_value = Field::create_field(std::string(ref->data, ref->size)); } else { const T* value = (const T*)(iter->get_value()); - RETURN_IF_ERROR(InvertedIndexQueryParamFactory::create_query_value( - value, query_param)); + field_value = Field::create_field(*value); } InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; InvertedIndexParam param; param.column_name = name_with_type.first; param.column_type = name_with_type.second; - param.query_value = query_param->get_value(); + param.query_value = field_value; param.query_type = query_type; param.num_rows = num_rows; param.roaring = std::make_shared(); diff --git a/be/src/storage/row_cursor.cpp b/be/src/storage/row_cursor.cpp index 466ee083a1d902..ef649a6a092979 100644 --- a/be/src/storage/row_cursor.cpp +++ b/be/src/storage/row_cursor.cpp @@ -151,20 +151,6 @@ std::string RowCursor::to_string() const { return result; } -// Convert a Field value to its storage representation via PrimitiveTypeConvertor and encode. -// For most types this is an identity conversion; for DATE, DATETIME, DECIMALV2 it does -// actual conversion to the olap storage format. 
-template -static void encode_non_string_field(const StorageField* storage_field, const Field& f, - bool full_encode, std::string* buf) { - auto storage_val = PrimitiveTypeConvertor::to_storage_field_type(f.get()); - if (full_encode) { - storage_field->full_encode_ascending(&storage_val, buf); - } else { - storage_field->encode_ascending(&storage_val, buf); - } -} - void RowCursor::_encode_field(const StorageField* storage_field, const Field& f, bool full_encode, std::string* buf) const { FieldType ft = storage_field->type(); @@ -197,69 +183,17 @@ void RowCursor::_encode_field(const StorageField* storage_field, const Field& f, return; } - // Non-string types: convert Field value to storage format via PrimitiveTypeConvertor, - // then encode. For most types this is an identity conversion. + // Non-string scalar keys are fixed-width; their KeyCoder::encode_ascending + // ignores `index_size` and delegates to full_encode_ascending, so the + // `full_encode` flag here is a no-op and we always call the full helper. 
+ const KeyCoder* coder = storage_field->key_coder(); switch (ft) { - case FieldType::OLAP_FIELD_TYPE_BOOL: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_TINYINT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_SMALLINT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_INT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_BIGINT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_LARGEINT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_FLOAT: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DOUBLE: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DATE: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DATETIME: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DATEV2: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DATETIMEV2: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DECIMAL: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DECIMAL32: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DECIMAL64: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: - encode_non_string_field(storage_field, f, 
full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_DECIMAL256: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_IPV4: - encode_non_string_field(storage_field, f, full_encode, buf); - break; - case FieldType::OLAP_FIELD_TYPE_IPV6: - encode_non_string_field(storage_field, f, full_encode, buf); +#define CASE(FT, PT) \ + case FieldType::FT: \ + full_encode_field_as_key(f, coder, buf); \ break; + DORIS_APPLY_FOR_KEY_ENCODABLE_NON_STRING_TYPES(CASE) +#undef CASE default: LOG(FATAL) << "unsupported field type for encoding: " << int(ft); break; diff --git a/be/src/storage/types.cpp b/be/src/storage/types.cpp index c5f12ee781fad6..e2137efc3825c3 100644 --- a/be/src/storage/types.cpp +++ b/be/src/storage/types.cpp @@ -26,8 +26,6 @@ namespace doris { -void (*FieldTypeTraits::set_to_max)(void*) = nullptr; - static TypeInfoPtr create_type_info_ptr(const TypeInfo* type_info, bool should_reclaim_memory); bool is_scalar_type(FieldType field_type) { diff --git a/be/src/storage/types.h b/be/src/storage/types.h index 81fec759089b8f..e33c81751b9086 100644 --- a/be/src/storage/types.h +++ b/be/src/storage/types.h @@ -69,9 +69,6 @@ class TypeInfo { virtual ~TypeInfo() = default; virtual int cmp(const void* left, const void* right) const = 0; - virtual void set_to_max(void* buf) const = 0; - virtual void set_to_min(void* buf) const = 0; - virtual size_t size() const = 0; virtual FieldType type() const = 0; @@ -81,8 +78,6 @@ class ScalarTypeInfo : public TypeInfo { public: int cmp(const void* left, const void* right) const override { return _cmp(left, right); } - void set_to_max(void* buf) const override { _set_to_max(buf); } - void set_to_min(void* buf) const override { _set_to_min(buf); } size_t size() const override { return _size; } FieldType type() const override { return _field_type; } @@ -90,17 +85,12 @@ class ScalarTypeInfo : public TypeInfo { template ScalarTypeInfo(TypeTraitsClass t) : 
_cmp(TypeTraitsClass::cmp), - _set_to_max(TypeTraitsClass::set_to_max), - _set_to_min(TypeTraitsClass::set_to_min), _size(TypeTraitsClass::size), _field_type(TypeTraitsClass::type) {} private: int (*_cmp)(const void* left, const void* right); - void (*_set_to_max)(void* buf); - void (*_set_to_min)(void* buf); - const size_t _size; const FieldType _field_type; @@ -158,14 +148,6 @@ class ArrayTypeInfo : public TypeInfo { } } - void set_to_max(void* buf) const override { - DCHECK(false) << "set_to_max of list is not implemented."; - } - - void set_to_min(void* buf) const override { - DCHECK(false) << "set_to_min of list is not implemented."; - } - size_t size() const override { return sizeof(CollectionValue); } FieldType type() const override { return FieldType::OLAP_FIELD_TYPE_ARRAY; } @@ -209,14 +191,6 @@ class MapTypeInfo : public TypeInfo { } } - void set_to_max(void* buf) const override { - DCHECK(false) << "set_to_max of list is not implemented."; - } - - void set_to_min(void* buf) const override { - DCHECK(false) << "set_to_min of list is not implemented."; - } - size_t size() const override { return sizeof(MapValue); } FieldType type() const override { return FieldType::OLAP_FIELD_TYPE_MAP; } @@ -282,14 +256,6 @@ class StructTypeInfo : public TypeInfo { } } - void set_to_max(void* buf) const override { - DCHECK(false) << "set_to_max of list is not implemented."; - } - - void set_to_min(void* buf) const override { - DCHECK(false) << "set_to_min of list is not implemented."; - } - size_t size() const override { return sizeof(StructValue); } FieldType type() const override { return FieldType::OLAP_FIELD_TYPE_STRUCT; } @@ -509,14 +475,6 @@ struct BaseFieldTypeTraits : public CppTypeTraits { return 0; } } - - static inline void set_to_max(void* buf) { - set_cpp_type_value(buf, type_limit::max()); - } - - static inline void set_to_min(void* buf) { - set_cpp_type_value(buf, type_limit::min()); - } }; // Using NumericFieldtypeTraits to Derived code for 
FieldType::OLAP_FIELD_TYPE_XXXINT, FieldType::OLAP_FIELD_TYPE_FLOAT, @@ -538,122 +496,43 @@ struct FieldTypeTraits template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { (*(uint8_t*)buf) = 1; } - static void set_to_min(void* buf) { (*(uint8_t*)buf) = 0; } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public NumericFieldTypeTraits { - static void set_to_max(void* buf) { - *reinterpret_cast(buf) = ~((int128_t)(1) << 127); - } - static void set_to_min(void* buf) { - *reinterpret_cast(buf) = (int128_t)(1) << 127; - } -}; + : public NumericFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - *reinterpret_cast(buf) = 0xFFFFFFFF; // 255.255.255.255 - } - - static void set_to_min(void* buf) { - *reinterpret_cast(buf) = 0; // 0.0.0.0 - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - *reinterpret_cast(buf) = -1; // ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff - } - - static void set_to_min(void* buf) { - *reinterpret_cast(buf) = 0; // :: - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - CppType* data = reinterpret_cast(buf); - data->integer = 999999999999999999L; - data->fraction = 999999999; - } - static void set_to_min(void* buf) { - CppType* data = reinterpret_cast(buf); - data->integer = -999999999999999999; - data->fraction = -999999999; - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - // max is 9999 * 16 * 32 + 12 * 32 + 31; - *reinterpret_cast(buf) = 5119903; - } - static void set_to_min(void* buf) { - // min is 0 * 16 * 32 + 1 * 32 + 1; - *reinterpret_cast(buf) = 33; - } -}; + : public BaseFieldTypeTraits {}; template 
<> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - // max is 9999 * 16 * 32 + 12 * 32 + 31; - *reinterpret_cast(buf) = MAX_DATE_V2; - } - static void set_to_min(void* buf) { - // min is 0 * 16 * 32 + 1 * 32 + 1; - *reinterpret_cast(buf) = MIN_DATE_V2; - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - // max is 9999 * 16 * 32 + 12 * 32 + 31; - *reinterpret_cast(buf) = MAX_DATETIME_V2; - } - static void set_to_min(void* buf) { - // min is 0 * 16 * 32 + 1 * 32 + 1; - *reinterpret_cast(buf) = MIN_DATETIME_V2; - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - // 9999-12-31 23:59:59 - *reinterpret_cast(buf) = 99991231235959L; - } - static void set_to_min(void* buf) { *reinterpret_cast(buf) = 101000000; } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits - : public BaseFieldTypeTraits { - static void set_to_max(void* buf) { - // max is 9999 * 16 * 32 + 12 * 32 + 31; - *reinterpret_cast(buf) = MAX_DATETIME_V2; - } - static void set_to_min(void* buf) { - // min is 0 * 16 * 32 + 1 * 32 + 1; - *reinterpret_cast(buf) = MIN_DATETIME_V2; - } -}; + : public BaseFieldTypeTraits {}; template <> struct FieldTypeTraits @@ -663,32 +542,15 @@ struct FieldTypeTraits auto r_slice = reinterpret_cast(right); return l_slice->compare(*r_slice); } - // Using field.set_to_max to set varchar/char,not here. 
- static void (*set_to_max)(void*); - - static void set_to_min(void* buf) { - auto slice = reinterpret_cast(buf); - memset(slice->data, 0, slice->size); - } }; template <> struct FieldTypeTraits - : public FieldTypeTraits { - static void set_to_min(void* buf) { - auto slice = reinterpret_cast(buf); - slice->size = 0; - } -}; + : public FieldTypeTraits {}; template <> struct FieldTypeTraits - : public FieldTypeTraits { - static void set_to_min(void* buf) { - auto slice = reinterpret_cast(buf); - slice->size = 0; - } -}; + : public FieldTypeTraits {}; template <> struct FieldTypeTraits @@ -697,16 +559,6 @@ struct FieldTypeTraits LOG(WARNING) << "can not compare JSONB values"; return -1; // always update ? } - - static void set_to_min(void* buf) { - auto slice = reinterpret_cast(buf); - slice->size = 0; - } - - static void set_to_max(void* buf) { - auto slice = reinterpret_cast(buf); - slice->size = 0; - } }; template <> diff --git a/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp b/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp index dc50d0d980385a..d903400797e195 100644 --- a/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp +++ b/be/test/exprs/function/function_comparison_evaluate_inverted_index_test.cpp @@ -38,14 +38,14 @@ class MockInvertedIndexReader : public segment_v2::InvertedIndexReader { } Status query(const segment_v2::IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, segment_v2::InvertedIndexQueryType query_type, + const Field& query_value, segment_v2::InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } Status try_query(const segment_v2::IndexQueryContextPtr& context, - const std::string& column_name, const void* query_value, + const std::string& column_name, const Field& query_value, segment_v2::InvertedIndexQueryType query_type, 
size_t* count) override { return Status::OK(); } diff --git a/be/test/exprs/function/function_ip_test.cpp b/be/test/exprs/function/function_ip_test.cpp index 1b36d18cfa1b09..4ac9671dfbda70 100644 --- a/be/test/exprs/function/function_ip_test.cpp +++ b/be/test/exprs/function/function_ip_test.cpp @@ -172,13 +172,13 @@ class MockIndexReader : public segment_v2::InvertedIndexReader { return segment_v2::InvertedIndexReaderType::BKD; } Status query(const segment_v2::IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, segment_v2::InvertedIndexQueryType query_type, + const Field& query_value, segment_v2::InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } Status try_query(const segment_v2::IndexQueryContextPtr& context, - const std::string& column_name, const void* query_value, + const std::string& column_name, const Field& query_value, segment_v2::InvertedIndexQueryType query_type, size_t* count) override { return Status::OK(); } diff --git a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp index 93670029927b6e..b8eada222697aa 100644 --- a/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp +++ b/be/test/storage/index/inverted/compaction/util/index_compaction_utils.cpp @@ -27,6 +27,7 @@ #include "CLucene/StdHeader.h" #include "CLucene/config/repl_wchar.h" +#include "core/field.h" #include "json2pb/json_to_pb.h" #include "json2pb/pb_to_json.h" #include "storage/compaction/base_compaction.h" @@ -163,10 +164,6 @@ class IndexCompactionUtils { for (int i = 0; i < query_data.size(); i++) { Field param_value = Field::create_field(int32_t(query_data[i])); - std::unique_ptr query_param = nullptr; - EXPECT_TRUE(segment_v2::InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_INT, ¶m_value, query_param) - 
.ok()); auto result = std::make_shared(); OlapReaderStatistics stats; @@ -174,7 +171,7 @@ class IndexCompactionUtils { context->stats = &stats; EXPECT_TRUE(idx_reader - ->invoke_bkd_query(context, query_param->get_value(), + ->invoke_bkd_query(context, param_value, InvertedIndexQueryType::EQUAL_QUERY, *bkd_searcher, result) .ok()); diff --git a/be/test/storage/key_coder_test.cpp b/be/test/storage/key_coder_test.cpp index 580df61a53f1d2..5362645078677c 100644 --- a/be/test/storage/key_coder_test.cpp +++ b/be/test/storage/key_coder_test.cpp @@ -25,7 +25,12 @@ #include #include +#include "core/field.h" +#include "core/types.h" #include "core/uint24.h" +#include "core/value/decimalv2_value.h" +#include "core/value/timestamptz_value.h" +#include "core/value/vdatetime_value.h" #include "gtest/gtest_pred_impl.h" #include "testutil/test_util.h" #include "util/debug_util.h" @@ -278,6 +283,353 @@ TEST_F(KeyCoderTest, test_decimal) { } } +// Encode an ascending sequence of compute-layer values via +// full_encode_field_as_key -- the same helper RowCursor::encode_key and +// the BKD inverted-index reader use -- and assert the byte order matches. +// +// Locks in the contract that PrimitiveTypeConvertor + KeyCoder together +// preserve the compute-layer ordering for every (PrimitiveType, FieldType) +// pair used as a sortable key. +// +// Why no `scale` or `frac` parameter? The contract under test is exactly +// "the encode path does not read scale/frac". They live on TabletColumn +// metadata (`_precision`, `_frac`) one layer above and never reach +// `Field::create_field`, `PrimitiveTypeConvertor`, or `KeyCoder`. +// So each subgroup below picks raw ints that *would* arise from a column at +// some hypothetical scale, labels the subgroup `scale=N` to make the human +// interpretation explicit, and asserts ordering -- the encode result is by +// construction identical regardless of which scale the column declared. 
+template +static void check_full_encode_preserves_order(FieldType ft, const std::vector& ascending, + const char* label) { + const KeyCoder* coder = get_key_coder(ft); + ASSERT_NE(coder, nullptr) << label; + std::vector encoded; + encoded.reserve(ascending.size()); + for (const auto& v : ascending) { + Field f = Field::create_field(v); + std::string buf; + full_encode_field_as_key(f, coder, &buf); + encoded.push_back(std::move(buf)); + } + for (size_t i = 0; i + 1 < encoded.size(); ++i) { + EXPECT_LT(encoded[i], encoded[i + 1]) << label << " idx=" << i; + } +} + +TEST_F(KeyCoderTest, full_encode_field_as_key_preserves_compute_layer_ordering) { + // pow10 = 10^scale as the underlying raw int type. Used to turn a + // human-written decimal literal "whole.frac" into the raw int the column + // would actually store at the labelled scale, so the test reads like + // raw = whole * pow10 + frac (signs aligned by the caller) + // rather than as opaque magic numbers. + auto pow10_i32 = [](int scale) { + int32_t r = 1; + for (int i = 0; i < scale; ++i) r *= 10; + return r; + }; + auto pow10_i64 = [](int scale) { + int64_t r = 1; + for (int i = 0; i < scale; ++i) r *= 10; + return r; + }; + auto pow10_i128 = [](int scale) { + int128_t r = 1; + for (int i = 0; i < scale; ++i) r *= 10; + return r; + }; + auto pow10_i256 = [](int scale) { + wide::Int256 r {1}; + for (int i = 0; i < scale; ++i) r *= wide::Int256 {10}; + return r; + }; + + // -------- DECIMAL32 (compute=Decimal32, storage=Int32) -------- + // scale=0 (DECIMAL(9,0)): whole int32 range + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL32, + {Decimal32(std::numeric_limits::min()), // INT32_MIN + Decimal32(int32_t(-99999)), // -99999 + Decimal32(int32_t(-1)), // -1 + Decimal32(int32_t(0)), // 0 + Decimal32(int32_t(1)), // 1 + Decimal32(int32_t(99999)), // 99999 + Decimal32(std::numeric_limits::max())}, // INT32_MAX + "DECIMAL32 scale=0"); + // scale=2 (DECIMAL(9,2)): raw = whole * 100 + frac + 
{ + const int32_t s = pow10_i32(2); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL32, + {Decimal32(-123456 * s), // -123456.00 + Decimal32(-12 * s - 34), // -12.34 + Decimal32(-1), // -0.01 + Decimal32(0), // 0.00 + Decimal32(1), // 0.01 + Decimal32(12 * s + 34), // 12.34 + Decimal32(999999 * s + 99)}, // 999999.99 + "DECIMAL32 scale=2"); + } + // scale=9 (DECIMAL(9,9)): -0.999999999 .. +0.999999999 (whole always 0) + { + const int32_t s = pow10_i32(9); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL32, + {Decimal32(-(s - 1)), // -0.999999999 + Decimal32(-1), // -0.000000001 + Decimal32(0), // 0 + Decimal32(1), // 0.000000001 + Decimal32(s - 1)}, // 0.999999999 + "DECIMAL32 scale=9"); + } + + // -------- DECIMAL64 (compute=Decimal64, storage=Int64) -------- + // scale=0: whole int64 range + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL64, + {Decimal64(std::numeric_limits::min()), // INT64_MIN + Decimal64(int64_t(-1'000'000'000LL)), // -1 000 000 000 + Decimal64(int64_t(-1)), Decimal64(int64_t(0)), Decimal64(int64_t(1)), + Decimal64(int64_t(1'000'000'000LL)), // 1 000 000 000 + Decimal64(std::numeric_limits::max())}, // INT64_MAX + "DECIMAL64 scale=0"); + // scale=4 (DECIMAL(18,4)) + { + const int64_t s = pow10_i64(4); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL64, + {Decimal64(-123456 * s - 7890), // -123456.7890 + Decimal64(-int64_t(1)), // -0.0001 + Decimal64(int64_t(0)), // 0.0000 + Decimal64(int64_t(1)), // 0.0001 + Decimal64(int64_t(99'999'999'999) * s + 9999)}, // 99999999999.9999 + "DECIMAL64 scale=4"); + } + // scale=18 (DECIMAL(18,18)): whole always 0, full fractional range + { + const int64_t s = pow10_i64(18); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL64, + {Decimal64(-(s - 1)), // -0.999999999999999999 + Decimal64(int64_t(-1)), Decimal64(int64_t(0)), Decimal64(int64_t(1)), + Decimal64(s - 1)}, // 
0.999999999999999999 + "DECIMAL64 scale=18"); + } + + // -------- DECIMAL128I (compute=Decimal128V3, storage=Int128) -------- + // scale=0: span ±2^100 to exercise both halves of int128. + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL128I, + {Decimal128V3(-(static_cast(1) << 100)), // -2^100 + Decimal128V3(int128_t(-1)), Decimal128V3(int128_t(0)), Decimal128V3(int128_t(1)), + Decimal128V3(static_cast(1) << 100)}, // 2^100 + "DECIMAL128I scale=0"); + // scale=10 (DECIMAL(38,10)) + { + const int128_t s = pow10_i128(10); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL128I, + {Decimal128V3(-int128_t(100) * s), // -100.0000000000 + Decimal128V3(int128_t(-1)), // -0.0000000001 + Decimal128V3(int128_t(0)), + Decimal128V3(int128_t(1)), // 0.0000000001 + Decimal128V3(int128_t(12345) * s + 6789), // 12345.0000006789 + Decimal128V3(static_cast(1'000'000'000'000'000LL) * + static_cast(1'000'000'000'000'000LL))}, // 10^30 + "DECIMAL128I scale=10"); + } + + // -------- DECIMAL256 (compute=Decimal256, storage=wide::Int256) -------- + // scale=0: small-magnitude values around zero (all within int64 range). 
+ check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL256, + {Decimal256(wide::Int256(-1'000'000'000'000LL)), // -10^12 + Decimal256(wide::Int256(-1)), Decimal256(wide::Int256(0)), Decimal256(wide::Int256(1)), + Decimal256(wide::Int256(1'000'000'000'000'000'000LL))}, // 10^18 + "DECIMAL256 scale=0"); + // scale=20 (DECIMAL(76,20)) + { + const wide::Int256 s = pow10_i256(20); + const wide::Int256 big = pow10_i256(36); // beyond int64, still within int128 (10^36 < 2^127) + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL256, + {Decimal256(-big), // -10^36 + Decimal256(-s), // -1.0 + Decimal256(wide::Int256(-1)), // -1e-20 + Decimal256(wide::Int256(0)), + Decimal256(wide::Int256(1)), // 1e-20 + Decimal256(s), // 1.0 + Decimal256(big)}, // 10^36 + "DECIMAL256 scale=20"); + } + + // -------- DECIMALV2 (compute=DecimalV2Value, storage=decimal12_t) -------- + // DECIMALV2 is fixed at DECIMAL(27,9). The compute->storage conversion + // splits the int128 into {int_part, frac_part} where frac is the lower + // 9 digits (DecimalV2Value's `frac_value()` * 10^-9). Caller passes the + // two parts with matching sign. 
+ check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DECIMAL, + {DecimalV2Value::get_min_decimal(), // -999999999999999999.999999999 + DecimalV2Value(int64_t(-100), int64_t(0)), // -100.000000000 + DecimalV2Value(int64_t(-1), int64_t(-500'000'000)), // -1.500000000 + DecimalV2Value(int64_t(0), int64_t(0)), // 0 + DecimalV2Value(int64_t(1), int64_t(500'000'000)), // 1.500000000 + DecimalV2Value(int64_t(100), int64_t(0)), // 100.000000000 + DecimalV2Value::get_max_decimal()}, // 999999999999999999.999999999 + "DECIMALV2"); + + // -------- DATEV2 (compute=DateV2Value, storage=uint32) -------- + auto pack_d = [](int y, int m, int d) -> uint32_t { return uint32_t((y << 9) | (m << 5) | d); }; + check_full_encode_preserves_order>( + FieldType::OLAP_FIELD_TYPE_DATEV2, + {DateV2Value(pack_d(0001, 1, 1)), + DateV2Value(pack_d(1970, 1, 1)), + DateV2Value(pack_d(2024, 12, 31)), + DateV2Value(pack_d(9999, 12, 31))}, + "DATEV2"); + + // -------- DATETIMEV2 (compute=DateV2Value, storage=uint64) -------- + // Per vdatetime_value.h: bits = year(18)|month(4)|day(5)|hour(5)|minute(6)|second(6)|microsec(20). + // The microsecond field is always 20-bit regardless of the column's + // declared scale (0..6); it's just zero-padded for lower scales. So + // values across all scales coexist in the same uint64 address space and + // KeyCoder must keep them chronologically ordered. 
+ auto pack_dt = [](int y, int mo, int d, int h, int mi, int s, uint32_t us = 0) -> uint64_t { + uint64_t date = (uint64_t(y) << 9) | (uint64_t(mo) << 5) | uint64_t(d); + return (date << 37) | (uint64_t(h) << 32) | (uint64_t(mi) << 26) | (uint64_t(s) << 20) | + uint64_t(us); + }; + using DTV2 = DateV2Value; + // scale=0: microsecond always zero + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DATETIMEV2, + {DTV2(pack_dt(2020, 1, 1, 12, 0, 0)), DTV2(pack_dt(2024, 3, 10, 9, 30, 0)), + DTV2(pack_dt(2024, 12, 31, 23, 59, 59))}, + "DATETIMEV2 scale=0"); + // scale=3: microsecond multiples of 1000 + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DATETIMEV2, + {DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 1'000)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 123'000)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 999'000)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 1, 0))}, + "DATETIMEV2 scale=3"); + // scale=6: full microsecond resolution; verifies ordering is byte-stable + // even at the boundary between us=999999 and the next-second carry. 
+ check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DATETIMEV2, + {DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 0)), DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 1)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 999'998)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 0, 999'999)), + DTV2(pack_dt(2024, 3, 10, 9, 30, 1, 0))}, + "DATETIMEV2 scale=6"); + + // -------- TIMESTAMPTZ (same packing as DATETIMEV2) -------- + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ, + {TimestampTzValue(pack_dt(2020, 1, 1, 12, 0, 0)), + TimestampTzValue(pack_dt(2024, 3, 10, 9, 30, 0, 123'456)), + TimestampTzValue(pack_dt(2024, 12, 31, 23, 59, 59, 999'999))}, + "TIMESTAMPTZ"); + + // -------- BOOLEAN (compute=storage=UInt8, only {0, 1}) -------- + check_full_encode_preserves_order(FieldType::OLAP_FIELD_TYPE_BOOL, + {UInt8(0), UInt8(1)}, "BOOLEAN"); + + // -------- Plain integer keys (compute=storage) -------- + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_TINYINT, + {std::numeric_limits::min(), int8_t(-1), int8_t(0), int8_t(1), + std::numeric_limits::max()}, + "TINYINT"); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_SMALLINT, + {std::numeric_limits::min(), int16_t(-1), int16_t(0), int16_t(1), + std::numeric_limits::max()}, + "SMALLINT"); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_INT, + {std::numeric_limits::min(), int32_t(-1), int32_t(0), int32_t(1), + std::numeric_limits::max()}, + "INT"); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_BIGINT, + {std::numeric_limits::min(), int64_t(-1), int64_t(0), int64_t(1), + std::numeric_limits::max()}, + "BIGINT"); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_LARGEINT, + {-(static_cast(1) << 100), int128_t(-1), int128_t(0), int128_t(1), + static_cast(1) << 100}, + "LARGEINT"); + + // -------- FLOAT / DOUBLE (sign-magnitude flip in KeyCoder) -------- + // KeyCoderTraitsForFloat byte-encodes finite values byte-comparably; NaN and + // 
signed-zero ambiguity have their own dedicated tests above (FloatOrdering / + // FloatComprehensiveOrdering), so here we just exercise the typical path. + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_FLOAT, + {-std::numeric_limits::infinity(), -1e10f, -1.0f, -1e-10f, 0.0f, 1e-10f, 1.0f, + 1e10f, std::numeric_limits::infinity()}, + "FLOAT"); + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DOUBLE, + {-std::numeric_limits::infinity(), -1e100, -1.0, -1e-100, 0.0, 1e-100, 1.0, + 1e100, std::numeric_limits::infinity()}, + "DOUBLE"); + + // -------- DATE V1 (compute=VecDateTimeValue, storage=uint24_t packed) -------- + // Storage = (year << 9) | (month << 5) | day, same packing as DATEV2 just + // narrower. The PrimitiveTypeConvertor specialisation runs to_olap_date() + // to project compute -> storage. + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DATE, + {VecDateTimeValue::create_from_olap_date(pack_d(1900, 1, 1)), + VecDateTimeValue::create_from_olap_date(pack_d(1970, 1, 1)), + VecDateTimeValue::create_from_olap_date(pack_d(2024, 12, 31)), + VecDateTimeValue::create_from_olap_date(pack_d(9999, 12, 31))}, + "DATE V1"); + + // -------- DATETIME V1 (compute=VecDateTimeValue, storage=int64 decimal-packed) -------- + // Storage = YYYYMMDDhhmmss interpreted as int64 (sparse compared to V2 bit + // packing but still chronologically ordered as integers). 
+ check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_DATETIME, + {VecDateTimeValue::create_from_olap_datetime(uint64_t(19700101000000ULL)), + VecDateTimeValue::create_from_olap_datetime(uint64_t(20240310093000ULL)), + VecDateTimeValue::create_from_olap_datetime(uint64_t(20241231235959ULL)), + VecDateTimeValue::create_from_olap_datetime(uint64_t(99991231235959ULL))}, + "DATETIME V1"); + + // -------- IPV4 (uint32_t, dotted-quad big-endian view) -------- + auto ip4 = [](uint8_t a, uint8_t b, uint8_t c, uint8_t d) -> uint32_t { + return (uint32_t(a) << 24) | (uint32_t(b) << 16) | (uint32_t(c) << 8) | uint32_t(d); + }; + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_IPV4, + {ip4(0, 0, 0, 1), // 0.0.0.1 + ip4(10, 0, 0, 1), // 10.0.0.1 + ip4(127, 0, 0, 1), // 127.0.0.1 + ip4(192, 168, 0, 1), // 192.168.0.1 + ip4(255, 255, 255, 254)}, // 255.255.255.254 + "IPV4"); + + // -------- IPV6 (uint128_t, 16-byte big-endian view) -------- + auto ip6 = [](uint64_t hi, uint64_t lo) -> uint128_t { + return (static_cast(hi) << 64) | lo; + }; + check_full_encode_preserves_order( + FieldType::OLAP_FIELD_TYPE_IPV6, + {ip6(0, 1), // ::1 + ip6(0, 0x0000FFFF7F000001ULL), // ::ffff:127.0.0.1 + ip6(0x20010DB800000000ULL, 1), // 2001:db8::1 + ip6(0xFE80000000000000ULL, 1), // fe80::1 + ip6(0xFFFFFFFFFFFFFFFFULL, + 0xFFFFFFFFFFFFFFFEULL)}, // ffff:ffff:ffff:ffff:ffff:ffff:ffff:fffe + "IPV6"); +} + TEST_F(KeyCoderTest, test_char) { auto key_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_CHAR); diff --git a/be/test/storage/segment/index_reader_helper_test.cpp b/be/test/storage/segment/index_reader_helper_test.cpp index d96c4efeb13f6a..d52e036f22997a 100644 --- a/be/test/storage/segment/index_reader_helper_test.cpp +++ b/be/test/storage/segment/index_reader_helper_test.cpp @@ -63,14 +63,14 @@ class MockInvertedIndexReader : public InvertedIndexReader { } MOCK_FUNCTION Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* 
query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } MOCK_FUNCTION Status try_query(const IndexQueryContextPtr& context, - const std::string& column_name, const void* query_value, + const std::string& column_name, const Field& query_value, InvertedIndexQueryType query_type, size_t* count) override { return Status::OK(); } diff --git a/be/test/storage/segment/inverted_index_iterator_test.cpp b/be/test/storage/segment/inverted_index_iterator_test.cpp index 7deffc0fdbdc69..cbbb910f65c549 100644 --- a/be/test/storage/segment/inverted_index_iterator_test.cpp +++ b/be/test/storage/segment/inverted_index_iterator_test.cpp @@ -54,14 +54,14 @@ class MockInvertedIndexReader : public InvertedIndexReader { } Status query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, std::shared_ptr& roaring, const InvertedIndexAnalyzerCtx* analyzer_ctx = nullptr) override { return Status::OK(); } Status try_query(const IndexQueryContextPtr& context, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, + const Field& query_value, InvertedIndexQueryType query_type, size_t* count) override { *count = 0; return Status::OK(); diff --git a/be/test/storage/segment/inverted_index_query_param_test.cpp b/be/test/storage/segment/inverted_index_query_param_test.cpp deleted file mode 100644 index 4f00d625e9b63d..00000000000000 --- a/be/test/storage/segment/inverted_index_query_param_test.cpp +++ /dev/null @@ -1,591 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include "common/status.h" -#include "core/data_type/primitive_type.h" -#include "core/field.h" -#include "storage/index/inverted/inverted_index_reader.h" - -namespace doris::segment_v2 { - -class InvertedIndexQueryParamTest : public testing::Test { -public: - void SetUp() override {} - void TearDown() override {} -}; - -// ==================== Integer Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestBooleanWithField) { - auto field = Field::create_field(static_cast(1)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_BOOLEAN, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, true); -} - -TEST_F(InvertedIndexQueryParamTest, TestBooleanWithFieldFalse) { - auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_BOOLEAN, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, false); -} - -TEST_F(InvertedIndexQueryParamTest, TestBooleanTemplateWithNativeValue) { - bool input_value = true; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - 
ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, true); -} - -TEST_F(InvertedIndexQueryParamTest, TestTinyIntWithField) { - auto field = Field::create_field(static_cast(42)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_TINYINT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, 42); -} - -TEST_F(InvertedIndexQueryParamTest, TestTinyIntTemplateWithNativeValue) { - int8_t input_value = -100; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, -100); -} - -TEST_F(InvertedIndexQueryParamTest, TestSmallIntWithField) { - auto field = Field::create_field(static_cast(1234)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_SMALLINT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, 1234); -} - -TEST_F(InvertedIndexQueryParamTest, TestSmallIntTemplateWithNativeValue) { - int16_t input_value = -32000; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, -32000); -} - -TEST_F(InvertedIndexQueryParamTest, TestIntWithField) { - auto field = Field::create_field(static_cast(123456)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_INT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, 123456); -} - -TEST_F(InvertedIndexQueryParamTest, TestIntTemplateWithNativeValue) { - int32_t input_value = 
-2147483647; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, -2147483647); -} - -TEST_F(InvertedIndexQueryParamTest, TestBigIntWithField) { - auto field = Field::create_field(static_cast(9223372036854775807LL)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_BIGINT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, 9223372036854775807LL); -} - -TEST_F(InvertedIndexQueryParamTest, TestBigIntTemplateWithNativeValue) { - int64_t input_value = -9223372036854775807LL; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, -9223372036854775807LL); -} - -TEST_F(InvertedIndexQueryParamTest, TestLargeIntWithField) { - Int128 large_value = 12345678901234567890ULL; - auto field = Field::create_field(large_value); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_LARGEINT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, static_cast<__int128_t>(large_value)); -} - -TEST_F(InvertedIndexQueryParamTest, TestLargeIntTemplateWithNativeValue) { - __int128_t input_value = 12345678901234567890ULL; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, input_value); -} - -// ==================== Float/Double Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestFloatWithField) { - auto field = 
Field::create_field(static_cast(3.14f)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_FLOAT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_FLOAT_EQ(*value, 3.14f); -} - -TEST_F(InvertedIndexQueryParamTest, TestFloatTemplateWithNativeValue) { - float input_value = -1.23456f; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_FLOAT_EQ(*value, -1.23456f); -} - -TEST_F(InvertedIndexQueryParamTest, TestDoubleWithField) { - auto field = Field::create_field(static_cast(3.14159265358979)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_DOUBLE_EQ(*value, 3.14159265358979); -} - -TEST_F(InvertedIndexQueryParamTest, TestDoubleTemplateWithNativeValue) { - double input_value = -9.87654321e10; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &input_value, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_DOUBLE_EQ(*value, -9.87654321e10); -} - -// ==================== String Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestCharWithField) { - String str = "hello"; - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_CHAR, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, "hello"); -} - -TEST_F(InvertedIndexQueryParamTest, TestVarcharWithField) { - String str = "world"; - auto field = Field::create_field(str); - std::unique_ptr 
param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_VARCHAR, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, "world"); -} - -TEST_F(InvertedIndexQueryParamTest, TestStringWithField) { - String str = "test string content"; - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, "test string content"); -} - -TEST_F(InvertedIndexQueryParamTest, TestStringTemplateWithStringRef) { - std::string str_data = "string ref test"; - StringRef str_ref(str_data.data(), str_data.size()); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &str_ref, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, "string ref test"); -} - -TEST_F(InvertedIndexQueryParamTest, TestVarcharTemplateWithStringRef) { - std::string str_data = "varchar ref test"; - StringRef str_ref(str_data.data(), str_data.size()); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - &str_ref, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, "varchar ref test"); -} - -TEST_F(InvertedIndexQueryParamTest, TestCharTemplateWithStringRef) { - std::string str_data = "char ref test"; - StringRef str_ref(str_data.data(), str_data.size()); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(&str_ref, - param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, "char ref test"); -} - -TEST_F(InvertedIndexQueryParamTest, TestStringWithEmptyValue) { - String str = ""; - auto field = 
Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, ""); -} - -TEST_F(InvertedIndexQueryParamTest, TestStringWithSpecialCharacters) { - String str = "hello\nworld\t!@#$%^&*()"; - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, "hello\nworld\t!@#$%^&*()"); -} - -// ==================== Decimal Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestDecimalV2WithField) { - // DecimalV2 uses Int128 as underlying storage - Int128 dec_value = 123456789; - auto field = Field::create_field(DecimalV2Value(dec_value)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMALV2, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDecimal32WithField) { - // Decimal32 uses Int64 for Field storage - Int64 dec_value = 12345; - auto field = Field::create_field(dec_value); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMAL32, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDecimal64WithField) { - // Decimal64 uses Int64 for Field storage - Int64 dec_value = 123456789012; - auto field = Field::create_field(dec_value); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMAL64, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, 
TestDecimal128IWithField) { - // Decimal128I uses Int128 for Field storage - Int128 dec_value = 123456789012345LL; - auto field = Field::create_field(dec_value); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_DECIMAL128I, &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDecimal256WithField) { - // Decimal256 uses Int128 for Field storage - Int128 dec_value = 123456789012345LL; - auto field = Field::create_field(Decimal(dec_value)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DECIMAL256, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -// ==================== Date/Time Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestDateWithField) { - VecDateTimeValue tmp; - tmp.from_date_int64(20231205); - auto field = Field::create_field(tmp); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATE, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDateTimeWithField) { - VecDateTimeValue tmp; - tmp.create_from_olap_datetime(20231205120000LL); - auto field = Field::create_field(tmp); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATETIME, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -TEST_F(InvertedIndexQueryParamTest, TestDateV2WithField) { - UInt64 v = 20231205; - typename PrimitiveTypeTraits::CppType tmp; - tmp.from_date_int64(v); - auto field = Field::create_field(tmp); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATEV2, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - 
-TEST_F(InvertedIndexQueryParamTest, TestDateTimeV2WithField) { - UInt64 v = 20231205120000LL; - auto field = Field::create_field( - *(typename PrimitiveTypeTraits::CppType*)&v); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DATETIMEV2, - &field, param); - ASSERT_TRUE(status.ok()); - ASSERT_NE(param, nullptr); -} - -// ==================== IP Types Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestIPv4WithField) { - auto field = Field::create_field(IPv4(3232235521)); // 192.168.0.1 - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_IPV4, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, IPv4(3232235521)); -} - -TEST_F(InvertedIndexQueryParamTest, TestIPv4TemplateWithNativeValue) { - IPv4 input_value(2130706433); // 127.0.0.1 - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(&input_value, - param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, IPv4(2130706433)); -} - -TEST_F(InvertedIndexQueryParamTest, TestIPv6WithField) { - IPv6 ipv6_value = 1; - auto field = Field::create_field(ipv6_value); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_IPV6, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, ipv6_value); -} - -TEST_F(InvertedIndexQueryParamTest, TestIPv6TemplateWithNativeValue) { - IPv6 input_value = 12345678901234567890ULL; - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(&input_value, - param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, input_value); -} - -// ==================== Unsupported Type Test 
==================== - -TEST_F(InvertedIndexQueryParamTest, TestUnsupportedType) { - auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_ARRAY, - &field, param); - ASSERT_FALSE(status.ok()); - EXPECT_EQ(status.code(), ErrorCode::NOT_IMPLEMENTED_ERROR); -} - -TEST_F(InvertedIndexQueryParamTest, TestUnsupportedTypeMap) { - auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_MAP, - &field, param); - ASSERT_FALSE(status.ok()); -} - -TEST_F(InvertedIndexQueryParamTest, TestUnsupportedTypeStruct) { - auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRUCT, - &field, param); - ASSERT_FALSE(status.ok()); -} - -// ==================== Edge Cases Tests ==================== - -TEST_F(InvertedIndexQueryParamTest, TestIntegerBoundaryMin) { - // Test minimum values - { - auto field = Field::create_field(static_cast(-128)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_TINYINT, &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, -128); - } - { - auto field = Field::create_field(static_cast(-32768)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_SMALLINT, &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, -32768); - } -} - -TEST_F(InvertedIndexQueryParamTest, TestIntegerBoundaryMax) { - // Test maximum values - { - auto field = Field::create_field(static_cast(127)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - 
PrimitiveType::TYPE_TINYINT, &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, 127); - } - { - auto field = Field::create_field(static_cast(32767)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value( - PrimitiveType::TYPE_SMALLINT, &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, 32767); - } -} - -TEST_F(InvertedIndexQueryParamTest, TestZeroValues) { - // Test zero values for different types - { - auto field = Field::create_field(static_cast(0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_INT, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, 0); - } - { - auto field = Field::create_field(static_cast(0.0)); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_DOUBLE_EQ(*value, 0.0); - } -} - -TEST_F(InvertedIndexQueryParamTest, TestFloatSpecialValues) { - // Test infinity - { - auto field = Field::create_field( - static_cast(std::numeric_limits::infinity())); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_TRUE(std::isinf(*value)); - } - // Test negative infinity - { - auto field = Field::create_field( - static_cast(-std::numeric_limits::infinity())); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_DOUBLE, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - 
EXPECT_TRUE(std::isinf(*value)); - EXPECT_LT(*value, 0); - } -} - -TEST_F(InvertedIndexQueryParamTest, TestStringWithUnicodeCharacters) { - String str = "你好世界 🌍 日本語"; - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(*value, "你好世界 🌍 日本語"); -} - -TEST_F(InvertedIndexQueryParamTest, TestLongString) { - std::string long_str(10000, 'x'); - String str(long_str); - auto field = Field::create_field(str); - std::unique_ptr param; - auto status = InvertedIndexQueryParamFactory::create_query_value(PrimitiveType::TYPE_STRING, - &field, param); - ASSERT_TRUE(status.ok()); - const auto* value = static_cast(param->get_value()); - EXPECT_EQ(value->size(), 10000); - EXPECT_EQ(*value, long_str); -} - -} // namespace doris::segment_v2 \ No newline at end of file diff --git a/be/test/storage/segment/inverted_index_reader_test.cpp b/be/test/storage/segment/inverted_index_reader_test.cpp index 3cbe01865304fb..2305833a402778 100644 --- a/be/test/storage/segment/inverted_index_reader_test.cpp +++ b/be/test/storage/segment/inverted_index_reader_test.cpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -28,13 +29,14 @@ #include #include +#include "core/field.h" +#include "core/value/vdatetime_value.h" #include "runtime/runtime_state.h" #include "storage/field.h" #include "storage/index/index_file_reader.h" #include "storage/index/index_file_writer.h" #include "storage/index/inverted/inverted_index_desc.h" #include "storage/index/inverted/inverted_index_iterator.h" -#include "storage/index/inverted/inverted_index_reader.h" #include "storage/index/inverted/inverted_index_writer.h" #include "storage/tablet/tablet_schema.h" #include "storage/tablet/tablet_schema_helper.h" @@ -332,7 +334,8 @@ class InvertedIndexReaderTest : public testing::Test { 
context->io_ctx = &io_ctx; context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = str_reader->query(context, field_name, &str_ref, + Field qp_335 = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = str_reader->query(context, field_name, qp_335, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -345,7 +348,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string not_exist = "orange"; StringRef not_exist_ref(not_exist.c_str(), not_exist.length()); - query_status = str_reader->query(context, field_name, ¬_exist_ref, + Field qp_348 = Field::create_field( + std::string(not_exist_ref.data, not_exist_ref.size)); + query_status = str_reader->query(context, field_name, qp_348, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -438,7 +443,8 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = bkd_reader->query(context, field_name, &query_value, + Field qp_441 = Field::create_field(query_value); + auto query_status = bkd_reader->query(context, field_name, qp_441, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -450,7 +456,8 @@ class InvertedIndexReaderTest : public testing::Test { bitmap = std::make_shared(); int32_t less_than_value = 100; - query_status = bkd_reader->query(context, field_name, &less_than_value, + Field qp_453 = Field::create_field(less_than_value); + query_status = bkd_reader->query(context, field_name, qp_453, InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -462,7 +469,8 @@ class InvertedIndexReaderTest : public testing::Test { bitmap = std::make_shared(); int32_t greater_than_value = 100; - query_status = bkd_reader->query(context, field_name, &greater_than_value, + Field qp_465 = 
Field::create_field(greater_than_value); + query_status = bkd_reader->query(context, field_name, qp_465, InvertedIndexQueryType::GREATER_THAN_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -521,7 +529,8 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = str_reader->query(context, field_name, &str_ref, + Field qp_524 = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = str_reader->query(context, field_name, qp_524, InvertedIndexQueryType::EQUAL_QUERY, bitmap1); EXPECT_TRUE(query_status.ok()) << query_status; @@ -531,7 +540,8 @@ class InvertedIndexReaderTest : public testing::Test { // Second query with same value, should be cache hit std::shared_ptr bitmap2 = std::make_shared(); - query_status = str_reader->query(context, field_name, &str_ref, + Field qp_534 = Field::create_field(std::string(str_ref.data, str_ref.size)); + query_status = str_reader->query(context, field_name, qp_534, InvertedIndexQueryType::EQUAL_QUERY, bitmap2); EXPECT_TRUE(query_status.ok()) << query_status; @@ -589,7 +599,8 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = str_reader->query(context, field_name, &str_ref, + Field qp_592 = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = str_reader->query(context, field_name, qp_592, InvertedIndexQueryType::EQUAL_QUERY, bitmap1); EXPECT_TRUE(query_status.ok()) << query_status; @@ -600,7 +611,8 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap2 = std::make_shared(); StringRef str_ref2(values[1].data, values[1].size); // "banana" - query_status = str_reader->query(context, field_name, &str_ref2, + Field qp_603 = Field::create_field(std::string(str_ref2.data, str_ref2.size)); + query_status = str_reader->query(context, field_name, qp_603, 
InvertedIndexQueryType::EQUAL_QUERY, bitmap2); EXPECT_TRUE(query_status.ok()) << query_status; @@ -657,7 +669,9 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap = std::make_shared(); StringRef term_ref(term.data(), term.size()); - auto status = str_reader->query(context, field_name, &term_ref, + Field qp_660 = + Field::create_field(std::string(term_ref.data, term_ref.size)); + auto status = str_reader->query(context, field_name, qp_660, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()) << status; EXPECT_EQ(1, bitmap->cardinality()); @@ -788,7 +802,8 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = str_reader->query(context, field_name, &str_ref, + Field qp_791 = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = str_reader->query(context, field_name, qp_791, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -803,7 +818,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "apple"; StringRef str_ref_a(query_term.c_str(), query_term.length()); - query_status = str_reader->query(context, field_name, &str_ref_a, + Field qp_806 = + Field::create_field(std::string(str_ref_a.data, str_ref_a.size)); + query_status = str_reader->query(context, field_name, qp_806, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -883,8 +900,10 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; + Field qp_887 = + Field::create_field(std::string(str_ref.data, str_ref.size)); auto query_status = - str_reader->query(context, field_name, &str_ref, + str_reader->query(context, field_name, qp_887, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -900,7 +919,9 @@ class 
InvertedIndexReaderTest : public testing::Test { query_term = "term_a"; StringRef str_ref_a(query_term.c_str(), query_term.length()); - query_status = str_reader->query(context, field_name, &str_ref_a, + Field qp_903 = + Field::create_field(std::string(str_ref_a.data, str_ref_a.size)); + query_status = str_reader->query(context, field_name, qp_903, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -916,7 +937,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "noexist"; StringRef str_ref_no_term(query_term.c_str(), query_term.length()); - query_status = str_reader->query(context, field_name, &str_ref_no_term, + Field qp_919 = Field::create_field( + std::string(str_ref_no_term.data, str_ref_no_term.size)); + query_status = str_reader->query(context, field_name, qp_919, InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; EXPECT_EQ(bitmap->cardinality(), 0) << "V3: Should find 0 documents matching 'noexist'"; @@ -965,7 +988,9 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = str_reader->query(context, field_name, &str_ref, + Field qp_968 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = str_reader->query(context, field_name, qp_968, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -981,7 +1006,9 @@ class InvertedIndexReaderTest : public testing::Test { query_term = "term_a"; StringRef str_ref_a(query_term.c_str(), query_term.length()); - query_status = str_reader->query(context, field_name, &str_ref_a, + Field qp_984 = + Field::create_field(std::string(str_ref_a.data, str_ref_a.size)); + query_status = str_reader->query(context, field_name, qp_984, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; @@ -997,7 +1024,9 @@ class 
InvertedIndexReaderTest : public testing::Test { query_term = "noexist"; StringRef str_ref_no_term(query_term.c_str(), query_term.length()); - query_status = str_reader->query(context, field_name, &str_ref_no_term, + Field qp_1000 = Field::create_field( + std::string(str_ref_no_term.data, str_ref_no_term.size)); + query_status = str_reader->query(context, field_name, qp_1000, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; EXPECT_EQ(bitmap->cardinality(), 0) << "V3: Should find 0 documents matching 'noexist'"; @@ -1064,7 +1093,8 @@ class InvertedIndexReaderTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = index_reader->query(context, field_name, &str_ref, + Field qp_1067 = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = index_reader->query(context, field_name, qp_1067, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); ASSERT_TRUE(query_status.ok()) << "Query failed for term '" << query_term << "' in file " @@ -2086,7 +2116,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string field_name = "1"; // c2 unique_id StringRef query_val(values[0].data, values[0].size); - Status st = mock_reader->query(context, field_name, &query_val, + Field qp_2089 = + Field::create_field(std::string(query_val.data, query_val.size)); + Status st = mock_reader->query(context, field_name, qp_2089, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_FALSE(st.ok()); @@ -2148,7 +2180,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query_term = "world"; StringRef query_val(query_term.data(), query_term.size()); - Status st = mock_reader->query(context, field_name, &query_val, + Field qp_2151 = + Field::create_field(std::string(query_val.data, query_val.size)); + Status st = mock_reader->query(context, field_name, qp_2151, InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); EXPECT_FALSE(st.ok()); @@ 
-2158,7 +2192,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string phrase_query = "Apache Doris"; StringRef phrase_query_val(phrase_query.data(), phrase_query.size()); - st = mock_reader->query(context, field_name, &phrase_query_val, + Field qp_2161 = Field::create_field( + std::string(phrase_query_val.data, phrase_query_val.size)); + st = mock_reader->query(context, field_name, qp_2161, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_FALSE(st.ok()); @@ -2271,7 +2307,9 @@ class InvertedIndexReaderTest : public testing::Test { std::shared_ptr bitmap = std::make_shared(); std::string query_lower = "apple"; // lowercase StringRef str_ref(query_lower.c_str(), query_lower.length()); - auto status = str_reader->query(context, "c2", &str_ref, + Field qp_2274 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = str_reader->query(context, "c2", qp_2274, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_GT(bitmap->cardinality(), 0) << "Should find 'Apple' with lowercase query"; @@ -2311,7 +2349,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string long_query = "this_is_a_very_long_string_that_exceeds_ignore_above_limit"; StringRef str_ref(long_query.c_str(), long_query.length()); - auto status = str_reader->query(context, "c2", &str_ref, + Field qp_2314 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = str_reader->query(context, "c2", qp_2314, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_FALSE(status.ok()); EXPECT_EQ(status.code(), ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED); @@ -2373,7 +2413,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "quick database"; StringRef query_ref(query.c_str(), query.length()); - auto status = fulltext_reader->query(context, "c2", &query_ref, + Field qp_2376 = + Field::create_field(std::string(query_ref.data, query_ref.size)); + auto status = 
fulltext_reader->query(context, "c2", qp_2376, InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_GT(bitmap->cardinality(), 0) @@ -2386,7 +2428,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "search fast"; StringRef query_ref(query.c_str(), query.length()); - auto status = fulltext_reader->query(context, "c2", &query_ref, + Field qp_2389 = + Field::create_field(std::string(query_ref.data, query_ref.size)); + auto status = fulltext_reader->query(context, "c2", qp_2389, InvertedIndexQueryType::MATCH_ALL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -2397,8 +2441,10 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "quick brown"; StringRef query_ref(query.c_str(), query.length()); + Field qp = + Field::create_field(std::string(query_ref.data, query_ref.size)); auto status = fulltext_reader->query( - context, "c2", &query_ref, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); + context, "c2", qp, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -2408,7 +2454,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "sear"; StringRef query_ref(query.c_str(), query.length()); - auto status = fulltext_reader->query(context, "c2", &query_ref, + Field qp_2411 = + Field::create_field(std::string(query_ref.data, query_ref.size)); + auto status = fulltext_reader->query(context, "c2", qp_2411, InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY, bitmap); EXPECT_TRUE(status.ok()); @@ -2420,8 +2468,10 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "qu.*k"; StringRef query_ref(query.c_str(), query.length()); + Field qp = + Field::create_field(std::string(query_ref.data, query_ref.size)); auto status = fulltext_reader->query( - context, "c2", &query_ref, InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); + context, "c2", qp, InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); 
EXPECT_TRUE(status.ok()); } } @@ -2481,7 +2531,8 @@ class InvertedIndexReaderTest : public testing::Test { InvertedIndexParam param; param.column_name = "c2"; - param.query_value = &str_ref; + param.query_value = + Field::create_field(std::string(str_ref.data, str_ref.size)); param.query_type = InvertedIndexQueryType::EQUAL_QUERY; param.num_rows = 3; param.roaring = bitmap; @@ -2494,9 +2545,10 @@ class InvertedIndexReaderTest : public testing::Test { size_t count = 0; auto* inverted_index_iterator = static_cast(iterator.get()); inverted_index_iterator->set_context(context); + Field try_qp = Field::create_field(std::string(str_ref.data, str_ref.size)); status = inverted_index_iterator->try_read_from_inverted_index( - std::static_pointer_cast(inverted_index_reader), "c2", - &str_ref, InvertedIndexQueryType::EQUAL_QUERY, &count); + std::static_pointer_cast(inverted_index_reader), "c2", try_qp, + InvertedIndexQueryType::EQUAL_QUERY, &count); EXPECT_TRUE(status.ok()); } @@ -2568,7 +2620,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "500"; StringRef str_ref(query.c_str(), query.length()); - auto status = str_reader->query(context, "c2", &str_ref, + Field qp_2571 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = str_reader->query(context, "c2", qp_2571, InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); // This might succeed or fail depending on the implementation limits // The important thing is we handle the potential TooManyClauses error gracefully @@ -2605,7 +2659,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string empty_query = ""; StringRef str_ref(empty_query.c_str(), empty_query.length()); - auto status = fulltext_reader->query(context, "c2", &str_ref, + Field qp_2608 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = fulltext_reader->query(context, "c2", qp_2608, InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); // Should either succeed with empty 
result or fail gracefully } @@ -2747,7 +2803,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "500"; StringRef str_ref(query.c_str(), query.length()); - auto status = str_reader->query(context, "c2", &str_ref, + Field qp_2750 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = str_reader->query(context, "c2", qp_2750, InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); // This might succeed or fail depending on the implementation limits // The important thing is we handle the potential TooManyClauses error gracefully @@ -2784,7 +2842,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string empty_query; StringRef str_ref(empty_query.c_str(), empty_query.length()); - auto status = fulltext_reader->query(context, "c2", &str_ref, + Field qp_2787 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = fulltext_reader->query(context, "c2", qp_2787, InvertedIndexQueryType::MATCH_ANY_QUERY, bitmap); // Should either succeed with empty result or fail gracefully } @@ -2835,8 +2895,10 @@ class InvertedIndexReaderTest : public testing::Test { std::string regexp_query = "test.*"; StringRef query_ref(regexp_query.c_str(), regexp_query.length()); + Field qp = + Field::create_field(std::string(query_ref.data, query_ref.size)); auto status = fulltext_reader->query( - context, "c2", &query_ref, InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); + context, "c2", qp, InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -2871,48 +2933,335 @@ class InvertedIndexReaderTest : public testing::Test { std::string query = "cherry"; StringRef str_ref(query.c_str(), query.length()); - auto status = str_reader->query(context, "c2", &str_ref, + Field qp_2874 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + auto status = str_reader->query(context, "c2", qp_2874, InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test 
LESS_EQUAL_QUERY bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + Field qp_2880 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2880, InvertedIndexQueryType::LESS_EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test GREATER_THAN_QUERY bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + Field qp_2886 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2886, InvertedIndexQueryType::GREATER_THAN_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test GREATER_EQUAL_QUERY bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + Field qp_2892 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2892, InvertedIndexQueryType::GREATER_EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test MATCH_PHRASE_QUERY for StringType bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + Field qp_2898 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2898, InvertedIndexQueryType::MATCH_PHRASE_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test MATCH_PHRASE_PREFIX_QUERY for StringType bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + Field qp_2904 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2904, InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY, bitmap); EXPECT_TRUE(status.ok()); // Test MATCH_REGEXP_QUERY for StringType bitmap = std::make_shared(); - status = str_reader->query(context, "c2", &str_ref, + Field qp_2910 = + Field::create_field(std::string(str_ref.data, str_ref.size)); + status = str_reader->query(context, "c2", qp_2910, InvertedIndexQueryType::MATCH_REGEXP_QUERY, bitmap); EXPECT_TRUE(status.ok()); } } + 
// Generic BKD range-query verifier. Writes `values` into the BKD index + // for `column_name`, then runs EQUAL / LESS_THAN / LESS_EQUAL / + // GREATER_THAN / GREATER_EQUAL queries against `threshold`. Expected + // cardinalities are derived from the input `values` + `threshold` via + // std::count_if, so the caller doesn't have to keep them in sync. + // + // Locks in: + // * the typed-param interface (TypedInvertedIndexQueryParam) + // * the +/-infinity sentinels routed through type_limit + + // PrimitiveTypeConvertor + // * BKD's writer/reader/visitor agreement on KeyCoder-encoded bytes + template + void verify_bkd_range_queries(int col_id, std::string_view rowset_id, + const std::string& column_name, std::vector values, + T threshold) { + OlapReaderStatistics stats; + RuntimeState runtime_state; + io::IOContext io_ctx; + + IndexQueryContextPtr context = std::make_shared(); + context->io_ctx = &io_ctx; + context->stats = &stats; + context->runtime_state = &runtime_state; + + TabletIndex idx_meta; + std::string index_path_prefix; + prepare_bkd_index_typed(rowset_id, /*seg_id=*/0, col_id, values, &idx_meta, + &index_path_prefix); + + auto reader = std::make_shared( + io::global_local_filesystem(), index_path_prefix, InvertedIndexStorageFormatPB::V2); + EXPECT_TRUE(reader->init().ok()); + + auto bkd_reader = BkdIndexReader::create_shared(&idx_meta, reader); + EXPECT_NE(bkd_reader, nullptr); + + auto run_query = [&](InvertedIndexQueryType qt, T thr) { + using raw_t = typename PrimitiveTypeTraits::StorageFieldType; + Field qp = Field::create_field_from_olap_value(static_cast(thr)); + auto bitmap = std::make_shared(); + auto status = bkd_reader->query(context, column_name, qp, qt, bitmap); + EXPECT_TRUE(status.ok()) << column_name << ": " << status; + return bitmap->cardinality(); + }; + + const auto expect_eq = std::count_if(values.begin(), values.end(), + [&](const T& v) { return v == threshold; }); + const auto expect_lt = std::count_if(values.begin(), 
values.end(), + [&](const T& v) { return v < threshold; }); + const auto expect_le = std::count_if(values.begin(), values.end(), + [&](const T& v) { return v <= threshold; }); + const auto expect_gt = std::count_if(values.begin(), values.end(), + [&](const T& v) { return v > threshold; }); + const auto expect_ge = std::count_if(values.begin(), values.end(), + [&](const T& v) { return v >= threshold; }); + + EXPECT_EQ(run_query(InvertedIndexQueryType::EQUAL_QUERY, threshold), expect_eq) + << column_name << " EQUAL"; + EXPECT_EQ(run_query(InvertedIndexQueryType::LESS_THAN_QUERY, threshold), expect_lt) + << column_name << " LESS_THAN (relies on encode_min sentinel)"; + EXPECT_EQ(run_query(InvertedIndexQueryType::LESS_EQUAL_QUERY, threshold), expect_le) + << column_name << " LESS_EQUAL (relies on encode_min sentinel)"; + EXPECT_EQ(run_query(InvertedIndexQueryType::GREATER_THAN_QUERY, threshold), expect_gt) + << column_name << " GREATER_THAN (relies on encode_max sentinel)"; + EXPECT_EQ(run_query(InvertedIndexQueryType::GREATER_EQUAL_QUERY, threshold), expect_ge) + << column_name << " GREATER_EQUAL (relies on encode_max sentinel)"; + } + + // Per-type wrappers. col_id values match create_comprehensive_schema() + // (commented-out c_double/c_float shift later indices, hence date=4, + // datetime=5, decimal=6, bool=7, tinyint=8, smallint=9, largeint=10, + // datev2=12, datetimev2=13, timestamptz=14). + void test_bkd_range_int() { + // INT32 row counts: -1000 (loss), 0 (balance), 42, 100, 200, 300. + verify_bkd_range_queries( + /*col_id=*/0, "bkd_range_int", "c_int", {-1000, 0, 42, 100, 200, 300}, + /*threshold=*/100); + } + void test_bkd_range_bigint() { + // INT64 nanosecond timestamps (relative epoch deltas). + verify_bkd_range_queries( + /*col_id=*/1, "bkd_range_bigint", "c_bigint", + {-1'000'000LL, 0LL, 1'500LL, 1'000'000LL, 1'000'000'000LL, 100'000'000'000LL}, + /*threshold=*/1'000'000LL); + } + void test_bkd_range_smallint() { + // INT16 range: -32768..32767, e.g. 
signed short port deltas. + verify_bkd_range_queries( + /*col_id=*/9, "bkd_range_smallint", "c_smallint", + {int16_t(-32768), int16_t(-1024), int16_t(-1), int16_t(0), int16_t(8080), + int16_t(32767)}, + /*threshold=*/int16_t(0)); + } + void test_bkd_range_tinyint() { + // INT8 range: -128..127, e.g. log-level / tinyint flags. + verify_bkd_range_queries( + /*col_id=*/8, "bkd_range_tinyint", "c_tinyint", + {int8_t(-128), int8_t(-10), int8_t(-1), int8_t(0), int8_t(50), int8_t(127)}, + /*threshold=*/int8_t(0)); + } + void test_bkd_range_largeint() { + // INT128. Spans negative through ~10^12 to exercise the high half. + verify_bkd_range_queries( + /*col_id=*/10, "bkd_range_largeint", "c_largeint", + {static_cast<__int128_t>(-1'000'000), static_cast<__int128_t>(-1), + static_cast<__int128_t>(0), static_cast<__int128_t>(1), + static_cast<__int128_t>(1'000'000), static_cast<__int128_t>(1'000'000'000'000LL)}, + /*threshold=*/static_cast<__int128_t>(0)); + } + void test_bkd_range_decimalv2() { + // Real DecimalV2 (DECIMAL(27,9)) literals: -100.0, 0.0, 42.5, 100.0, 200.0, 300.0 + // (decimal12_t.fraction is scaled by 10^9). 
+ verify_bkd_range_queries( + /*col_id=*/6, "bkd_range_decimalv2", "c_decimal", + {decimal12_t {-100, 0}, decimal12_t {0, 0}, decimal12_t {42, 500'000'000}, + decimal12_t {100, 0}, decimal12_t {200, 0}, decimal12_t {300, 0}}, + /*threshold=*/decimal12_t {100, 0}); + } + void test_bkd_range_date() { + // DATE in OLAP packed format `(year << 9) | (month << 5) | day`: + // 2020-01-01, 2021-06-15, 2023-03-10, 2024-12-31, 2026-08-08, 2030-01-01 + auto pack_date = [](int y, int m, int d) -> uint24_t { + return uint24_t(static_cast((y << 9) | (m << 5) | d)); + }; + verify_bkd_range_queries( + /*col_id=*/4, "bkd_range_date", "c_date", + {pack_date(2020, 1, 1), pack_date(2021, 6, 15), pack_date(2023, 3, 10), + pack_date(2024, 12, 31), pack_date(2026, 8, 8), pack_date(2030, 1, 1)}, + /*threshold=*/pack_date(2024, 12, 31)); + } + void test_bkd_range_datetime() { + // OLAP DATETIME packs as decimal YYYYMMDDhhmmss (see VecDateTimeValue:: + // to_olap_datetime). TypedInvertedIndexQueryParam::storage_val + // is int64_t (conditional_t override) to line up with KeyCoder's + // signed view. + auto dt = [](int y, int mo, int d, int h, int mi, int s) -> int64_t { + return static_cast((static_cast(y) * 10000 + mo * 100 + d) * + 1000000ULL + + static_cast(h) * 10000 + mi * 100 + s); + }; + verify_bkd_range_queries( + /*col_id=*/5, "bkd_range_datetime", "c_datetime", + {dt(2020, 1, 1, 12, 0, 0), dt(2021, 1, 1, 12, 0, 0), dt(2022, 6, 15, 15, 0, 0), + dt(2024, 3, 10, 9, 30, 0), dt(2025, 12, 25, 0, 0, 0), dt(2030, 1, 1, 12, 0, 0)}, + /*threshold=*/dt(2024, 3, 10, 9, 30, 0)); + } + void test_bkd_range_datev2() { + // DateV2 packed format: bits [0..4]=day, [5..8]=month, [9..23]=year. 
+ auto pack_datev2 = [](int y, int m, int d) -> uint32_t { + return static_cast((y << 9) | (m << 5) | d); + }; + verify_bkd_range_queries( + /*col_id=*/12, "bkd_range_datev2", "c_datev2", + {pack_datev2(2020, 1, 1), pack_datev2(2021, 6, 15), pack_datev2(2023, 3, 10), + pack_datev2(2024, 12, 31), pack_datev2(2026, 8, 8), pack_datev2(2030, 1, 1)}, + /*threshold=*/pack_datev2(2024, 12, 31)); + } + // DateTimeV2 / TimestampTz packing per vdatetime_value.h: + // [date_v2 << 37] | [hour << 32] | [minute << 26] | [second << 20] | microsecond + // date_v2 = (year << 9) | (month << 5) | day + static uint64_t pack_dtv2(int y, int mo, int d, int h, int mi, int s, int us = 0) { + uint64_t date = (static_cast(y) << 9) | (static_cast(mo) << 5) | d; + return (date << 37) | (static_cast(h) << 32) | (static_cast(mi) << 26) | + (static_cast(s) << 20) | us; + } + void test_bkd_range_datetimev2() { + verify_bkd_range_queries( + /*col_id=*/13, "bkd_range_datetimev2", "c_datetimev2", + {pack_dtv2(2020, 1, 1, 12, 0, 0), pack_dtv2(2021, 6, 15, 15, 0, 0), + pack_dtv2(2023, 3, 10, 9, 30, 0), pack_dtv2(2024, 12, 31, 23, 59, 59), + pack_dtv2(2026, 8, 8, 8, 8, 8), pack_dtv2(2030, 1, 1, 12, 0, 0)}, + /*threshold=*/pack_dtv2(2024, 12, 31, 23, 59, 59)); + } + void test_bkd_range_timestamptz() { + // TimestampTzValue storage = uint64_t with the same DateTimeV2 packing + // (the TZ offset lives outside the BKD-indexed key). + verify_bkd_range_queries( + /*col_id=*/14, "bkd_range_timestamptz", "c_timestamptz", + {pack_dtv2(2020, 1, 1, 12, 0, 0), pack_dtv2(2021, 6, 15, 15, 0, 0), + pack_dtv2(2023, 3, 10, 9, 30, 0), pack_dtv2(2024, 12, 31, 23, 59, 59), + pack_dtv2(2026, 8, 8, 8, 8, 8), pack_dtv2(2030, 1, 1, 12, 0, 0)}, + /*threshold=*/pack_dtv2(2024, 12, 31, 23, 59, 59)); + } + void test_bkd_range_bool() { + // Storage = uint8_t. With duplicates {false,false,false,true,true,true} + // threshold=false means LT=0 / LE=3 / GT=3 / GE=6. 
+ verify_bkd_range_queries( + /*col_id=*/7, "bkd_range_bool", "c_bool", + {uint8_t(0), uint8_t(0), uint8_t(0), uint8_t(1), uint8_t(1), uint8_t(1)}, + /*threshold=*/uint8_t(0)); + } + void test_bkd_range_float() { + // FLOAT real values: ~-100.5 (negative offset), -1.25, 0.0, π + // approximated, 100.25, 1234.5 (mid-positive). + verify_bkd_range_queries( + /*col_id=*/15, "bkd_range_float", "c_float", + {-100.5f, -1.25f, 0.0f, 3.14159f, 100.25f, 1234.5f}, + /*threshold=*/3.14159f); + } + void test_bkd_range_double() { + // DOUBLE real values across magnitudes from -1e10 to +1e10, including + // negative scientific, π, and large positive. + verify_bkd_range_queries( + /*col_id=*/16, "bkd_range_double", "c_double", + {-9.87654321e10, -1.5, 0.0, 3.14159265358979, 1.0e6, 1.0e10}, + /*threshold=*/3.14159265358979); + } + void test_bkd_range_decimal32() { + // DECIMAL(9, 2). Storage = real_value × 10^2. + auto d = [](double v) { return static_cast(std::llround(v * 100)); }; + verify_bkd_range_queries( + /*col_id=*/17, "bkd_range_decimal32", "c_decimal32", + {d(-1.00), d(-0.01), d(0.00), d(1.23), d(9999.99), d(999999.99)}, + /*threshold=*/d(1.23)); + } + void test_bkd_range_decimal64() { + // DECIMAL(18, 4). Storage = real_value × 10^4. + auto d = [](double v) { return static_cast(std::llround(v * 10000)); }; + verify_bkd_range_queries( + /*col_id=*/18, "bkd_range_decimal64", "c_decimal64", + {d(-100.0), d(0.0), d(0.0123), d(12345.6789), d(99999999.9999), d(9999999999.9999)}, + /*threshold=*/d(12345.6789)); + } + void test_bkd_range_decimal128i() { + // DECIMAL(38, 10) stored as Int128. Values: + // -100.0000000000, -0.0000000001, 0, 1.2345678900, + // 12345.6789012345, 1e30 (ledger-scale). 
+ verify_bkd_range_queries( + /*col_id=*/19, "bkd_range_decimal128i", "c_decimal128i", + {static_cast<__int128_t>(-1'000'000'000'000LL), static_cast<__int128_t>(-1), + static_cast<__int128_t>(0), static_cast<__int128_t>(12'345'678'900LL), + static_cast<__int128_t>(123'456'789'012'345LL), + static_cast<__int128_t>(1'000'000'000'000'000LL) * + static_cast<__int128_t>(1'000'000'000'000'000LL)}, + /*threshold=*/static_cast<__int128_t>(12'345'678'900LL)); + } + void test_bkd_range_decimal256() { + // DECIMAL(76, 20) stored as wide::Int256. Use scaled integers spanning + // a representative range from -1e6 up to 10^18. + verify_bkd_range_queries( + /*col_id=*/20, "bkd_range_decimal256", "c_decimal256", + {wide::Int256(-1'000'000), wide::Int256(-1), wide::Int256(0), + wide::Int256(123'456'789), wide::Int256(123'456'789'012'345LL), + wide::Int256(1'000'000'000'000'000'000LL)}, + /*threshold=*/wide::Int256(123'456'789)); + } + void test_bkd_range_ipv4() { + // Real IPv4 addresses. uint32_t encoding = (a<<24)|(b<<16)|(c<<8)|d: + // 0.0.0.1, 10.0.0.1, 127.0.0.1, 192.168.0.1, 192.168.0.254, 255.255.255.254 + verify_bkd_range_queries( + /*col_id=*/21, "bkd_range_ipv4", "c_ipv4", + {0x00000001U, 0x0A000001U, 0x7F000001U, 0xC0A80001U, 0xC0A800FEU, 0xFFFFFFFEU}, + /*threshold=*/0xC0A80001U); // 192.168.0.1 + } + void test_bkd_range_ipv6() { + // Real IPv6 addresses (uint128_t = 16-byte big-endian view): + // ::1 (loopback) + // ::ffff:7f00:0001 (IPv4-mapped 127.0.0.1) + // 2001:db8::1 (documentation prefix) + // 2001:db8:1::1 + // fe80::1 (link-local) + // ffff:ffff:: (last valid) + auto ipv6 = [](uint64_t hi, uint64_t lo) -> uint128_t { + return (static_cast(hi) << 64) | lo; + }; + verify_bkd_range_queries( + /*col_id=*/22, "bkd_range_ipv6", "c_ipv6", + {ipv6(0, 1), ipv6(0, 0x0000FFFF7F000001ULL), ipv6(0x20010DB800000000ULL, 1), + ipv6(0x20010DB800010000ULL, 1), ipv6(0xFE80000000000000ULL, 1), + ipv6(0xFFFFFFFF00000000ULL, 0)}, + /*threshold=*/ipv6(0x20010DB800000000ULL, 1)); // 
2001:db8::1 + } + // Test BKD specific uncovered paths void test_bkd_uncovered_paths() { std::string_view rowset_id = "test_bkd_uncovered"; @@ -2953,13 +3302,14 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { // Test try_query path size_t count = 0; - auto status = bkd_reader->try_query(context, "c1", &test_case.second, test_case.first, - &count); + Field qp_2956 = Field::create_field(test_case.second); + auto status = bkd_reader->try_query(context, "c1", qp_2956, test_case.first, &count); EXPECT_TRUE(status.ok()) << "Try query type: " << static_cast(test_case.first); // Test actual query path std::shared_ptr bitmap = std::make_shared(); - status = bkd_reader->query(context, "c1", &test_case.second, test_case.first, bitmap); + Field qp_2962 = Field::create_field(test_case.second); + status = bkd_reader->query(context, "c1", qp_2962, test_case.first, bitmap); EXPECT_TRUE(status.ok()) << "Query type: " << static_cast(test_case.first); } @@ -2968,13 +3318,15 @@ class InvertedIndexReaderTest : public testing::Test { int32_t max_value = 100; // Greater than maximum in data std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c1", &min_value, + Field qp_2971 = Field::create_field(min_value); + auto status = bkd_reader->query(context, "c1", qp_2971, InvertedIndexQueryType::GREATER_THAN_QUERY, bitmap); EXPECT_TRUE(status.ok()); bitmap = std::make_shared(); - status = bkd_reader->query(context, "c1", &max_value, - InvertedIndexQueryType::LESS_THAN_QUERY, bitmap); + Field qp_2976 = Field::create_field(max_value); + status = bkd_reader->query(context, "c1", qp_2976, InvertedIndexQueryType::LESS_THAN_QUERY, + bitmap); EXPECT_TRUE(status.ok()); } @@ -3023,7 +3375,7 @@ class InvertedIndexReaderTest : public testing::Test { // This should trigger the bypass logic due to low threshold InvertedIndexParam param; param.column_name = "c1"; - param.query_value = &query_value; + param.query_value = 
Field::create_field(query_value); param.query_type = InvertedIndexQueryType::LESS_THAN_QUERY; param.num_rows = 5; param.roaring = bitmap; @@ -3037,7 +3389,7 @@ class InvertedIndexReaderTest : public testing::Test { bitmap = std::make_shared(); InvertedIndexParam param1; param1.column_name = "c1"; - param1.query_value = &query_value; + param1.query_value = Field::create_field(query_value); param1.query_type = InvertedIndexQueryType::EQUAL_QUERY; param1.num_rows = 5; param1.roaring = bitmap; @@ -3047,10 +3399,11 @@ class InvertedIndexReaderTest : public testing::Test { // Test try_read_from_inverted_index with non-BKD compatible query size_t count = 0; + Field try_qp = Field::create_field(query_value); status = inverted_index_iterator->try_read_from_inverted_index( std::static_pointer_cast( iterator->get_reader(InvertedIndexReaderType::STRING_TYPE)), - "c1", &query_value, InvertedIndexQueryType::MATCH_ANY_QUERY, &count); + "c1", try_qp, InvertedIndexQueryType::MATCH_ANY_QUERY, &count); EXPECT_TRUE(status.ok()); // Should succeed but not do anything for non-BKD queries } @@ -3080,6 +3433,16 @@ class InvertedIndexReaderTest : public testing::Test { {"c_datev2", FieldType::OLAP_FIELD_TYPE_DATEV2, 4, false}, {"c_datetimev2", FieldType::OLAP_FIELD_TYPE_DATETIMEV2, 8, false}, {"c_timestamptz", FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ, 8, false}, + // Appended (col_id 15..) — keep new entries here so existing + // col_id references in older tests remain stable. 
+ {"c_float", FieldType::OLAP_FIELD_TYPE_FLOAT, 4, false}, // 15 + {"c_double", FieldType::OLAP_FIELD_TYPE_DOUBLE, 8, false}, // 16 + {"c_decimal32", FieldType::OLAP_FIELD_TYPE_DECIMAL32, 4, false}, // 17 + {"c_decimal64", FieldType::OLAP_FIELD_TYPE_DECIMAL64, 8, false}, // 18 + {"c_decimal128i", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 16, false}, // 19 + {"c_decimal256", FieldType::OLAP_FIELD_TYPE_DECIMAL256, 32, false}, // 20 + {"c_ipv4", FieldType::OLAP_FIELD_TYPE_IPV4, 4, false}, // 21 + {"c_ipv6", FieldType::OLAP_FIELD_TYPE_IPV6, 16, false}, // 22 }; for (size_t i = 0; i < columns.size(); ++i) { @@ -3189,8 +3552,8 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_int", &test_case.second, - test_case.first, bitmap); + Field qp_3192 = Field::create_field(test_case.second); + auto status = bkd_reader->query(context, "c_int", qp_3192, test_case.first, bitmap); EXPECT_TRUE(status.ok()) << "Query type: " << static_cast(test_case.first); if (test_case.first == InvertedIndexQueryType::EQUAL_QUERY) { @@ -3201,8 +3564,9 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { size_t count = 0; - auto status = bkd_reader->try_query(context, "c_int", &test_case.second, - test_case.first, &count); + Field qp_3204 = Field::create_field(test_case.second); + auto status = + bkd_reader->try_query(context, "c_int", qp_3204, test_case.first, &count); EXPECT_TRUE(status.ok()) << "Try query type: " << static_cast(test_case.first); } } @@ -3226,7 +3590,8 @@ class InvertedIndexReaderTest : public testing::Test { int64_t query_value = 1000000LL; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_bigint", &query_value, + Field qp_3229 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_bigint", qp_3229, InvertedIndexQueryType::EQUAL_QUERY, 
bitmap); EXPECT_TRUE(status.ok()); EXPECT_EQ(bitmap->cardinality(), 1); @@ -3259,8 +3624,10 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_timestamptz", &test_case.second, - test_case.first, bitmap); + Field qp_3262 = + Field::create_field_from_olap_value(test_case.second); + auto status = bkd_reader->query(context, "c_timestamptz", qp_3262, test_case.first, + bitmap); EXPECT_TRUE(status.ok()) << "Query type: " << static_cast(test_case.first); if (test_case.first == InvertedIndexQueryType::EQUAL_QUERY) { @@ -3271,7 +3638,9 @@ class InvertedIndexReaderTest : public testing::Test { for (auto& test_case : test_cases) { size_t count = 0; - auto status = bkd_reader->try_query(context, "c_timestamptz", &test_case.second, + Field qp_3274 = + Field::create_field_from_olap_value(test_case.second); + auto status = bkd_reader->try_query(context, "c_timestamptz", qp_3274, test_case.first, &count); EXPECT_TRUE(status.ok()) << "Try query type: " << static_cast(test_case.first); } @@ -3297,7 +3666,9 @@ class InvertedIndexReaderTest : public testing::Test { double query_value = 3.14; std::shared_ptr bitmap = std::make_shared(); auto status = - bkd_reader->query(&io_ctx, &stats, &runtime_state, "c_double", &query_value, + auto qp_3300 = TypedInvertedIndexQueryParam::create_unique(); + qp_3300->set_value(&stats); + bkd_reader->query(&io_ctx, qp_3300.get(), &runtime_state, "c_double", &query_value, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_EQ(bitmap->cardinality(), 1); @@ -3323,7 +3694,9 @@ class InvertedIndexReaderTest : public testing::Test { float query_value = 1.5f; std::shared_ptr bitmap = std::make_shared(); auto status = - bkd_reader->query(&io_ctx, &stats, &runtime_state, "c_float", &query_value, + auto qp_3326 = TypedInvertedIndexQueryParam::create_unique(); + qp_3326->set_value(&stats); + 
bkd_reader->query(&io_ctx, qp_3326.get(), &runtime_state, "c_float", &query_value, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); EXPECT_EQ(bitmap->cardinality(), 1); @@ -3363,7 +3736,10 @@ class InvertedIndexReaderTest : public testing::Test { uint32_t query_value = 20240102; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_date", &query_value, + // TYPE_DATE storage is uint24_t — narrow from the test's uint32_t. + typename PrimitiveTypeTraits::StorageFieldType date_storage(query_value); + Field qp_3366 = Field::create_field_from_olap_value(date_storage); + auto status = bkd_reader->query(context, "c_date", qp_3366, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3386,9 +3762,11 @@ class InvertedIndexReaderTest : public testing::Test { auto bkd_reader = BkdIndexReader::create_shared(&idx_meta, reader); EXPECT_NE(bkd_reader, nullptr); - uint64_t query_value = 20240101130000ULL; + int64_t query_value = 20240101130000LL; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_datetime", &query_value, + Field qp_3391 = Field::create_field_from_olap_value( + static_cast(query_value)); + auto status = bkd_reader->query(context, "c_datetime", qp_3391, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3412,7 +3790,10 @@ class InvertedIndexReaderTest : public testing::Test { bool query_value = true; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_bool", &query_value, + // TYPE_BOOLEAN storage is uint8_t. + uint8_t bool_storage = query_value ? 
1 : 0; + Field qp_3415 = Field::create_field(bool_storage); + auto status = bkd_reader->query(context, "c_bool", qp_3415, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3436,7 +3817,8 @@ class InvertedIndexReaderTest : public testing::Test { int8_t query_value = 1; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_tinyint", &query_value, + Field qp_3439 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_tinyint", qp_3439, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3460,7 +3842,8 @@ class InvertedIndexReaderTest : public testing::Test { int16_t query_value = 1000; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_smallint", &query_value, + Field qp_3463 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_smallint", qp_3463, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3484,7 +3867,8 @@ class InvertedIndexReaderTest : public testing::Test { __int128 query_value = 0; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_largeint", &query_value, + Field qp_3487 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_largeint", qp_3487, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3508,7 +3892,8 @@ class InvertedIndexReaderTest : public testing::Test { uint32_t query_value = 20240202; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_datev2", &query_value, + Field qp_3511 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_datev2", qp_3511, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3533,7 +3918,8 @@ class InvertedIndexReaderTest : public testing::Test { uint64_t query_value = 20240201130000ULL; std::shared_ptr bitmap = 
std::make_shared(); - auto status = bkd_reader->query(context, "c_datetimev2", &query_value, + Field qp_3536 = Field::create_field(query_value); + auto status = bkd_reader->query(context, "c_datetimev2", qp_3536, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3558,7 +3944,8 @@ class InvertedIndexReaderTest : public testing::Test { uint64_t query_value = 20240201130000ULL; std::shared_ptr bitmap = std::make_shared(); - auto status = bkd_reader->query(context, "c_timestamptz", &query_value, + Field qp_3561 = Field::create_field_from_olap_value(query_value); + auto status = bkd_reader->query(context, "c_timestamptz", qp_3561, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()); } @@ -3644,8 +4031,9 @@ class InvertedIndexReaderTest : public testing::Test { std::string query_value = "test"; std::shared_ptr bitmap = std::make_shared(); + Field qp_unsupp = Field::create_field(query_value); auto query_status = - bkd_reader->query(context, "c_unsupported", &query_value, + bkd_reader->query(context, "c_unsupported", qp_unsupp, InvertedIndexQueryType::EQUAL_QUERY, bitmap); // This might fail due to unsupported type, which is what we want to test } @@ -3673,6 +4061,116 @@ TEST_F(InvertedIndexReaderTest, BkdIndexRead) { test_bkd_index_read(); } +// BKD half-bounded range query regression suite, one TEST_F per BKD-supported +// PrimitiveType. They all share `verify_bkd_range_queries`, which: +// - writes 6 sorted values into a fresh BKD index +// - asserts EQUAL / LESS_THAN / LESS_EQUAL / GREATER_THAN / GREATER_EQUAL +// cardinalities derived from the values via std::count_if. +// +// Locks in the typed-param interface, the +/-infinity sentinels routed +// through type_limit + PrimitiveTypeConvertor, and BKD +// writer/reader/visitor agreement. 
+TEST_F(InvertedIndexReaderTest, BkdRangeIntRangeQuery) { + test_bkd_range_int(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeBigIntRangeQuery) { + test_bkd_range_bigint(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeSmallIntRangeQuery) { + test_bkd_range_smallint(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeTinyIntRangeQuery) { + test_bkd_range_tinyint(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeLargeIntRangeQuery) { + test_bkd_range_largeint(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDecimalV2RangeQuery) { + test_bkd_range_decimalv2(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDateRangeQuery) { + test_bkd_range_date(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDateTimeRangeQuery) { + test_bkd_range_datetime(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDateV2RangeQuery) { + test_bkd_range_datev2(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDateTimeV2RangeQuery) { + test_bkd_range_datetimev2(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeTimestampTzRangeQuery) { + test_bkd_range_timestamptz(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeBoolRangeQuery) { + test_bkd_range_bool(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeFloatRangeQuery) { + test_bkd_range_float(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDoubleRangeQuery) { + test_bkd_range_double(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDecimal32RangeQuery) { + test_bkd_range_decimal32(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDecimal64RangeQuery) { + test_bkd_range_decimal64(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDecimal128IRangeQuery) { + test_bkd_range_decimal128i(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeDecimal256RangeQuery) { + test_bkd_range_decimal256(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeIPv4RangeQuery) { + test_bkd_range_ipv4(); +} +TEST_F(InvertedIndexReaderTest, BkdRangeIPv6RangeQuery) { + test_bkd_range_ipv6(); +} + +// Verifies that KeyCoder produces byte-identical +// output regardless of whether the input pointer is to int64_t or uint64_t. 
+// This is what makes TypedInvertedIndexQueryParam::storage_val +// = int64_t (signed view) and the historic uint64_t storage interchangeable for +// real datetime values: KeyCoder reads bit pattern via memcpy, then sign-flips +// based on its own CppType (= int64_t) — so signed/unsigned at the call site +// doesn't change the encoded bytes as long as bit patterns agree. +TEST(KeyCoderDateTimeTest, ByteIdenticalForSignedAndUnsignedInput) { + const auto* coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_DATETIME); + ASSERT_NE(coder, nullptr); + + // Mix realistic datetimes with the boundary values that drive sentinels. + constexpr int64_t kCases[] = { + 10101000000LL, // 0001-01-01 00:00:00 (smallest valid set_to_min sentinel) + 20200101120000LL, // 2020-01-01 12:00:00 + 20240310093000LL, // 2024-03-10 09:30:00 + 99991231235959LL, // 9999-12-31 23:59:59 (largest valid set_to_max sentinel) + std::numeric_limits::max(), // type_limit max sentinel + 0LL, // type_limit min (also = INT64_MIN's bit-flipped image) + std::numeric_limits::lowest(), + }; + + for (int64_t case_val : kCases) { + int64_t signed_val = case_val; + uint64_t unsigned_val; + std::memcpy(&unsigned_val, &signed_val, sizeof(unsigned_val)); + + std::string signed_buf; + std::string unsigned_buf; + coder->full_encode_ascending(&signed_val, &signed_buf); + coder->full_encode_ascending(&unsigned_val, &unsigned_buf); + + ASSERT_EQ(signed_buf.size(), sizeof(int64_t)); + ASSERT_EQ(unsigned_buf.size(), sizeof(uint64_t)); + EXPECT_EQ(signed_buf, unsigned_buf) + << "DATETIME KeyCoder must produce identical bytes for value " + << static_cast(case_val) << " regardless of pointer type"; + } +} + // Query cache test TEST_F(InvertedIndexReaderTest, QueryCache) { test_query_cache(); diff --git a/be/test/storage/segment/inverted_index_writer_test.cpp b/be/test/storage/segment/inverted_index_writer_test.cpp index 04742d5dee06fe..c91c44b0911ea3 100644 --- a/be/test/storage/segment/inverted_index_writer_test.cpp +++ 
b/be/test/storage/segment/inverted_index_writer_test.cpp @@ -33,6 +33,7 @@ #include "core/data_type/data_type_factory.hpp" #include "core/data_type/data_type_number.h" +#include "core/field.h" #include "io/fs/local_file_system.h" #include "runtime/runtime_state.h" #include "storage/field.h" @@ -176,7 +177,8 @@ class InvertedIndexWriterTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto status = bkd_reader->query(context, "c1", &values[i], + Field qp = Field::create_field(values[i]); + auto status = bkd_reader->query(context, "c1", qp, doris::segment_v2::InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(status.ok()) << status; @@ -202,7 +204,8 @@ class InvertedIndexWriterTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto status = bkd_reader->query(context, "c1", &test_value, + Field test_qp = Field::create_field(test_value); + auto status = bkd_reader->query(context, "c1", test_qp, doris::segment_v2::InvertedIndexQueryType::LESS_THAN_QUERY, less_than_bitmap); EXPECT_TRUE(status.ok()) << status; @@ -221,7 +224,7 @@ class InvertedIndexWriterTest : public testing::Test { // Test GREATER_THAN query std::shared_ptr greater_than_bitmap = std::make_shared(); - status = bkd_reader->query(context, "c1", &test_value, + status = bkd_reader->query(context, "c1", test_qp, doris::segment_v2::InvertedIndexQueryType::GREATER_THAN_QUERY, greater_than_bitmap); EXPECT_TRUE(status.ok()) << status; @@ -739,7 +742,8 @@ class InvertedIndexWriterTest : public testing::Test { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status = inverted_reader->query(context, field_name, &str_ref, + Field qp = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status = inverted_reader->query(context, field_name, qp, InvertedIndexQueryType::EQUAL_QUERY, bitmap); EXPECT_TRUE(query_status.ok()) << query_status; // For regular strings, both 
should work the same @@ -945,13 +949,13 @@ TEST_F(InvertedIndexWriterTest, CompareUnicodeStringWriteResults) { context->stats = &stats; context->runtime_state = &runtime_state; - auto query_status_enabled = - inverted_reader_enabled->query(context, field_name, &values[i], - InvertedIndexQueryType::EQUAL_QUERY, bitmap_enabled); + StringRef str_ref(values[i].data, values[i].size); + Field qp = Field::create_field(std::string(str_ref.data, str_ref.size)); + auto query_status_enabled = inverted_reader_enabled->query( + context, field_name, qp, InvertedIndexQueryType::EQUAL_QUERY, bitmap_enabled); auto query_status_disabled = inverted_reader_disabled->query( - context, field_name, &values[i], InvertedIndexQueryType::EQUAL_QUERY, - bitmap_disabled); + context, field_name, qp, InvertedIndexQueryType::EQUAL_QUERY, bitmap_disabled); EXPECT_TRUE(query_status_enabled.ok()) << query_status_enabled; EXPECT_TRUE(query_status_disabled.ok()) << query_status_disabled; diff --git a/be/test/storage/storage_types_test.cpp b/be/test/storage/storage_types_test.cpp index b3ba23a51a0c0d..5d79e4f56ec320 100644 --- a/be/test/storage/storage_types_test.cpp +++ b/be/test/storage/storage_types_test.cpp @@ -45,20 +45,6 @@ void common_test(typename TypeTraits::CppType src_val) { EXPECT_EQ(field_type, type->type()); EXPECT_EQ(sizeof(src_val), type->size()); - // test min - { - typename TypeTraits::CppType dst_val; - type->set_to_min((char*)&dst_val); - - EXPECT_TRUE(type->cmp((char*)&src_val, (char*)&dst_val) > 0); - } - // test max - { - typename TypeTraits::CppType dst_val; - type->set_to_max((char*)&dst_val); - // NOTE: bool input is true, this will return 0 - EXPECT_TRUE(type->cmp((char*)&src_val, (char*)&dst_val) <= 0); - } } template @@ -69,22 +55,6 @@ void test_char(Slice src_val) { EXPECT_EQ(field->type(), fieldType); EXPECT_EQ(sizeof(src_val), type->size()); - // test min - { - char buf[64]; - Slice dst_val(buf, sizeof(buf)); - field->set_to_min((char*)&dst_val); - - 
EXPECT_TRUE(type->cmp((char*)&src_val, (char*)&dst_val) > 0); - } - // test max - { - char buf[64]; - Slice dst_val(buf, sizeof(buf)); - field->set_to_max((char*)&dst_val); - - EXPECT_TRUE(type->cmp((char*)&src_val, (char*)&dst_val) < 0); - } delete field; }