Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,26 @@ class OccurBooleanQueryBuilder {
OccurBooleanQueryBuilder() = default;
~OccurBooleanQueryBuilder() = default;

void add(const QueryPtr& query, Occur occur) { _sub_queries.emplace_back(occur, query); }
void add(const QueryPtr& query, Occur occur, std::string binding_key = {}) {
_sub_queries.emplace_back(occur, query);
_binding_keys.emplace_back(std::move(binding_key));
}

void set_minimum_number_should_match(size_t value) { _minimum_number_should_match = value; }

QueryPtr build() {
if (_minimum_number_should_match.has_value()) {
return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries),
std::move(_binding_keys),
_minimum_number_should_match.value());
}
return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries));
return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries),
std::move(_binding_keys));
}

private:
std::vector<std::pair<Occur, QueryPtr>> _sub_queries;
std::vector<std::string> _binding_keys;
std::optional<size_t> _minimum_number_should_match;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,16 @@ using OccurBooleanQueryPtr = std::shared_ptr<OccurBooleanQuery>;

class OccurBooleanQuery : public Query {
public:
explicit OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses)
explicit OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses,
std::vector<std::string> binding_keys = {})
: _sub_queries(std::move(clauses)),
_binding_keys(std::move(binding_keys)),
_minimum_number_should_match(compute_default_minimum_should_match(_sub_queries)) {}

OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses,
size_t minimum_number_should_match)
std::vector<std::string> binding_keys, size_t minimum_number_should_match)
: _sub_queries(std::move(clauses)),
_binding_keys(std::move(binding_keys)),
_minimum_number_should_match(minimum_number_should_match) {}

~OccurBooleanQuery() override = default;
Expand All @@ -47,8 +50,8 @@ class OccurBooleanQuery : public Query {
sub_weights.emplace_back(occur, query->weight(enable_scoring));
}
return std::make_shared<OccurBooleanWeight<SumCombinerPtr>>(
std::move(sub_weights), _minimum_number_should_match, enable_scoring,
std::make_shared<SumCombiner>());
std::move(sub_weights), std::move(_binding_keys), _minimum_number_should_match,
enable_scoring, std::make_shared<SumCombiner>());
}

const std::vector<std::pair<Occur, QueryPtr>>& clauses() const { return _sub_queries; }
Expand All @@ -69,6 +72,7 @@ class OccurBooleanQuery : public Query {
}

std::vector<std::pair<Occur, QueryPtr>> _sub_queries;
std::vector<std::string> _binding_keys;
size_t _minimum_number_should_match = 0;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,27 @@
#include "olap/rowset/segment_v2/inverted_index/query_v2/disjunction_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/intersection.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/intersection_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/reqopt_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/union/buffered_union.h"

namespace doris::segment_v2::inverted_index::query_v2 {

template <typename ScoreCombinerPtrT>
OccurBooleanWeight<ScoreCombinerPtrT>::OccurBooleanWeight(
std::vector<std::pair<Occur, WeightPtr>> sub_weights, size_t minimum_number_should_match,
bool enable_scoring, ScoreCombinerPtrT score_combiner)
std::vector<std::pair<Occur, WeightPtr>> sub_weights, std::vector<std::string> binding_keys,
size_t minimum_number_should_match, bool enable_scoring, ScoreCombinerPtrT score_combiner)
: _sub_weights(std::move(sub_weights)),
_binding_keys(std::move(binding_keys)),
_minimum_number_should_match(minimum_number_should_match),
_enable_scoring(enable_scoring),
_score_combiner(std::move(score_combiner)) {}
_score_combiner(std::move(score_combiner)) {
DCHECK(_binding_keys.empty() || _binding_keys.size() == _sub_weights.size())
<< "binding_keys size (" << _binding_keys.size() << ") must match sub_weights size ("
<< _sub_weights.size() << ") when non-empty";
// Ensure binding_keys has the same size as sub_weights (pads with empty strings if needed).
_binding_keys.resize(_sub_weights.size());
}

template <typename ScoreCombinerPtrT>
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::scorer(const QueryExecutionContext& context) {
Expand Down Expand Up @@ -62,8 +70,10 @@ template <typename ScoreCombinerPtrT>
std::unordered_map<Occur, std::vector<ScorerPtr>>
OccurBooleanWeight<ScoreCombinerPtrT>::per_occur_scorers(const QueryExecutionContext& context) {
std::unordered_map<Occur, std::vector<ScorerPtr>> result;
for (const auto& [occur, weight] : _sub_weights) {
auto sub_scorer = weight->scorer(context);
for (size_t i = 0; i < _sub_weights.size(); ++i) {
const auto& [occur, weight] = _sub_weights[i];
const auto& binding_key = _binding_keys[i];
auto sub_scorer = weight->scorer(context, binding_key);
if (sub_scorer) {
result[occur].push_back(std::move(sub_scorer));
}
Expand Down Expand Up @@ -122,17 +132,6 @@ std::optional<CombinationMethod> OccurBooleanWeight<ScoreCombinerPtrT>::build_sh
}
}

// Merges all MUST_NOT scorers into a single boxed scorer.
//
// @param must_not_scorers scorers of the MUST_NOT clauses; consumed by this call.
// @return nullptr when there are no MUST_NOT scorers, otherwise the boxed union of
//         all of them, built with a DoNothingCombiner.
template <typename ScoreCombinerPtrT>
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::build_exclude_opt(
        std::vector<ScorerPtr> must_not_scorers) {
    if (!must_not_scorers.empty()) {
        // The same combiner instance is shared by the union and by the boxing step.
        auto noop_combiner = std::make_shared<DoNothingCombiner>();
        auto merged = scorer_union(std::move(must_not_scorers), noop_combiner);
        return into_box_scorer(std::move(merged), noop_combiner);
    }
    return nullptr;
}

template <typename ScoreCombinerPtrT>
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::effective_must_scorer(
std::vector<ScorerPtr> must_scorers, size_t must_num_all_scorers) {
Expand Down Expand Up @@ -242,13 +241,24 @@ SpecializedScorer OccurBooleanWeight<ScoreCombinerPtrT>::complex_scorer(
return std::make_shared<EmptyScorer>();
}

ScorerPtr exclude_opt = build_exclude_opt(std::move(must_not_scorers));
// Collect null bitmaps from MUST_NOT scorers (read from index, no iteration needed)
// and union the scorers into one for lazy exclusion.
roaring::Roaring exclude_null;
ScorerPtr exclude_opt =
build_exclude_opt(std::move(must_not_scorers), context.null_resolver, exclude_null);

SpecializedScorer positive_opt =
build_positive_opt(*should_opt, std::move(must_scorers), combiner, must_special_counts,
should_special_counts);
// Use null-bitmap-aware ExcludeScorer for MUST_NOT clauses.
// ExcludeScorer keeps lazy TRUE exclusion via seek-based iteration and adds
// O(1) null bitmap checks so that NOT(NULL) = NULL (SQL three-valued logic).
// Documents where the excluded field is NULL are placed in the null bitmap
// rather than being incorrectly included in the true result set.
if (exclude_opt) {
ScorerPtr positive_boxed = into_box_scorer(std::move(positive_opt), combiner);
return make_exclude(std::move(positive_boxed), std::move(exclude_opt));
return make_exclude(std::move(positive_boxed), std::move(exclude_opt),
std::move(exclude_null), context.null_resolver);
}
return positive_opt;
}
Expand Down Expand Up @@ -321,6 +331,30 @@ ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::into_box_scorer(SpecializedScor
std::move(specialized));
}

// Builds the exclusion side of a boolean query from its MUST_NOT scorers.
//
// @param must_not_scorers  scorers of the MUST_NOT clauses; consumed by this call.
// @param resolver          passed through to each scorer's null-bitmap accessors;
//                          when nullptr no null bitmaps are collected.
// @param exclude_null_out  receives (by OR-ing in) the null bitmap of every scorer
//                          that reports one. Left untouched when the scorer list
//                          is empty or resolver is nullptr.
// @return nullptr when there are no MUST_NOT scorers, otherwise the boxed union of
//         all of them, built with a DoNothingCombiner.
template <typename ScoreCombinerPtrT>
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::build_exclude_opt(
        std::vector<ScorerPtr> must_not_scorers, const NullBitmapResolver* resolver,
        roaring::Roaring& exclude_null_out) {
    if (must_not_scorers.empty()) {
        return nullptr;
    }

    // Null bitmaps have to be gathered first: the scorers are moved into the
    // union below and are no longer reachable from this vector afterwards.
    if (resolver != nullptr) {
        for (auto& scorer : must_not_scorers) {
            if (!scorer || !scorer->has_null_bitmap(resolver)) {
                continue;
            }
            if (const auto* null_rows = scorer->get_null_bitmap(resolver)) {
                exclude_null_out |= *null_rows;
            }
        }
    }

    // Fold every MUST_NOT scorer into one scorer; the same combiner instance is
    // shared by the union and by the boxing step.
    auto noop_combiner = std::make_shared<DoNothingCombiner>();
    auto merged = scorer_union(std::move(must_not_scorers), noop_combiner);
    return into_box_scorer(std::move(merged), noop_combiner);
}

template class OccurBooleanWeight<SumCombinerPtr>;
template class OccurBooleanWeight<DoNothingCombinerPtr>;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#pragma once

#include <roaring/roaring.hh>

#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_scorer.h"
Expand Down Expand Up @@ -44,8 +46,8 @@ template <typename ScoreCombinerPtrT>
class OccurBooleanWeight : public Weight {
public:
OccurBooleanWeight(std::vector<std::pair<Occur, WeightPtr>> sub_weights,
size_t minimum_number_should_match, bool enable_scoring,
ScoreCombinerPtrT score_combiner);
std::vector<std::string> binding_keys, size_t minimum_number_should_match,
bool enable_scoring, ScoreCombinerPtrT score_combiner);
~OccurBooleanWeight() override = default;

ScorerPtr scorer(const QueryExecutionContext& context) override;
Expand All @@ -62,8 +64,6 @@ class OccurBooleanWeight : public Weight {
std::optional<CombinationMethod> build_should_opt(std::vector<ScorerPtr>& must_scorers,
std::vector<ScorerPtr> should_scorers,
CombinerT combiner, size_t num_all_scorers);
ScorerPtr build_exclude_opt(std::vector<ScorerPtr> must_not_scorers);

ScorerPtr effective_must_scorer(std::vector<ScorerPtr> must_scorers,
size_t must_num_all_scorers);

Expand All @@ -87,7 +87,12 @@ class OccurBooleanWeight : public Weight {
template <typename CombinerT>
ScorerPtr into_box_scorer(SpecializedScorer&& specialized, CombinerT combiner);

ScorerPtr build_exclude_opt(std::vector<ScorerPtr> must_not_scorers,
const NullBitmapResolver* resolver,
roaring::Roaring& exclude_null_out);

std::vector<std::pair<Occur, WeightPtr>> _sub_weights;
std::vector<std::string> _binding_keys;
size_t _minimum_number_should_match = 1;
bool _enable_scoring = false;
ScoreCombinerPtrT _score_combiner;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,29 @@ namespace doris::segment_v2::inverted_index::query_v2 {

template <typename TDocSet, typename TDocSetExclude>
Exclude<TDocSet, TDocSetExclude>::Exclude(TDocSet underlying_docset,
TDocSetExclude excluding_docset)
TDocSetExclude excluding_docset,
roaring::Roaring exclude_null,
const NullBitmapResolver* resolver)
: _underlying_docset(std::move(underlying_docset)),
_excluding_docset(std::move(excluding_docset)) {
_excluding_docset(std::move(excluding_docset)),
_exclude_null(std::move(exclude_null)) {
// Inherit the include scorer's null bitmap (e.g. from parent AND operations).
if (resolver != nullptr && _underlying_docset->has_null_bitmap(resolver)) {
const auto* nb = _underlying_docset->get_null_bitmap(resolver);
if (nb != nullptr) {
_null_bitmap |= *nb;
}
}

while (_underlying_docset->doc() != TERMINATED) {
uint32_t target = _underlying_docset->doc();
// O(1) null bitmap check (pre-computed from index, cheap).
if (!_exclude_null.isEmpty() && _exclude_null.contains(target)) {
_null_bitmap.add(target);
_underlying_docset->advance();
continue;
}
// Original lazy seek (unchanged).
if (!is_within(_excluding_docset, target)) {
break;
}
Expand All @@ -40,6 +58,12 @@ uint32_t Exclude<TDocSet, TDocSetExclude>::advance() {
if (candidate == TERMINATED) {
return TERMINATED;
}
// O(1) null bitmap check (pre-computed from index, cheap).
if (!_exclude_null.isEmpty() && _exclude_null.contains(candidate)) {
_null_bitmap.add(candidate);
continue;
}
// Original lazy seek (unchanged).
if (!is_within(_excluding_docset, candidate)) {
return candidate;
}
Expand All @@ -52,6 +76,11 @@ uint32_t Exclude<TDocSet, TDocSetExclude>::seek(uint32_t target) {
if (candidate == TERMINATED) {
return TERMINATED;
}
// O(1) null bitmap check (pre-computed from index, cheap).
if (!_exclude_null.isEmpty() && _exclude_null.contains(candidate)) {
_null_bitmap.add(candidate);
return advance();
}
if (!is_within(_excluding_docset, candidate)) {
return candidate;
}
Expand All @@ -76,11 +105,23 @@ float Exclude<TDocSet, TDocSetExclude>::score() {
return 0.0F;
}

ScorerPtr make_exclude(ScorerPtr underlying, ScorerPtr excluding) {
return std::make_shared<Exclude<ScorerPtr, ScorerPtr>>(std::move(underlying),
std::move(excluding));
// Reports whether this scorer currently holds any NULL documents.
//
// The resolver argument is ignored: the answer comes solely from the scorer's own
// _null_bitmap, which the constructor, advance() and seek() add to as documents
// are visited, so the result can change while the scorer is being iterated.
template <typename TDocSet, typename TDocSetExclude>
bool Exclude<TDocSet, TDocSetExclude>::has_null_bitmap(const NullBitmapResolver* /*resolver*/) {
    const bool no_nulls_recorded = _null_bitmap.isEmpty();
    return !no_nulls_recorded;
}

// Returns the NULL documents recorded by this scorer so far.
//
// The resolver argument is ignored.
// @return nullptr while no NULL document has been recorded, otherwise a pointer to
//         the scorer-owned bitmap (valid only as long as this scorer is alive).
template <typename TDocSet, typename TDocSetExclude>
const roaring::Roaring* Exclude<TDocSet, TDocSetExclude>::get_null_bitmap(
        const NullBitmapResolver* /*resolver*/) {
    if (_null_bitmap.isEmpty()) {
        return nullptr;
    }
    return &_null_bitmap;
}

// Factory for the type-erased exclude scorer.
//
// @param underlying    scorer producing the candidate documents.
// @param excluding     scorer whose documents are removed from the candidates.
// @param exclude_null  documents for which the excluded side is NULL; handed to
//                      the Exclude constructor.
// @param resolver      forwarded to the Exclude constructor; may be nullptr.
// @return a shared Exclude<ScorerPtr, ScorerPtr> owning all of the above.
ScorerPtr make_exclude(ScorerPtr underlying, ScorerPtr excluding, roaring::Roaring exclude_null,
                       const NullBitmapResolver* resolver) {
    using BoxedExclude = Exclude<ScorerPtr, ScorerPtr>;
    ScorerPtr exclude_scorer = std::make_shared<BoxedExclude>(
            std::move(underlying), std::move(excluding), std::move(exclude_null), resolver);
    return exclude_scorer;
}

template class Exclude<ScorerPtr, ScorerPtr>;

} // namespace doris::segment_v2::inverted_index::query_v2
} // namespace doris::segment_v2::inverted_index::query_v2
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#pragma once

#include <roaring/roaring.hh>

#include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"

namespace doris::segment_v2::inverted_index::query_v2 {
Expand All @@ -29,7 +31,8 @@ inline bool is_within(TDocSetExclude& docset, uint32_t doc) {
template <typename TDocSet, typename TDocSetExclude>
class Exclude final : public Scorer {
public:
Exclude(TDocSet underlying_docset, TDocSetExclude excluding_docset);
Exclude(TDocSet underlying_docset, TDocSetExclude excluding_docset,
roaring::Roaring exclude_null = {}, const NullBitmapResolver* resolver = nullptr);
~Exclude() override = default;

uint32_t advance() override;
Expand All @@ -38,13 +41,20 @@ class Exclude final : public Scorer {
uint32_t size_hint() const override;
float score() override;

bool has_null_bitmap(const NullBitmapResolver* resolver = nullptr) override;
const roaring::Roaring* get_null_bitmap(const NullBitmapResolver* resolver = nullptr) override;

private:
TDocSet _underlying_docset;
TDocSetExclude _excluding_docset;
roaring::Roaring _exclude_null;
roaring::Roaring _null_bitmap;
};

using ExcludeScorerPtr = std::shared_ptr<Exclude<ScorerPtr, ScorerPtr>>;

ScorerPtr make_exclude(ScorerPtr underlying, ScorerPtr excluding);
ScorerPtr make_exclude(ScorerPtr underlying, ScorerPtr excluding,
roaring::Roaring exclude_null = {},
const NullBitmapResolver* resolver = nullptr);

} // namespace doris::segment_v2::inverted_index::query_v2
2 changes: 1 addition & 1 deletion be/src/vec/functions/function_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ Status FunctionSearch::build_query_recursive(const TSearchClause& clause,
occur = map_thrift_occur(child_clause.occur);
}

builder->add(child_query, occur);
builder->add(child_query, occur, std::move(child_binding_key));
}
}

Expand Down
Loading
Loading