Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
88fccb8
[Chore](pick) pick #60141 #59410 (#61287)
BiteTheDDDDt Mar 13, 2026
24fd23b
[pick](code) pick "adjust some local shuffle rules" and "[conf](passt…
Mryange Mar 13, 2026
af47036
[Opt](exec) cherry pick the opt code #60137 #59492 #59446 #58728 (#61…
HappenLee Mar 13, 2026
c72ce08
branch-4.1:(fix)(agg) Adjust agg strategy when table satisfy distinct…
feiniaofeiafei Mar 13, 2026
f3ac2d6
branch-4.1:[enhance](aggregate)Add rewrite rule DecomposeRepeatWithPr…
feiniaofeiafei Mar 13, 2026
8486bcc
branch-4.1 [feature](nereids) add RewriteSimpleAggToConstantRule to r…
englefly Mar 13, 2026
67a5ca1
[Chore](pick) pick changes from PR #61104 and PR #60941 (#61303)
BiteTheDDDDt Mar 13, 2026
c371e5d
branch-4.1 [opt](Nereids) strip redundant widening integer cast in Su…
englefly Mar 13, 2026
914ae22
branch-4.1: [feature](join) support ASOF join (#59591) (#61321)
zclllyybb Mar 13, 2026
c572db1
branch-4.1 cherry-pick [feat](format) support native format (#61286)
eldenmoon Mar 13, 2026
d704934
branch-4.1 [feat](topn lazy materialize)using index topn lazy (#59572…
englefly Mar 14, 2026
867710b
branch-4.1: [fix](search) Replace ExcludeScorer with null-bitmap-awar…
airborne12 Mar 14, 2026
deb5ab5
branch-4.1:[enhance](shuffle) shuffle key prune opt (#59449) (#61298)
feiniaofeiafei Mar 14, 2026
e936ecb
branch-4.1: [opt](jvm) enable BE jvm monitor by default (#60343) (#61…
morningman Mar 14, 2026
452cc47
[opt](jindofs) update jindofs to 6.10.4 and add DlfFileIO (#60856)
morningman Feb 27, 2026
c56d8e9
[feat](iceberg) support aliyun dlf iceberg rest catalog (#60796)
morningman Mar 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1510,7 +1510,7 @@ DEFINE_mInt64(fetch_remote_schema_rpc_timeout_ms, "60000");
DEFINE_Int64(s3_file_system_local_upload_buffer_size, "5242880");

// Enables JVM monitoring. Enabled by default; set to false if BE crashes due to JVM compatibility issues.
DEFINE_Bool(enable_jvm_monitor, "false");
DEFINE_Bool(enable_jvm_monitor, "true");

DEFINE_Int32(load_data_dirs_threads, "-1");

Expand Down
8 changes: 8 additions & 0 deletions be/src/common/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,14 @@ using ResultError = unexpected<Status>;
std::forward<T>(res).value(); \
})

// Invariant check that is compiled in unconditionally (the macro has no build-type guard).
// Evaluates `stmt` once, converts it to bool, and if the result is false throws an
// Exception wrapping Status::FatalError with the message "Check failed: <stmt text>",
// where <stmt text> is the stringified expression. The do/while(false) wrapper lets the
// macro be used as a single statement followed by a semicolon.
// NOTE(review): the original note reads "core in Debug mode, exception in Release mode".
// The macro itself only throws; any Debug-mode core dump presumably comes from
// Status::FatalError — confirm against its definition.
#define DORIS_CHECK(stmt) \
do { \
if (!static_cast<bool>(stmt)) [[unlikely]] { \
throw Exception(Status::FatalError(fmt::format("Check failed: {}", #stmt))); \
} \
} while (false)

} // namespace doris

// specify formatter for Status
Expand Down
15 changes: 8 additions & 7 deletions be/src/olap/comparison_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,8 @@ class ComparisonPredicateBase final : public ColumnPredicate {
}

template <bool is_and>
void __attribute__((flatten))
_evaluate_vec_internal(const vectorized::IColumn& column, uint16_t size, bool* flags) const {
void __attribute__((flatten)) _evaluate_vec_internal(const vectorized::IColumn& column,
uint16_t size, bool* flags) const {
uint16_t current_evaluated_rows = 0;
uint16_t current_passed_rows = 0;
if (_can_ignore()) {
Expand Down Expand Up @@ -579,9 +579,10 @@ class ComparisonPredicateBase final : public ColumnPredicate {
}

template <bool is_nullable, bool is_and, typename TArray, typename TValue>
void __attribute__((flatten))
_base_loop_vec(uint16_t size, bool* __restrict bflags, const uint8_t* __restrict null_map,
const TArray* __restrict data_array, const TValue& value) const {
void __attribute__((flatten)) _base_loop_vec(uint16_t size, bool* __restrict bflags,
const uint8_t* __restrict null_map,
const TArray* __restrict data_array,
const TValue& value) const {
//uint8_t helps compiler to generate vectorized code
auto* flags = reinterpret_cast<uint8_t*>(bflags);
if constexpr (is_and) {
Expand Down Expand Up @@ -696,8 +697,8 @@ class ComparisonPredicateBase final : public ColumnPredicate {
}
}

int32_t __attribute__((flatten))
_find_code_from_dictionary_column(const vectorized::ColumnDictI32& column) const {
int32_t __attribute__((flatten)) _find_code_from_dictionary_column(
const vectorized::ColumnDictI32& column) const {
static_assert(is_string_type(Type),
"Only string type predicate can use dictionary column.");
int32_t code = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,26 @@ class OccurBooleanQueryBuilder {
OccurBooleanQueryBuilder() = default;
~OccurBooleanQueryBuilder() = default;

void add(const QueryPtr& query, Occur occur) { _sub_queries.emplace_back(occur, query); }
// Registers one clause together with its binding key.
// `query` and `occur` are stored as a pair in _sub_queries; `binding_key` (empty when the
// caller omits it) is stored at the same position in _binding_keys, so both vectors grow
// by exactly one element per call.
void add(const QueryPtr& query, Occur occur, std::string binding_key = {}) {
    _sub_queries.push_back({occur, query});
    _binding_keys.push_back(std::move(binding_key));
}

// Stores an explicit minimum-should-match value. build() checks whether a value has been
// stored here to decide which OccurBooleanQuery constructor to call.
void set_minimum_number_should_match(size_t value) {
    _minimum_number_should_match.emplace(value);
}

QueryPtr build() {
if (_minimum_number_should_match.has_value()) {
return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries),
std::move(_binding_keys),
_minimum_number_should_match.value());
}
return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries));
return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries),
std::move(_binding_keys));
}

private:
std::vector<std::pair<Occur, QueryPtr>> _sub_queries;
std::vector<std::string> _binding_keys;
std::optional<size_t> _minimum_number_should_match;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,16 @@ using OccurBooleanQueryPtr = std::shared_ptr<OccurBooleanQuery>;

class OccurBooleanQuery : public Query {
public:
explicit OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses)
explicit OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses,
std::vector<std::string> binding_keys = {})
: _sub_queries(std::move(clauses)),
_binding_keys(std::move(binding_keys)),
_minimum_number_should_match(compute_default_minimum_should_match(_sub_queries)) {}

OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses,
size_t minimum_number_should_match)
std::vector<std::string> binding_keys, size_t minimum_number_should_match)
: _sub_queries(std::move(clauses)),
_binding_keys(std::move(binding_keys)),
_minimum_number_should_match(minimum_number_should_match) {}

~OccurBooleanQuery() override = default;
Expand All @@ -47,8 +50,8 @@ class OccurBooleanQuery : public Query {
sub_weights.emplace_back(occur, query->weight(enable_scoring));
}
return std::make_shared<OccurBooleanWeight<SumCombinerPtr>>(
std::move(sub_weights), _minimum_number_should_match, enable_scoring,
std::make_shared<SumCombiner>());
std::move(sub_weights), std::move(_binding_keys), _minimum_number_should_match,
enable_scoring, std::make_shared<SumCombiner>());
}

const std::vector<std::pair<Occur, QueryPtr>>& clauses() const { return _sub_queries; }
Expand All @@ -69,6 +72,7 @@ class OccurBooleanQuery : public Query {
}

std::vector<std::pair<Occur, QueryPtr>> _sub_queries;
std::vector<std::string> _binding_keys;
size_t _minimum_number_should_match = 0;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,27 @@
#include "olap/rowset/segment_v2/inverted_index/query_v2/disjunction_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/intersection.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/intersection_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/reqopt_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/union/buffered_union.h"

namespace doris::segment_v2::inverted_index::query_v2 {

template <typename ScoreCombinerPtrT>
OccurBooleanWeight<ScoreCombinerPtrT>::OccurBooleanWeight(
std::vector<std::pair<Occur, WeightPtr>> sub_weights, size_t minimum_number_should_match,
bool enable_scoring, ScoreCombinerPtrT score_combiner)
std::vector<std::pair<Occur, WeightPtr>> sub_weights, std::vector<std::string> binding_keys,
size_t minimum_number_should_match, bool enable_scoring, ScoreCombinerPtrT score_combiner)
: _sub_weights(std::move(sub_weights)),
_binding_keys(std::move(binding_keys)),
_minimum_number_should_match(minimum_number_should_match),
_enable_scoring(enable_scoring),
_score_combiner(std::move(score_combiner)) {}
_score_combiner(std::move(score_combiner)) {
DCHECK(_binding_keys.empty() || _binding_keys.size() == _sub_weights.size())
<< "binding_keys size (" << _binding_keys.size() << ") must match sub_weights size ("
<< _sub_weights.size() << ") when non-empty";
// Ensure binding_keys has the same size as sub_weights (pads with empty strings if needed).
_binding_keys.resize(_sub_weights.size());
}

template <typename ScoreCombinerPtrT>
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::scorer(const QueryExecutionContext& context) {
Expand Down Expand Up @@ -62,8 +70,10 @@ template <typename ScoreCombinerPtrT>
std::unordered_map<Occur, std::vector<ScorerPtr>>
OccurBooleanWeight<ScoreCombinerPtrT>::per_occur_scorers(const QueryExecutionContext& context) {
std::unordered_map<Occur, std::vector<ScorerPtr>> result;
for (const auto& [occur, weight] : _sub_weights) {
auto sub_scorer = weight->scorer(context);
for (size_t i = 0; i < _sub_weights.size(); ++i) {
const auto& [occur, weight] = _sub_weights[i];
const auto& binding_key = _binding_keys[i];
auto sub_scorer = weight->scorer(context, binding_key);
if (sub_scorer) {
result[occur].push_back(std::move(sub_scorer));
}
Expand Down Expand Up @@ -122,17 +132,6 @@ std::optional<CombinationMethod> OccurBooleanWeight<ScoreCombinerPtrT>::build_sh
}
}

template <typename ScoreCombinerPtrT>
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::build_exclude_opt(
std::vector<ScorerPtr> must_not_scorers) {
if (must_not_scorers.empty()) {
return nullptr;
}
auto do_nothing = std::make_shared<DoNothingCombiner>();
auto specialized_scorer = scorer_union(std::move(must_not_scorers), do_nothing);
return into_box_scorer(std::move(specialized_scorer), do_nothing);
}

template <typename ScoreCombinerPtrT>
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::effective_must_scorer(
std::vector<ScorerPtr> must_scorers, size_t must_num_all_scorers) {
Expand Down Expand Up @@ -242,13 +241,24 @@ SpecializedScorer OccurBooleanWeight<ScoreCombinerPtrT>::complex_scorer(
return std::make_shared<EmptyScorer>();
}

ScorerPtr exclude_opt = build_exclude_opt(std::move(must_not_scorers));
// Collect null bitmaps from MUST_NOT scorers (read from index, no iteration needed)
// and union the scorers into one for lazy exclusion.
roaring::Roaring exclude_null;
ScorerPtr exclude_opt =
build_exclude_opt(std::move(must_not_scorers), context.null_resolver, exclude_null);

SpecializedScorer positive_opt =
build_positive_opt(*should_opt, std::move(must_scorers), combiner, must_special_counts,
should_special_counts);
// Use null-bitmap-aware ExcludeScorer for MUST_NOT clauses.
// ExcludeScorer keeps lazy TRUE exclusion via seek-based iteration and adds
// O(1) null bitmap checks so that NOT(NULL) = NULL (SQL three-valued logic).
// Documents where the excluded field is NULL are placed in the null bitmap
// rather than being incorrectly included in the true result set.
if (exclude_opt) {
ScorerPtr positive_boxed = into_box_scorer(std::move(positive_opt), combiner);
return make_exclude(std::move(positive_boxed), std::move(exclude_opt));
return make_exclude(std::move(positive_boxed), std::move(exclude_opt),
std::move(exclude_null), context.null_resolver);
}
return positive_opt;
}
Expand Down Expand Up @@ -321,6 +331,30 @@ ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::into_box_scorer(SpecializedScor
std::move(specialized));
}

// Builds the single scorer used to exclude documents matched by MUST_NOT clauses.
//
// must_not_scorers: scorers of all MUST_NOT clauses; consumed by this call.
// resolver:         passed through to has_null_bitmap()/get_null_bitmap(); when it is
//                   nullptr no null bitmaps are collected.
// exclude_null_out: receives (by OR-ing in) the null bitmap of every non-null scorer that
//                   reports one. It is left untouched when there is nothing to collect.
//
// Returns nullptr when there are no MUST_NOT scorers, otherwise the boxed union of them
// built with a DoNothingCombiner.
template <typename ScoreCombinerPtrT>
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::build_exclude_opt(
        std::vector<ScorerPtr> must_not_scorers, const NullBitmapResolver* resolver,
        roaring::Roaring& exclude_null_out) {
    if (must_not_scorers.empty()) {
        return nullptr;
    }

    // Null bitmaps must be gathered before the scorers are moved into the union below.
    if (resolver != nullptr) {
        for (auto& scorer : must_not_scorers) {
            if (!scorer || !scorer->has_null_bitmap(resolver)) {
                continue;
            }
            if (const auto* null_bitmap = scorer->get_null_bitmap(resolver);
                null_bitmap != nullptr) {
                exclude_null_out |= *null_bitmap;
            }
        }
    }

    // Merge every MUST_NOT scorer into one scorer; scores are irrelevant for exclusion.
    auto no_score_combiner = std::make_shared<DoNothingCombiner>();
    auto merged = scorer_union(std::move(must_not_scorers), no_score_combiner);
    return into_box_scorer(std::move(merged), no_score_combiner);
}

template class OccurBooleanWeight<SumCombinerPtr>;
template class OccurBooleanWeight<DoNothingCombinerPtr>;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#pragma once

#include <roaring/roaring.hh>

#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_scorer.h"
Expand Down Expand Up @@ -44,8 +46,8 @@ template <typename ScoreCombinerPtrT>
class OccurBooleanWeight : public Weight {
public:
OccurBooleanWeight(std::vector<std::pair<Occur, WeightPtr>> sub_weights,
size_t minimum_number_should_match, bool enable_scoring,
ScoreCombinerPtrT score_combiner);
std::vector<std::string> binding_keys, size_t minimum_number_should_match,
bool enable_scoring, ScoreCombinerPtrT score_combiner);
~OccurBooleanWeight() override = default;

ScorerPtr scorer(const QueryExecutionContext& context) override;
Expand All @@ -62,8 +64,6 @@ class OccurBooleanWeight : public Weight {
std::optional<CombinationMethod> build_should_opt(std::vector<ScorerPtr>& must_scorers,
std::vector<ScorerPtr> should_scorers,
CombinerT combiner, size_t num_all_scorers);
ScorerPtr build_exclude_opt(std::vector<ScorerPtr> must_not_scorers);

ScorerPtr effective_must_scorer(std::vector<ScorerPtr> must_scorers,
size_t must_num_all_scorers);

Expand All @@ -87,7 +87,12 @@ class OccurBooleanWeight : public Weight {
template <typename CombinerT>
ScorerPtr into_box_scorer(SpecializedScorer&& specialized, CombinerT combiner);

ScorerPtr build_exclude_opt(std::vector<ScorerPtr> must_not_scorers,
const NullBitmapResolver* resolver,
roaring::Roaring& exclude_null_out);

std::vector<std::pair<Occur, WeightPtr>> _sub_weights;
std::vector<std::string> _binding_keys;
size_t _minimum_number_should_match = 1;
bool _enable_scoring = false;
ScoreCombinerPtrT _score_combiner;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,29 @@ namespace doris::segment_v2::inverted_index::query_v2 {

template <typename TDocSet, typename TDocSetExclude>
Exclude<TDocSet, TDocSetExclude>::Exclude(TDocSet underlying_docset,
TDocSetExclude excluding_docset)
TDocSetExclude excluding_docset,
roaring::Roaring exclude_null,
const NullBitmapResolver* resolver)
: _underlying_docset(std::move(underlying_docset)),
_excluding_docset(std::move(excluding_docset)) {
_excluding_docset(std::move(excluding_docset)),
_exclude_null(std::move(exclude_null)) {
// Inherit the include scorer's null bitmap (e.g. from parent AND operations).
if (resolver != nullptr && _underlying_docset->has_null_bitmap(resolver)) {
const auto* nb = _underlying_docset->get_null_bitmap(resolver);
if (nb != nullptr) {
_null_bitmap |= *nb;
}
}

while (_underlying_docset->doc() != TERMINATED) {
uint32_t target = _underlying_docset->doc();
// O(1) null bitmap check (pre-computed from index, cheap).
if (!_exclude_null.isEmpty() && _exclude_null.contains(target)) {
_null_bitmap.add(target);
_underlying_docset->advance();
continue;
}
// Original lazy seek (unchanged).
if (!is_within(_excluding_docset, target)) {
break;
}
Expand All @@ -40,6 +58,12 @@ uint32_t Exclude<TDocSet, TDocSetExclude>::advance() {
if (candidate == TERMINATED) {
return TERMINATED;
}
// O(1) null bitmap check (pre-computed from index, cheap).
if (!_exclude_null.isEmpty() && _exclude_null.contains(candidate)) {
_null_bitmap.add(candidate);
continue;
}
// Original lazy seek (unchanged).
if (!is_within(_excluding_docset, candidate)) {
return candidate;
}
Expand All @@ -52,6 +76,11 @@ uint32_t Exclude<TDocSet, TDocSetExclude>::seek(uint32_t target) {
if (candidate == TERMINATED) {
return TERMINATED;
}
// O(1) null bitmap check (pre-computed from index, cheap).
if (!_exclude_null.isEmpty() && _exclude_null.contains(candidate)) {
_null_bitmap.add(candidate);
return advance();
}
if (!is_within(_excluding_docset, candidate)) {
return candidate;
}
Expand All @@ -76,11 +105,23 @@ float Exclude<TDocSet, TDocSetExclude>::score() {
return 0.0F;
}

ScorerPtr make_exclude(ScorerPtr underlying, ScorerPtr excluding) {
return std::make_shared<Exclude<ScorerPtr, ScorerPtr>>(std::move(underlying),
std::move(excluding));
// Tells whether this scorer currently holds any NULL documents.
// The resolver argument is ignored; the answer comes solely from _null_bitmap.
// Note: _null_bitmap is filled in the constructor and extended while advance()/seek()
// skip candidates, so the result reflects only what has been recorded so far.
template <typename TDocSet, typename TDocSetExclude>
bool Exclude<TDocSet, TDocSetExclude>::has_null_bitmap(const NullBitmapResolver* /*resolver*/) {
    const bool no_null_docs = _null_bitmap.isEmpty();
    return !no_null_docs;
}

// Returns a pointer to this scorer's own null bitmap, or nullptr when that bitmap is empty.
// The resolver argument is ignored. The returned pointer refers to a member of this
// object, so it must not be used after the scorer is destroyed.
template <typename TDocSet, typename TDocSetExclude>
const roaring::Roaring* Exclude<TDocSet, TDocSetExclude>::get_null_bitmap(
        const NullBitmapResolver* /*resolver*/) {
    if (_null_bitmap.isEmpty()) {
        return nullptr;
    }
    return &_null_bitmap;
}

// Factory for an Exclude scorer over type-erased scorers.
// underlying:   scorer producing the candidate documents.
// excluding:    scorer whose matches are removed from the candidates.
// exclude_null: bitmap of documents the Exclude scorer checks candidates against and
//               records in its null bitmap instead of returning.
// resolver:     forwarded to the Exclude constructor, which uses it to read the
//               underlying scorer's null bitmap; may be nullptr.
ScorerPtr make_exclude(ScorerPtr underlying, ScorerPtr excluding, roaring::Roaring exclude_null,
                       const NullBitmapResolver* resolver) {
    using BoxedExclude = Exclude<ScorerPtr, ScorerPtr>;
    return std::make_shared<BoxedExclude>(std::move(underlying), std::move(excluding),
                                          std::move(exclude_null), resolver);
}

template class Exclude<ScorerPtr, ScorerPtr>;

} // namespace doris::segment_v2::inverted_index::query_v2
} // namespace doris::segment_v2::inverted_index::query_v2
Loading
Loading