-
Notifications
You must be signed in to change notification settings - Fork 3.7k
[branch-2.1](memory) Fix memory tracker destructor deadlock #33504
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e998ce8
ac6cefa
b47348f
c00e26e
5732226
3a35f76
1684591
bcda1d8
1d0076e
35d5c9c
6bfeb0c
4b60265
d427106
0f34cc0
0a2a035
4b1cb17
9d93520
afdb060
d4b7b74
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -34,130 +34,77 @@ class VRuntimeFilterSlots { | |||||
| public: | ||||||
| VRuntimeFilterSlots( | ||||||
| const std::vector<std::shared_ptr<vectorized::VExprContext>>& build_expr_ctxs, | ||||||
| const std::vector<IRuntimeFilter*>& runtime_filters, bool need_local_merge = false) | ||||||
| : _build_expr_context(build_expr_ctxs), | ||||||
| _runtime_filters(runtime_filters), | ||||||
| _need_local_merge(need_local_merge) {} | ||||||
|
|
||||||
| Status init(RuntimeState* state, int64_t hash_table_size) { | ||||||
| // runtime filter effect strategy | ||||||
| // 1. we will ignore IN filter when hash_table_size is too big | ||||||
| // 2. we will ignore BLOOM filter and MinMax filter when hash_table_size | ||||||
| // is too small and IN filter has effect | ||||||
| std::map<int, bool> has_in_filter; | ||||||
|
|
||||||
| auto ignore_local_filter = [&](int filter_id) { | ||||||
| auto runtime_filter_mgr = _need_local_merge ? state->global_runtime_filter_mgr() | ||||||
| : state->local_runtime_filter_mgr(); | ||||||
|
|
||||||
| std::vector<IRuntimeFilter*> filters; | ||||||
| RETURN_IF_ERROR(runtime_filter_mgr->get_consume_filters(filter_id, filters)); | ||||||
| if (filters.empty()) { | ||||||
| throw Exception(ErrorCode::INTERNAL_ERROR, "filters empty, filter_id={}", | ||||||
| filter_id); | ||||||
| } | ||||||
| for (auto* filter : filters) { | ||||||
| filter->set_ignored(""); | ||||||
| filter->signal(); | ||||||
| } | ||||||
| return Status::OK(); | ||||||
| }; | ||||||
| const std::vector<IRuntimeFilter*>& runtime_filters) | ||||||
| : _build_expr_context(build_expr_ctxs), _runtime_filters(runtime_filters) { | ||||||
| for (auto* runtime_filter : _runtime_filters) { | ||||||
| _runtime_filters_map[runtime_filter->expr_order()].push_back(runtime_filter); | ||||||
| } | ||||||
| } | ||||||
|
|
||||||
| auto ignore_remote_filter = [](IRuntimeFilter* runtime_filter, std::string& msg) { | ||||||
| runtime_filter->set_ignored(msg); | ||||||
| RETURN_IF_ERROR(runtime_filter->publish()); | ||||||
| Status send_filter_size(RuntimeState* state, uint64_t hash_table_size, bool publish_local, | ||||||
| pipeline::CountedFinishDependency* dependency) { | ||||||
| if (_runtime_filters.empty() || publish_local) { | ||||||
| return Status::OK(); | ||||||
| }; | ||||||
|
|
||||||
| // ordered vector: IN, IN_OR_BLOOM, others. | ||||||
| // so we can ignore other filter if IN Predicate exists. | ||||||
| auto compare_desc = [](IRuntimeFilter* d1, IRuntimeFilter* d2) { | ||||||
| if (d1->type() == d2->type()) { | ||||||
| return false; | ||||||
| } else if (d1->type() == RuntimeFilterType::IN_FILTER) { | ||||||
| return true; | ||||||
| } else if (d2->type() == RuntimeFilterType::IN_FILTER) { | ||||||
| return false; | ||||||
| } else if (d1->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER) { | ||||||
| return true; | ||||||
| } else if (d2->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER) { | ||||||
| return false; | ||||||
| } else { | ||||||
| return d1->type() < d2->type(); | ||||||
| } | ||||||
| for (auto* runtime_filter : _runtime_filters) { | ||||||
| if (runtime_filter->need_sync_filter_size()) { | ||||||
| runtime_filter->set_dependency(dependency); | ||||||
| } | ||||||
| }; | ||||||
| std::sort(_runtime_filters.begin(), _runtime_filters.end(), compare_desc); | ||||||
|
|
||||||
| // do not create 'in filter' when hash_table size over limit | ||||||
| const auto max_in_num = state->runtime_filter_max_in_num(); | ||||||
| const bool over_max_in_num = (hash_table_size >= max_in_num); | ||||||
| } | ||||||
|
|
||||||
| // send_filter_size may call dependency->sub(), so we call set_dependency firstly for all rf to avoid dependency set_ready repeatedly | ||||||
| for (auto* runtime_filter : _runtime_filters) { | ||||||
| if (runtime_filter->expr_order() < 0 || | ||||||
| runtime_filter->expr_order() >= _build_expr_context.size()) { | ||||||
| return Status::InternalError( | ||||||
| "runtime_filter meet invalid expr_order, expr_order={}, " | ||||||
| "_build_expr_context.size={}", | ||||||
| runtime_filter->expr_order(), _build_expr_context.size()); | ||||||
| if (runtime_filter->need_sync_filter_size()) { | ||||||
| RETURN_IF_ERROR(runtime_filter->send_filter_size(hash_table_size)); | ||||||
| } | ||||||
| } | ||||||
| return Status::OK(); | ||||||
| } | ||||||
|
|
||||||
| bool is_in_filter = (runtime_filter->type() == RuntimeFilterType::IN_FILTER); | ||||||
| // Pick the size to use for this runtime filter: its synced size if isset_synced_size() reports one (i.e. this rf has been globally merged), otherwise the caller-supplied hash_table_size. | ||||||
| static uint64_t get_real_size(IRuntimeFilter* runtime_filter, uint64_t hash_table_size) { | ||||||
| return runtime_filter->isset_synced_size() ? runtime_filter->get_synced_size() | ||||||
| : hash_table_size; | ||||||
| } | ||||||
|
|
||||||
| if (over_max_in_num && | ||||||
| runtime_filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER) { | ||||||
| RETURN_IF_ERROR(runtime_filter->change_to_bloom_filter()); | ||||||
| Status ignore_filters(RuntimeState* state) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. warning: method 'ignore_filters' can be made static [readability-convert-member-functions-to-static]
Suggested change
|
||||||
| // process ignore duplicate IN_FILTER | ||||||
| std::unordered_set<int> has_in_filter; | ||||||
| for (auto* filter : _runtime_filters) { | ||||||
| if (filter->get_real_type() != RuntimeFilterType::IN_FILTER) { | ||||||
| continue; | ||||||
| } | ||||||
|
|
||||||
| if (runtime_filter->is_bloomfilter()) { | ||||||
| RETURN_IF_ERROR(runtime_filter->init_bloom_filter(hash_table_size)); | ||||||
| if (has_in_filter.contains(filter->expr_order())) { | ||||||
| filter->set_ignored(); | ||||||
| continue; | ||||||
| } | ||||||
| has_in_filter.insert(filter->expr_order()); | ||||||
| } | ||||||
|
|
||||||
| // Note: | ||||||
| // In the case that exist *remote target* and in filter and other filter, | ||||||
| // we must merge other filter whatever in filter is over the max num in current node, | ||||||
| // because: | ||||||
| // case 1: (in filter >= max num) in current node, so in filter will be ignored, | ||||||
| // and then other filter can be used | ||||||
| // case 2: (in filter < max num) in current node, we don't know whether the in filter | ||||||
| // will be ignored in merge node, so we must transfer other filter to merge node | ||||||
| if (!runtime_filter->has_remote_target()) { | ||||||
| bool exists_in_filter = has_in_filter[runtime_filter->expr_order()]; | ||||||
| if (is_in_filter && over_max_in_num) { | ||||||
| VLOG_DEBUG << "fragment instance " << print_id(state->fragment_instance_id()) | ||||||
| << " ignore runtime filter(in filter id " | ||||||
| << runtime_filter->filter_id() << ") because: in_num(" | ||||||
| << hash_table_size << ") >= max_in_num(" << max_in_num << ")"; | ||||||
| RETURN_IF_ERROR(ignore_local_filter(runtime_filter->filter_id())); | ||||||
| continue; | ||||||
| } else if (!is_in_filter && exists_in_filter) { | ||||||
| // do not create 'bloom filter' and 'minmax filter' when 'in filter' has created | ||||||
| // because in filter is exactly filter, so it is enough to filter data | ||||||
| VLOG_DEBUG << "fragment instance " << print_id(state->fragment_instance_id()) | ||||||
| << " ignore runtime filter(" | ||||||
| << IRuntimeFilter::to_string(runtime_filter->type()) << " id " | ||||||
| << runtime_filter->filter_id() | ||||||
| << ") because: already exists in filter"; | ||||||
| RETURN_IF_ERROR(ignore_local_filter(runtime_filter->filter_id())); | ||||||
| continue; | ||||||
| } | ||||||
| } else if (is_in_filter && over_max_in_num) { | ||||||
| std::string msg = fmt::format( | ||||||
| "fragment instance {} ignore runtime filter(in filter id {}) because: " | ||||||
| "in_num({}) >= max_in_num({})", | ||||||
| print_id(state->fragment_instance_id()), runtime_filter->filter_id(), | ||||||
| hash_table_size, max_in_num); | ||||||
| RETURN_IF_ERROR(ignore_remote_filter(runtime_filter, msg)); | ||||||
| // process ignore filter when it has IN_FILTER on same expr, and init bloom filter size | ||||||
| for (auto* filter : _runtime_filters) { | ||||||
| if (filter->get_real_type() == RuntimeFilterType::IN_FILTER || | ||||||
| !has_in_filter.contains(filter->expr_order())) { | ||||||
| continue; | ||||||
| } | ||||||
| filter->set_ignored(); | ||||||
| } | ||||||
| return Status::OK(); | ||||||
| } | ||||||
|
|
||||||
| if ((runtime_filter->type() == RuntimeFilterType::IN_FILTER) || | ||||||
| (runtime_filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER && | ||||||
| !over_max_in_num)) { | ||||||
| has_in_filter[runtime_filter->expr_order()] = true; | ||||||
| Status init_filters(RuntimeState* state, uint64_t local_hash_table_size) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. warning: method 'init_filters' can be made static [readability-convert-member-functions-to-static]
Suggested change
|
||||||
| // process IN_OR_BLOOM_FILTER's real type | ||||||
| for (auto* filter : _runtime_filters) { | ||||||
| if (filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER && | ||||||
| get_real_size(filter, local_hash_table_size) > state->runtime_filter_max_in_num()) { | ||||||
| RETURN_IF_ERROR(filter->change_to_bloom_filter()); | ||||||
| } | ||||||
| _runtime_filters_map[runtime_filter->expr_order()].push_back(runtime_filter); | ||||||
| } | ||||||
|
|
||||||
| if (filter->get_real_type() == RuntimeFilterType::BLOOM_FILTER) { | ||||||
| RETURN_IF_ERROR( | ||||||
| filter->init_bloom_filter(get_real_size(filter, local_hash_table_size))); | ||||||
| } | ||||||
| } | ||||||
| return Status::OK(); | ||||||
| } | ||||||
|
|
||||||
|
|
@@ -171,6 +118,9 @@ class VRuntimeFilterSlots { | |||||
| int result_column_id = _build_expr_context[i]->get_last_result_column_id(); | ||||||
| const auto& column = block->get_by_position(result_column_id).column; | ||||||
| for (auto* filter : iter->second) { | ||||||
| if (filter->get_ignored()) { | ||||||
| continue; | ||||||
| } | ||||||
| filter->insert_batch(column, 1); | ||||||
| } | ||||||
| } | ||||||
|
|
@@ -213,7 +163,6 @@ class VRuntimeFilterSlots { | |||||
| private: | ||||||
| const std::vector<std::shared_ptr<vectorized::VExprContext>>& _build_expr_context; | ||||||
| std::vector<IRuntimeFilter*> _runtime_filters; | ||||||
| const bool _need_local_merge = false; | ||||||
| // prob_contition index -> [IRuntimeFilter] | ||||||
| std::map<int, std::list<IRuntimeFilter*>> _runtime_filters_map; | ||||||
| }; | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
warning: method 'send_filter_size' can be made static [readability-convert-member-functions-to-static]