Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No Mark to Semi join conversion in statistics propagation #11596

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/include/duckdb/optimizer/filter_pushdown.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Optimizer;

class FilterPushdown {
public:
explicit FilterPushdown(Optimizer &optimizer);
explicit FilterPushdown(Optimizer &optimizer, bool convert_mark_joins = true);

//! Perform filter pushdown
unique_ptr<LogicalOperator> Rewrite(unique_ptr<LogicalOperator> op);
Expand All @@ -38,9 +38,11 @@ class FilterPushdown {
};

private:
vector<unique_ptr<Filter>> filters;
Optimizer &optimizer;
FilterCombiner combiner;
bool convert_mark_joins;

vector<unique_ptr<Filter>> filters;
//! Push down a LogicalAggregate op
unique_ptr<LogicalOperator> PushdownAggregate(unique_ptr<LogicalOperator> op);
//! Push down a distinct operator
Expand Down Expand Up @@ -90,8 +92,6 @@ class FilterPushdown {
void GenerateFilters();
//! if there are filters in this FilterPushdown node, push them into the combiner
void PushFilters();

FilterCombiner combiner;
};

} // namespace duckdb
5 changes: 3 additions & 2 deletions src/optimizer/filter_pushdown.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ namespace duckdb {

using Filter = FilterPushdown::Filter;

FilterPushdown::FilterPushdown(Optimizer &optimizer) : optimizer(optimizer), combiner(optimizer.context) {
FilterPushdown::FilterPushdown(Optimizer &optimizer, bool convert_mark_joins)
: optimizer(optimizer), combiner(optimizer.context), convert_mark_joins(convert_mark_joins) {
}

unique_ptr<LogicalOperator> FilterPushdown::Rewrite(unique_ptr<LogicalOperator> op) {
Expand Down Expand Up @@ -144,7 +145,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushFinalFilters(unique_ptr<LogicalO
unique_ptr<LogicalOperator> FilterPushdown::FinishPushdown(unique_ptr<LogicalOperator> op) {
// unhandled type, first perform filter pushdown in its children
for (auto &child : op->children) {
FilterPushdown pushdown(optimizer);
FilterPushdown pushdown(optimizer, convert_mark_joins);
child = pushdown.Rewrite(std::move(child));
}
// now push any existing filters
Expand Down
2 changes: 1 addition & 1 deletion src/optimizer/pushdown/pushdown_aggregate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownAggregate(unique_ptr<Logical

// pushdown into AGGREGATE and GROUP BY
// we cannot push expressions that refer to the aggregate
FilterPushdown child_pushdown(optimizer);
FilterPushdown child_pushdown(optimizer, convert_mark_joins);
for (idx_t i = 0; i < filters.size(); i++) {
auto &f = *filters[i];
if (f.bindings.find(aggr.aggregate_index) != f.bindings.end()) {
Expand Down
2 changes: 1 addition & 1 deletion src/optimizer/pushdown/pushdown_cross_product.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ using Filter = FilterPushdown::Filter;

unique_ptr<LogicalOperator> FilterPushdown::PushdownCrossProduct(unique_ptr<LogicalOperator> op) {
D_ASSERT(op->children.size() > 1);
FilterPushdown left_pushdown(optimizer), right_pushdown(optimizer);
FilterPushdown left_pushdown(optimizer, convert_mark_joins), right_pushdown(optimizer, convert_mark_joins);
vector<unique_ptr<Expression>> join_expressions;
auto join_ref_type = JoinRefType::REGULAR;
switch (op->type) {
Expand Down
2 changes: 1 addition & 1 deletion src/optimizer/pushdown/pushdown_left_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownLeftJoin(unique_ptr<LogicalO
if (op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
return FinishPushdown(std::move(op));
}
FilterPushdown left_pushdown(optimizer), right_pushdown(optimizer);
FilterPushdown left_pushdown(optimizer, convert_mark_joins), right_pushdown(optimizer, convert_mark_joins);
// for a comparison join we create a FilterCombiner that checks if we can push conditions on LHS join conditions
// into the RHS of the join
FilterCombiner filter_combiner(optimizer);
Expand Down
6 changes: 3 additions & 3 deletions src/optimizer/pushdown/pushdown_mark_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN || op->type == LogicalOperatorType::LOGICAL_ASOF_JOIN);

right_bindings.insert(comp_join.mark_index);
FilterPushdown left_pushdown(optimizer), right_pushdown(optimizer);
FilterPushdown left_pushdown(optimizer, convert_mark_joins), right_pushdown(optimizer, convert_mark_joins);
#ifdef DEBUG
bool simplified_mark_join = false;
#endif
Expand All @@ -35,7 +35,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
#endif
// this filter references the marker
// we can turn this into a SEMI join if the filter is on only the marker
if (filters[i]->filter->type == ExpressionType::BOUND_COLUMN_REF) {
if (filters[i]->filter->type == ExpressionType::BOUND_COLUMN_REF && convert_mark_joins) {
// filter just references the marker: turn into semi join
#ifdef DEBUG
simplified_mark_join = true;
Expand All @@ -61,7 +61,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownMarkJoin(unique_ptr<LogicalO
break;
}
}
if (all_null_values_are_equal) {
if (all_null_values_are_equal && convert_mark_joins) {
#ifdef DEBUG
simplified_mark_join = true;
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/optimizer/pushdown/pushdown_projection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownProjection(unique_ptr<Logica
// push filter through logical projection
// all the BoundColumnRefExpressions in the filter should refer to the LogicalProjection
// we can rewrite them by replacing those references with the expression of the LogicalProjection node
FilterPushdown child_pushdown(optimizer);
FilterPushdown child_pushdown(optimizer, convert_mark_joins);
// There are some expressions can not be pushed down. We should keep them
// and add an extra filter operator.
vector<unique_ptr<Expression>> remain_expressions;
Expand Down
2 changes: 1 addition & 1 deletion src/optimizer/pushdown/pushdown_semi_anti_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownSemiAntiJoin(unique_ptr<Logi

// push all current filters down the left side
op->children[0] = Rewrite(std::move(op->children[0]));
FilterPushdown right_pushdown(optimizer);
FilterPushdown right_pushdown(optimizer, convert_mark_joins);
op->children[1] = right_pushdown.Rewrite(std::move(op->children[1]));

bool left_empty = op->children[0]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT;
Expand Down
2 changes: 1 addition & 1 deletion src/optimizer/pushdown/pushdown_set_operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownSetOperation(unique_ptr<Logi
}

// pushdown into set operation, we can duplicate the condition and pushdown the expressions into both sides
FilterPushdown left_pushdown(optimizer), right_pushdown(optimizer);
FilterPushdown left_pushdown(optimizer, convert_mark_joins), right_pushdown(optimizer, convert_mark_joins);
for (idx_t i = 0; i < filters.size(); i++) {
// first create a copy of the filter
auto right_filter = make_uniq<Filter>();
Expand Down
2 changes: 1 addition & 1 deletion src/optimizer/pushdown/pushdown_single_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ unique_ptr<LogicalOperator> FilterPushdown::PushdownSingleJoin(unique_ptr<Logica
unordered_set<idx_t> &left_bindings,
unordered_set<idx_t> &right_bindings) {
D_ASSERT(op->Cast<LogicalJoin>().join_type == JoinType::SINGLE);
FilterPushdown left_pushdown(optimizer), right_pushdown(optimizer);
FilterPushdown left_pushdown(optimizer, convert_mark_joins), right_pushdown(optimizer, convert_mark_joins);
// now check the set of filters
for (idx_t i = 0; i < filters.size(); i++) {
auto side = JoinSide::GetJoinSide(filters[i]->bindings, left_bindings, right_bindings);
Expand Down
4 changes: 3 additions & 1 deletion src/optimizer/statistics/operator/propagate_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,9 @@ void StatisticsPropagator::CreateFilterFromJoinStats(unique_ptr<LogicalOperator>
child->expressions.emplace_back(std::move(filter_expr));
}

FilterPushdown filter_pushdown(optimizer);
// not allowed to let filter pushdowwn change mark joins to semi joins.
// semi joins are potentially slower AND the conversion can ruin column binding information
FilterPushdown filter_pushdown(optimizer, false);
child = filter_pushdown.Rewrite(std::move(child));
PropagateExpression(expr);
}
Expand Down
42 changes: 42 additions & 0 deletions test/optimizer/pushdown/pushdown_after_statistics.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# name: test/optimizer/pushdown/pushdown_after_statistics.test
# description: Test Table Filter Push Down
# group: [pushdown]

statement ok
set explain_output='optimized_only';


statement ok
create table big_probe as select range%3000 a, range%4000 b from range(100000);

statement ok
create table into_semi as select range%300 c from range(10000);

statement ok
create table into_get as select range d from range(100);


# the IN filter becomes a mark join. We should keep it a mark join at this point
query II
explain select * from big_probe, into_semi, into_get where c in (1, 3, 5, 7, 10, 14, 16, 20, 22) and c = d and a = c;
----
logical_opt <REGEX>:.*MARK.*


statement ok
create table mark_join_build as select range e from range(200);

# Now the in filter is a semi join.
query II
explain select * from big_probe, into_semi, into_get where c in (select e from mark_join_build) and c = d and a = c;
----
logical_opt <REGEX>:.*SEMI.*


statement ok
select t1.a from big_probe t1
where t1.a in
(select t2.b
from big_probe t2
where t2.b in (1206, 1202, 1322, 1204, 1370)
and t2.b not in (select t2_filter.a from big_probe t2_filter));
Loading