From 064d8d255b1aa9f3fb157759f70d21063f721032 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 17 Oct 2023 23:10:10 +0000 Subject: [PATCH] Backport #55678 to 23.9: Fix filtering by virtual columns with OR filter in query (resubmit) --- src/Storages/VirtualColumnUtils.cpp | 55 +++++++++++++++---- tests/integration/test_system_merges/test.py | 2 +- .../02840_merge__table_or_filter.sql.j2 | 5 ++ .../0_stateless/02896_multiple_OR.reference | 14 +++++ .../queries/0_stateless/02896_multiple_OR.sql | 28 ++++++++++ 5 files changed, 93 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/02896_multiple_OR.reference create mode 100644 tests/queries/0_stateless/02896_multiple_OR.sql diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index d0d6233728ef..219043f25c61 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -81,14 +82,33 @@ bool extractFunctions(const ASTPtr & expression, const std::functionname == "or") { - bool ret = true; + bool ret = false; ASTs or_args; for (const auto & child : function->arguments->children) - ret &= extractFunctions(child, is_constant, or_args); - /// We can keep condition only if it still OR condition (i.e. we - /// have dependent conditions for columns at both sides) - if (or_args.size() == 2) + ret |= extractFunctions(child, is_constant, or_args); + + if (!or_args.empty()) + { + /// In case of there are less number of arguments for which + /// is_constant() == true, we need to add always-true + /// implicitly to avoid breaking AND invariant. + /// + /// Consider the following: + /// + /// ((value = 10) OR (_table = 'v2')) AND ((_table = 'v1') OR (value = 20)) + /// + /// Without implicit always-true: + /// + /// (_table = 'v2') AND (_table = 'v1') + /// + /// With: + /// + /// (_table = 'v2' OR 1) AND (_table = 'v1' OR 1) -> (_table = 'v2') OR (_table = 'v1') + /// + if (or_args.size() != function->arguments->children.size()) + or_args.push_back(std::make_shared(Field(1))); result.push_back(makeASTForLogicalOr(std::move(or_args))); + } return ret; } } @@ -165,8 +185,10 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block if (!select.where() && !select.prewhere()) return unmodified; - // Provide input columns as constant columns to check if an expression is constant. - std::function is_constant = [&block, &context](const ASTPtr & node) + // Provide input columns as constant columns to check if an expression is + // constant and depends on the columns from provided block (the last is + // required to allow skipping some conditions for handling OR). + std::function is_constant = [&block, &context](const ASTPtr & expr) { auto actions = std::make_shared(block.getColumnsWithTypeAndName()); PreparedSetsPtr prepared_sets = std::make_shared(); @@ -178,13 +200,26 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block context, SizeLimits{}, 1, source_columns, std::move(actions), prepared_sets, true, true, true, { aggregation_keys, grouping_set_keys, GroupByKind::NONE }); - ActionsVisitor(visitor_data).visit(node); + ActionsVisitor(visitor_data).visit(expr); actions = visitor_data.getActions(); + auto expr_column_name = expr->getColumnName(); + + const auto * expr_const_node = actions->tryFindInOutputs(expr_column_name); + if (!expr_const_node) + return false; + auto filter_actions = ActionsDAG::buildFilterActionsDAG({expr_const_node}, {}, context); + const auto & nodes = filter_actions->getNodes(); + bool has_dependent_columns = std::any_of(nodes.begin(), nodes.end(), [&](const auto & node) + { + return block.has(node.result_name); + }); + if (!has_dependent_columns) + return false; + auto expression_actions = std::make_shared(actions); auto block_with_constants = block; expression_actions->execute(block_with_constants); - auto column_name = node->getColumnName(); - return block_with_constants.has(column_name) && isColumnConst(*block_with_constants.getByName(column_name).column); + return block_with_constants.has(expr_column_name) && isColumnConst(*block_with_constants.getByName(expr_column_name).column); }; /// Create an expression that evaluates the expressions in WHERE and PREWHERE, depending only on the existing columns. diff --git a/tests/integration/test_system_merges/test.py b/tests/integration/test_system_merges/test.py index ff303afe19e9..5f0fc7b4d84f 100644 --- a/tests/integration/test_system_merges/test.py +++ b/tests/integration/test_system_merges/test.py @@ -186,7 +186,7 @@ def test_mutation_simple(started_cluster, replicated): # ALTER will sleep for 3s * 3 (rows) = 9s def alter(): node1.query( - f"ALTER TABLE {name} UPDATE a = 42 WHERE sleep(3) OR 1", + f"ALTER TABLE {name} UPDATE a = 42 WHERE sleep(9) = 0", settings=settings, ) diff --git a/tests/queries/0_stateless/02840_merge__table_or_filter.sql.j2 b/tests/queries/0_stateless/02840_merge__table_or_filter.sql.j2 index a87ef7302c60..286e4545ef78 100644 --- a/tests/queries/0_stateless/02840_merge__table_or_filter.sql.j2 +++ b/tests/queries/0_stateless/02840_merge__table_or_filter.sql.j2 @@ -18,6 +18,11 @@ create view v2 as select * from d2; create table m as v1 engine=Merge(currentDatabase(), '^(v1|v2)$'); +{# -- FIXME: +select _table, key from m where (value = 10 and _table = 'v1') or (value = 20 and _table = 'v1') or 0 or 0 settings {{ settings }}; +select _table, key from m where (value = 10 and _table = 'v3') or (value = 20 and _table = 'v3') or 0 or 0 settings {{ settings }}; +#} + -- avoid reorder set max_threads=1; -- { echoOn } diff --git a/tests/queries/0_stateless/02896_multiple_OR.reference b/tests/queries/0_stateless/02896_multiple_OR.reference new file mode 100644 index 000000000000..96480a75d11e --- /dev/null +++ b/tests/queries/0_stateless/02896_multiple_OR.reference @@ -0,0 +1,14 @@ +-- { echoOn } +SELECT * FROM or_bug WHERE (key = 1) OR false OR false; +1 +SELECT * FROM or_bug WHERE (key = 1) OR false; +1 +SELECT * FROM or_bug WHERE (key = 1); +1 +-- { echoOn } +select * from forms where text_field like '%this%' or 0 = 1 or 0 = 1; +5840ead423829c1eab29fa97 this is a test +select * from forms where text_field like '%this%' or 0 = 1; +5840ead423829c1eab29fa97 this is a test +select * from forms where text_field like '%this%'; +5840ead423829c1eab29fa97 this is a test diff --git a/tests/queries/0_stateless/02896_multiple_OR.sql b/tests/queries/0_stateless/02896_multiple_OR.sql new file mode 100644 index 000000000000..653ddebca1f1 --- /dev/null +++ b/tests/queries/0_stateless/02896_multiple_OR.sql @@ -0,0 +1,28 @@ +-- https://github.com/ClickHouse/ClickHouse/pull/52653 +DROP TABLE IF EXISTS or_bug; +CREATE TABLE or_bug (key UInt8) ENGINE=MergeTree ORDER BY key; +INSERT INTO TABLE or_bug VALUES (0), (1); + +-- { echoOn } +SELECT * FROM or_bug WHERE (key = 1) OR false OR false; +SELECT * FROM or_bug WHERE (key = 1) OR false; +SELECT * FROM or_bug WHERE (key = 1); +-- { echoOff } + +-- https://github.com/ClickHouse/ClickHouse/issues/55288 +DROP TABLE IF EXISTS forms; +CREATE TABLE forms +( + `form_id` FixedString(24), + `text_field` String +) +ENGINE = MergeTree +PRIMARY KEY form_id +ORDER BY form_id; +insert into forms values ('5840ead423829c1eab29fa97','this is a test'); + +-- { echoOn } +select * from forms where text_field like '%this%' or 0 = 1 or 0 = 1; +select * from forms where text_field like '%this%' or 0 = 1; +select * from forms where text_field like '%this%'; +-- { echoOff }