Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

In KeyCondition: Fixed execution of inversed predicates for non-strictly monotinic functional index #9223

Merged
merged 4 commits into from Feb 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
116 changes: 96 additions & 20 deletions dbms/src/Storages/MergeTree/KeyCondition.cpp
Expand Up @@ -264,6 +264,78 @@ const KeyCondition::AtomMap KeyCondition::atom_map
};


static const std::map<std::string, std::string> inverse_relations = {
{"equals", "notEquals"},
{"notEquals", "equals"},
{"less", "greaterOrEquals"},
{"greaterOrEquals", "less"},
{"greater", "lessOrEquals"},
{"lessOrEquals", "greater"},
{"in", "notIn"},
{"notIn", "in"},
{"like", "notLike"},
{"notLike", "like"},
{"empty", "notEmpty"},
{"notEmpty", "empty"},
};


bool isLogicalOperator(const String & func_name)
{
return (func_name == "and" || func_name == "or" || func_name == "not" || func_name == "indexHint");
}

/// The node can be one of:
/// - Logical operator (AND, OR, NOT and indexHint() - logical NOOP)
/// - An "atom" (relational operator, constant, expression)
/// - A logical constant expression
/// - Any other function
ASTPtr cloneASTWithInversionPushDown(const ASTPtr node, const bool need_inversion = false)
{
const ASTFunction * func = node->as<ASTFunction>();

if (func && isLogicalOperator(func->name))
{
if (func->name == "not")
{
return cloneASTWithInversionPushDown(func->arguments->children.front(), !need_inversion);
}

const auto result_node = makeASTFunction(func->name);

/// indexHint() is a special case - logical NOOP function
if (result_node->name != "indexHint" && need_inversion)
{
result_node->name = (result_node->name == "and") ? "or" : "and";
}

if (func->arguments)
{
for (const auto & child : func->arguments->children)
{
result_node->arguments->children.push_back(cloneASTWithInversionPushDown(child, need_inversion));
}
}

return result_node;
}

const auto cloned_node = node->clone();

if (func && inverse_relations.find(func->name) != inverse_relations.cend())
{
if (need_inversion)
{
cloned_node->as<ASTFunction>()->name = inverse_relations.at(func->name);
}

return cloned_node;
}

return need_inversion ? makeASTFunction("not", cloned_node) : cloned_node;
}


inline bool Range::equals(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateEquals(), lhs, rhs); }
inline bool Range::less(const Field & lhs, const Field & rhs) { return applyVisitor(FieldVisitorAccurateLess(), lhs, rhs); }

Expand Down Expand Up @@ -345,21 +417,23 @@ KeyCondition::KeyCondition(
*/
Block block_with_constants = getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context);

/// Trasform WHERE section to Reverse Polish notation
const auto & select = query_info.query->as<ASTSelectQuery &>();
if (select.where())
{
traverseAST(select.where(), context, block_with_constants);

if (select.prewhere())
{
traverseAST(select.prewhere(), context, block_with_constants);
rpn.emplace_back(RPNElement::FUNCTION_AND);
}
}
else if (select.prewhere())
const ASTSelectQuery & select = query_info.query->as<ASTSelectQuery &>();
if (select.where() || select.prewhere())
{
traverseAST(select.prewhere(), context, block_with_constants);
ASTPtr filter_query;
if (select.where() && select.prewhere())
filter_query = makeASTFunction("and", select.where(), select.prewhere());
else
filter_query = select.where() ? select.where() : select.prewhere();

/** When non-strictly monotonic functions are employed in functional index (e.g. ORDER BY toStartOfHour(dateTime)),
* the use of NOT operator in predicate will result in the indexing algorithm leave out some data.
* This is caused by rewriting in KeyCondition::tryParseAtomFromAST of relational operators to less strict
* when parsing the AST into internal RPN representation.
* To overcome the problem, before parsing the AST we transform it to its semantically equivalent form where all NOT's
* are pushed down and applied (when possible) to leaf nodes.
*/
traverseAST(cloneASTWithInversionPushDown(filter_query), context, block_with_constants);
}
else
{
Expand Down Expand Up @@ -432,9 +506,9 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo
{
RPNElement element;

if (auto * func = node->as<ASTFunction>())
if (const auto * func = node->as<ASTFunction>())
{
if (operatorFromAST(func, element))
if (tryParseLogicalOperatorFromAST(func, element))
{
auto & args = func->arguments->children;
for (size_t i = 0, size = args.size(); i < size; ++i)
Expand All @@ -452,7 +526,7 @@ void KeyCondition::traverseAST(const ASTPtr & node, const Context & context, Blo
}
}

if (!atomFromAST(node, context, block_with_constants, element))
if (!tryParseAtomFromAST(node, context, block_with_constants, element))
{
element.function = RPNElement::FUNCTION_UNKNOWN;
}
Expand Down Expand Up @@ -680,7 +754,7 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value,
}


bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out)
{
/** Functions < > = != <= >= in `notIn`, where one argument is a constant, and the other is one of columns of key,
* or itself, wrapped in a chain of possibly-monotonic functions,
Expand Down Expand Up @@ -768,7 +842,9 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
func_name = "lessOrEquals";
else if (func_name == "lessOrEquals")
func_name = "greaterOrEquals";
else if (func_name == "in" || func_name == "notIn" || func_name == "like")
else if (func_name == "in" || func_name == "notIn" ||
func_name == "like" || func_name == "notLike" ||
func_name == "startsWith")
{
/// "const IN data_column" doesn't make sense (unlike "data_column IN const")
return false;
Expand Down Expand Up @@ -809,7 +885,7 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
return false;
}

bool KeyCondition::operatorFromAST(const ASTFunction * func, RPNElement & out)
bool KeyCondition::tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out)
{
/// Functions AND, OR, NOT.
/** Also a special function `indexHint` - works as if instead of calling a function there are just parentheses
Expand Down
4 changes: 2 additions & 2 deletions dbms/src/Storages/MergeTree/KeyCondition.h
Expand Up @@ -369,8 +369,8 @@ class KeyCondition
BoolMask initial_mask) const;

void traverseAST(const ASTPtr & node, const Context & context, Block & block_with_constants);
bool atomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
bool operatorFromAST(const ASTFunction * func, RPNElement & out);
bool tryParseAtomFromAST(const ASTPtr & node, const Context & context, Block & block_with_constants, RPNElement & out);
bool tryParseLogicalOperatorFromAST(const ASTFunction * func, RPNElement & out);

/** Is node the key column
* or expression in which column of key is wrapped by chain of functions,
Expand Down
@@ -0,0 +1,33 @@
TP1
7.51
7.42
7.41
7.42
7.41
7.42
7.41
7.42
7.41
7.51
TP2
7.42
7.41
7.42
7.51
7.42
7.41
7.51
7.51
TP3
7.42
7.41
7.51
TP4
7.42
7.41
7.42
7.42
7.41
TP5
7.41
7.51
@@ -0,0 +1,33 @@
SET max_threads = 1;

CREATE TABLE IF NOT EXISTS functional_index_mergetree (x Float64) ENGINE = MergeTree ORDER BY round(x);
INSERT INTO functional_index_mergetree VALUES (7.42)(7.41)(7.51);

SELECT 'TP1';
SELECT * FROM functional_index_mergetree WHERE x > 7.42;
SELECT * FROM functional_index_mergetree WHERE x < 7.49;
SELECT * FROM functional_index_mergetree WHERE x < 7.5;

SELECT * FROM functional_index_mergetree WHERE NOT (NOT x < 7.49);
SELECT * FROM functional_index_mergetree WHERE NOT (NOT x < 7.5);
SELECT * FROM functional_index_mergetree WHERE NOT (NOT x > 7.42);

SELECT 'TP2';
SELECT * FROM functional_index_mergetree WHERE NOT x > 7.49;
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.42;
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.41;
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.5;

SELECT 'TP3';
SELECT * FROM functional_index_mergetree WHERE x > 7.41 AND x < 7.51;
SELECT * FROM functional_index_mergetree WHERE NOT (x > 7.41 AND x < 7.51);

SELECT 'TP4';
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.41 AND NOT x > 7.49;
SELECT * FROM functional_index_mergetree WHERE NOT x < 7.42 AND NOT x > 7.42;
SELECT * FROM functional_index_mergetree WHERE (NOT x < 7.4) AND (NOT x > 7.49);

SELECT 'TP5';
SELECT * FROM functional_index_mergetree WHERE NOT or(NOT x, toUInt64(x) AND NOT floor(x) > 6, x >= 7.42 AND round(x) <= 7);

DROP TABLE functional_index_mergetree;