Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support optimize where clause with sorting key expression move to prewhere for query with final #38950

Merged
merged 15 commits into from
Feb 15, 2023
Merged
22 changes: 17 additions & 5 deletions src/Functions/sleep.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,17 @@ class FunctionSleep : public IFunction

return std::make_shared<DataTypeUInt8>();
}
ColumnPtr executeImplDryRun(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
{
return execute(arguments, result_type, true);
}

ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
{
return execute(arguments, result_type, false);
}

ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, bool dry_run) const
{
const IColumn * col = arguments[0].column.get();

Expand All @@ -99,11 +108,14 @@ class FunctionSleep : public IFunction
if (seconds > 3.0) /// The choice is arbitrary
throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds));

UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size);
UInt64 microseconds = static_cast<UInt64>(seconds * count * 1e6);
sleepForMicroseconds(microseconds);
ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);
ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds);
if (!dry_run)
{
UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size);
UInt64 microseconds = static_cast<UInt64>(seconds * count * 1e6);
sleepForMicroseconds(microseconds);
ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);
ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds);
}
}

/// convertToFullColumn needed, because otherwise (constant expression case) function will not get called on each columns.
Expand Down
51 changes: 11 additions & 40 deletions src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer(
, log{log_}
, column_sizes{std::move(column_sizes_)}
{
const auto & primary_key = metadata_snapshot->getPrimaryKey();
if (!primary_key.column_names.empty())
first_primary_key_column = primary_key.column_names[0];

for (const auto & name : queried_columns)
{
auto it = column_sizes.find(name);
Expand Down Expand Up @@ -193,8 +189,9 @@ void MergeTreeWhereOptimizer::analyzeImpl(Conditions & res, const ASTPtr & node,
/// Condition depend on some column. Constant expressions are not moved.
!cond.identifiers.empty()
&& !cannotBeMoved(node, is_final)
/// Do not take into consideration the conditions consisting only of the first primary key column
&& !hasPrimaryKeyAtoms(node)
/// When use final, do not take into consideration the conditions with non-sorting keys. Because final select
/// need to use all sorting keys, it will cause correctness issues if we filter other columns before final merge.
&& (!is_final || isExpressionOverSortingKey(node))
/// Only table columns are considered. Not array joined columns. NOTE We're assuming that aliases was expanded.
&& isSubsetOfTableColumns(cond.identifiers)
/// Do not move conditions involving all queried columns.
Expand Down Expand Up @@ -320,48 +317,22 @@ UInt64 MergeTreeWhereOptimizer::getIdentifiersColumnSize(const NameSet & identif
return size;
}


bool MergeTreeWhereOptimizer::hasPrimaryKeyAtoms(const ASTPtr & ast) const
bool MergeTreeWhereOptimizer::isExpressionOverSortingKey(const ASTPtr & ast) const
{
if (const auto * func = ast->as<ASTFunction>())
{
const auto & args = func->arguments->children;

if ((func->name == "not" && 1 == args.size()) || func->name == "and" || func->name == "or")
for (const auto & arg : args)
{
for (const auto & arg : args)
if (hasPrimaryKeyAtoms(arg))
return true;

return false;
if (isConstant(ast) || sorting_key_names.contains(arg->getColumnName()))
continue;
if (!isExpressionOverSortingKey(arg))
return false;
}
return true;
}

return isPrimaryKeyAtom(ast);
}


bool MergeTreeWhereOptimizer::isPrimaryKeyAtom(const ASTPtr & ast) const
{
if (const auto * func = ast->as<ASTFunction>())
{
if (!KeyCondition::atom_map.contains(func->name))
return false;

const auto & args = func->arguments->children;
if (args.size() != 2)
return false;

const auto & first_arg_name = args.front()->getColumnName();
const auto & second_arg_name = args.back()->getColumnName();

if ((first_primary_key_column == first_arg_name && isConstant(args[1]))
|| (first_primary_key_column == second_arg_name && isConstant(args[0]))
|| (first_primary_key_column == first_arg_name && functionIsInOrGlobalInOperator(func->name)))
return true;
}

return false;
return isConstant(ast) || sorting_key_names.contains(ast->getColumnName());
}


Expand Down
5 changes: 1 addition & 4 deletions src/Storages/MergeTree/MergeTreeWhereOptimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,7 @@ class MergeTreeWhereOptimizer : private boost::noncopyable

UInt64 getIdentifiersColumnSize(const NameSet & identifiers) const;

bool hasPrimaryKeyAtoms(const ASTPtr & ast) const;

bool isPrimaryKeyAtom(const ASTPtr & ast) const;
bool isExpressionOverSortingKey(const ASTPtr & ast) const;

bool isSortingKey(const String & column_name) const;

Expand All @@ -105,7 +103,6 @@ class MergeTreeWhereOptimizer : private boost::noncopyable

using StringSet = std::unordered_set<std::string>;

String first_primary_key_column;
const StringSet table_columns;
const Names queried_columns;
const NameSet sorting_key_names;
Expand Down
28 changes: 14 additions & 14 deletions tests/queries/0_stateless/00597_push_down_predicate_long.reference
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
)
WHERE id = 1
2000-01-01 1 test string 1 1
Expand All @@ -203,7 +203,7 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
)
WHERE id = 1
)
Expand All @@ -229,7 +229,7 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
) AS b
WHERE id = 1
)
Expand All @@ -248,7 +248,7 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
)
WHERE id = 1
2000-01-01 1 test string 1 1
Expand All @@ -272,7 +272,7 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
)
WHERE id = 1
)
Expand All @@ -291,7 +291,7 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
) AS b
WHERE id = 1
2000-01-01 1 test string 1 1
Expand All @@ -315,7 +315,7 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
) AS a
WHERE id = 1
) AS b
Expand All @@ -332,7 +332,7 @@ FROM
date,
min(value) AS value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
GROUP BY
id,
date
Expand All @@ -352,15 +352,15 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
UNION ALL
SELECT
date,
id,
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
)
WHERE id = 1
2000-01-01 1 test string 1 1
Expand All @@ -381,7 +381,7 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
)
ANY LEFT JOIN
(
Expand Down Expand Up @@ -441,7 +441,7 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
)
ANY LEFT JOIN
(
Expand Down Expand Up @@ -532,7 +532,7 @@ FROM
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
) AS a
ANY LEFT JOIN
(
Expand Down Expand Up @@ -579,7 +579,7 @@ SEMI LEFT JOIN
name,
value
FROM test_00597
WHERE id = 1
PREWHERE id = 1
)
WHERE id = 1
) AS r USING (id)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ FROM
n,
finalizeAggregation(s)
FROM test_00808_push_down_with_finalizeAggregation
WHERE (n <= 5) AND (n >= 2)
PREWHERE (n <= 5) AND (n >= 2)
)
WHERE (n >= 2) AND (n <= 5)
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,8 @@ select * from (select * from tab where (a + b) * c = 8 union all select * from t
select * from (explain plan actions = 1 select * from (select * from tab where (a + b) * c = 8 union all select * from tab3 where (a + b) * c = 18) order by sin(a / b)) where explain like '%sort description%' or explain like '%ReadType%';
Prefix sort description: sin(divide(a, b)) ASC
Result sort description: sin(divide(a, b)) ASC
ReadType: InOrder
ReadType: InOrder
ReadType: InOrder
ReadType: InOrder
select * from (select * from tab where (a + b) * c = 8 union all select * from tab4) order by sin(a / b);
2 2 2 2
2 2 2 2
Expand All @@ -311,7 +311,7 @@ select * from (select * from tab where (a + b) * c = 8 union all select * from t
select * from (explain plan actions = 1 select * from (select * from tab where (a + b) * c = 8 union all select * from tab4) order by sin(a / b)) where explain like '%sort description%' or explain like '%ReadType%';
Prefix sort description: sin(divide(a, b)) ASC
Result sort description: sin(divide(a, b)) ASC
ReadType: InOrder
ReadType: InOrder
ReadType: InOrder
select * from (select * from tab union all select * from tab5) order by (a + b) * c;
0 0 0 0
Expand Down Expand Up @@ -403,3 +403,8 @@ select * from (explain plan actions = 1 select * from (select * from tab union a
Sort description: multiply(plus(a, b), c) ASC, sin(divide(a, b)) ASC, d ASC
Limit 3
ReadType: Default
drop table if exists tab;
drop table if exists tab2;
drop table if exists tab3;
drop table if exists tab4;
drop table if exists tab5;
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
SET optimize_read_in_order = 1, query_plan_read_in_order=1;

drop table if exists tab;
drop table if exists tab2;
drop table if exists tab3;
drop table if exists tab4;
drop table if exists tab5;

create table tab (a UInt32, b UInt32, c UInt32, d UInt32) engine = MergeTree order by ((a + b) * c, sin(a / b));
insert into tab select number, number, number, number from numbers(5);
insert into tab select number, number, number, number from numbers(5);
Expand Down Expand Up @@ -142,3 +148,9 @@ select * from (explain plan actions = 1 select * from (select * from tab union a
-- In case of tab4, we do full sorting by ((a + b) * c, sin(a / b), d) with LIMIT. We can replace it to sorting by ((a + b) * c, sin(a / b)) and LIMIT WITH TIES, when sorting alog support it.
select * from (select * from tab union all select * from tab5 union all select * from tab4) order by (a + b) * c, sin(a / b), d limit 3;
select * from (explain plan actions = 1 select * from (select * from tab union all select * from tab5 union all select * from tab4) order by (a + b) * c, sin(a / b), d limit 3) where explain ilike '%sort description%' or explain like '%ReadType%' or explain like '%Limit%';

drop table if exists tab;
drop table if exists tab2;
drop table if exists tab3;
drop table if exists tab4;
drop table if exists tab5;
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
optimize_move_to_prewhere_if_final = 1

SELECT
x,
y,
z
FROM prewhere_move_select_final
PREWHERE x > 100

SELECT
x,
y,
z
FROM prewhere_move_select_final
FINAL
PREWHERE x > 100

SELECT
x,
y,
Expand All @@ -15,6 +30,21 @@ FROM prewhere_move_select_final
FINAL
PREWHERE y > 100

SELECT
x,
y,
z
FROM prewhere_move_select_final
PREWHERE (x + y) > 100

SELECT
x,
y,
z
FROM prewhere_move_select_final
FINAL
PREWHERE (x + y) > 100

SELECT
x,
y,
Expand All @@ -32,6 +62,24 @@ FINAL
PREWHERE y > 100
WHERE (y > 100) AND (z > 400)

SELECT
x,
y,
z
FROM prewhere_move_select_final
FINAL
PREWHERE x > 50
WHERE (x > 50) AND (z > 400)

SELECT
x,
y,
z
FROM prewhere_move_select_final
FINAL
PREWHERE (x + y) > 50
WHERE ((x + y) > 50) AND (z > 400)

optimize_move_to_prewhere_if_final = 0

SELECT
Expand Down