Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not apply projection if read-in-order was enabled. #50923

Merged
merged 4 commits into from Jun 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/Processors/QueryPlan/Optimizations/projectionsCommon.cpp
Expand Up @@ -38,6 +38,9 @@ bool canUseProjectionForReadingStep(ReadFromMergeTree * reading)
if (reading->isParallelReadingEnabled())
return false;

if (reading->readsInOrder())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@KochetovNicolai sometimes projections can be beneficial, even when read-in-order is possible. Of course to make the right choice there should be some statistics, but for now, if the user created such a projection maybe it is better to assume that he/she knew what he/she is doing and it will be better to disable in-order read for such case instead of disabling projections. What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly this issue #58912

return false;

// Currently projection don't support deduplication when moving parts between shards.
if (reading->getContext()->getSettingsRef().allow_experimental_query_deduplication)
return false;
Expand Down
5 changes: 5 additions & 0 deletions src/Processors/QueryPlan/ReadFromMergeTree.cpp
Expand Up @@ -1427,6 +1427,11 @@ bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction,
return true;
}

bool ReadFromMergeTree::readsInOrder() const
{
return reader_settings.read_in_order;
}

void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value)
{
query_info.prewhere_info = prewhere_info_value;
Expand Down
1 change: 1 addition & 0 deletions src/Processors/QueryPlan/ReadFromMergeTree.h
Expand Up @@ -161,6 +161,7 @@ class ReadFromMergeTree final : public SourceStepWithFilter

/// Returns `false` if requested reading cannot be performed.
bool requestReadingInOrder(size_t prefix_size, int direction, size_t limit);
bool readsInOrder() const;

void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value);

Expand Down
@@ -0,0 +1,4 @@
00000000-0000-0000-0000-000000000000 1643760000 0
00000000-0000-0000-0000-000000000000 1643760000 0
00000000-0000-0000-0000-000000000000 1643760000 0
00000000-0000-0000-0000-000000000000 1643760000 0
44 changes: 44 additions & 0 deletions tests/queries/0_stateless/02784_projections_read_in_order_bug.sql
@@ -0,0 +1,44 @@
create table events (
`organisation_id` UUID,
`session_id` UUID,
`id` UUID DEFAULT generateUUIDv4(),
`timestamp` UInt64,
`payload` String,
`customer_id` UUID,
`call_id` String,
PROJECTION events_by_session_and_org
(
SELECT *
ORDER BY
organisation_id,
session_id,
timestamp
),
PROJECTION events_by_session
(
SELECT *
ORDER BY
session_id,
timestamp
),
PROJECTION events_by_session_and_customer
(
SELECT *
ORDER BY
customer_id,
session_id,
timestamp
),
PROJECTION events_by_call_id
(
SELECT *
ORDER BY
call_id,
timestamp
)) engine = MergeTree order by (organisation_id, session_id, timestamp) settings index_granularity = 3;

insert into events values (reinterpretAsUUID(0), reinterpretAsUUID(1), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0)), (reinterpretAsUUID(0), reinterpretAsUUID(1), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0)), (reinterpretAsUUID(1), reinterpretAsUUID(0), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0)), (reinterpretAsUUID(1), reinterpretAsUUID(0), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0)), (reinterpretAsUUID(3), reinterpretAsUUID(2), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0)), (reinterpretAsUUID(3), reinterpretAsUUID(2), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0));
insert into events values (reinterpretAsUUID(0), reinterpretAsUUID(1), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0)), (reinterpretAsUUID(0), reinterpretAsUUID(1), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0)), (reinterpretAsUUID(1), reinterpretAsUUID(0), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0)), (reinterpretAsUUID(1), reinterpretAsUUID(0), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0)), (reinterpretAsUUID(3), reinterpretAsUUID(2), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0)), (reinterpretAsUUID(3), reinterpretAsUUID(2), reinterpretAsUUID(0), toDateTime('2022-02-02', 'UTC'), toString(0), reinterpretAsUUID(0), toString(0));

set read_in_order_two_level_merge_threshold=1;
SELECT id, timestamp, payload FROM events WHERE (organisation_id = reinterpretAsUUID(1)) AND (session_id = reinterpretAsUUID(0)) ORDER BY timestamp, payload, id ASC;