Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport #50590 to 23.5: Fix bug in uniqExact parallel merging #50807

Merged
merged 1 commit on Jun 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
53 changes: 31 additions & 22 deletions src/AggregateFunctions/UniqExactSet.h
@@ -1,10 +1,11 @@
#pragma once

#include <exception>
#include <Common/CurrentThread.h>
#include <Common/HashTable/HashSet.h>
#include <Common/ThreadPool.h>
#include <Common/setThreadName.h>
#include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h>


namespace DB
Expand Down Expand Up @@ -48,30 +49,38 @@ class UniqExactSet
}
else
{
auto next_bucket_to_merge = std::make_shared<std::atomic_uint32_t>(0);

auto thread_func = [&lhs, &rhs, next_bucket_to_merge, thread_group = CurrentThread::getGroup()]()
try
{
SCOPE_EXIT_SAFE(
if (thread_group)
CurrentThread::detachFromGroupIfNotDetached();
);
if (thread_group)
CurrentThread::attachToGroupIfDetached(thread_group);
setThreadName("UniqExactMerger");
auto next_bucket_to_merge = std::make_shared<std::atomic_uint32_t>(0);

while (true)
auto thread_func = [&lhs, &rhs, next_bucket_to_merge, thread_group = CurrentThread::getGroup()]()
{
const auto bucket = next_bucket_to_merge->fetch_add(1);
if (bucket >= rhs.NUM_BUCKETS)
return;
lhs.impls[bucket].merge(rhs.impls[bucket]);
}
};

for (size_t i = 0; i < std::min<size_t>(thread_pool->getMaxThreads(), rhs.NUM_BUCKETS); ++i)
thread_pool->scheduleOrThrowOnError(thread_func);
thread_pool->wait();
SCOPE_EXIT_SAFE(
if (thread_group)
CurrentThread::detachFromGroupIfNotDetached();
);
if (thread_group)
CurrentThread::attachToGroupIfDetached(thread_group);
setThreadName("UniqExactMerger");

while (true)
{
const auto bucket = next_bucket_to_merge->fetch_add(1);
if (bucket >= rhs.NUM_BUCKETS)
return;
lhs.impls[bucket].merge(rhs.impls[bucket]);
}
};

for (size_t i = 0; i < std::min<size_t>(thread_pool->getMaxThreads(), rhs.NUM_BUCKETS); ++i)
thread_pool->scheduleOrThrowOnError(thread_func);
thread_pool->wait();
}
catch (...)
{
thread_pool->wait();
throw;
}
}
}
}
Expand Down
21 changes: 21 additions & 0 deletions tests/queries/0_stateless/02782_uniq_exact_parallel_merging_bug.sh
@@ -0,0 +1,21 @@
#!/usr/bin/env bash
# Tags: long, no-random-settings, no-tsan, no-asan, no-ubsan, no-msan

# Stress test: runs many concurrent `count(distinct s)` queries over a
# 10-million-row String column under a 50Mi per-query memory limit.
# NOTE(review): judging by the file name, this is the regression test for the
# uniqExact parallel-merging bug; the exact failure mode it reproduces is not
# visible from this script alone — confirm against the accompanying fix.

# SC2154 = "variable is referenced but not assigned". CLICKHOUSE_DATABASE is used
# below without being assigned in this file (presumably it is set by
# shell_config.sh — confirm).
# shellcheck disable=SC2154

# Cleared before shell_config.sh is sourced.
# NOTE(review): the reason is not visible here — presumably it keeps the sourced
# config from attaching a log comment to the client invocations; confirm.
unset CLICKHOUSE_LOG_COMMENT

# Absolute path of the directory containing this script, so the shared config
# can be located regardless of the caller's working directory.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh


# Create the single-column test table in the test database.
clickhouse-client -q "
CREATE TABLE ${CLICKHOUSE_DATABASE}.t(s String)
ENGINE = MergeTree
ORDER BY tuple();
"

# Fill it with 1e7 rows: every 10th row holds the row number as a string, all
# other rows hold the empty string.
clickhouse-client -q "insert into ${CLICKHOUSE_DATABASE}.t select number%10==0 ? toString(number) : '' from numbers_mt(1e7)"

# Run the query with -c 16 (concurrency) and -i 1000 (iterations). The query caps
# max_memory_usage at '50Mi'; --ignore-error is passed and stderr is discarded.
# NOTE(review): presumably individual queries are expected to fail on the memory
# limit and the test only checks that the server survives — confirm.
clickhouse-benchmark -q "select count(distinct s) from ${CLICKHOUSE_DATABASE}.t settings max_memory_usage = '50Mi'" --ignore-error -c 16 -i 1000 2>/dev/null