Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix primary.idx corruption after delete mutation #9048

Merged
merged 6 commits into from Feb 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
34 changes: 34 additions & 0 deletions dbms/src/Interpreters/MutationsInterpreter.cpp
Expand Up @@ -18,6 +18,7 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/ASTOrderByElement.h>
#include <IO/WriteHelpers.h>


Expand Down Expand Up @@ -525,6 +526,39 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
}
select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression));
}
auto metadata = storage->getInMemoryMetadata();
/// We have to execute select in order of primary key
/// because we don't sort results additionaly and don't have
/// any guarantees on data order without ORDER BY. It's almost free, because we
/// have optimization for data read in primary key order.
if (metadata.order_by_ast)
{
ASTPtr dummy;

ASTPtr key_expr;
if (metadata.primary_key_ast)
key_expr = metadata.primary_key_ast;
else
key_expr = metadata.order_by_ast;

bool empty = false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does this check for, something like order by tuple()? Can we just remove this check and pass the order by always, or does it break something? Btw I found that order by tuple() doesn't seem to work, because tuple requires at least one argument, but order by 1 does.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes we cannot order by tuple() and in this case we don't need order by at all.

/// In all other cases we cannot have empty key
if (auto key_function = key_expr->as<ASTFunction>())
empty = key_function->arguments->children.size() == 0;

/// Not explicitely spicified empty key
if (!empty)
{
auto order_by_expr = std::make_shared<ASTOrderByElement>(1, 1, false, dummy, false, dummy, dummy, dummy);


order_by_expr->children.push_back(key_expr);
auto res = std::make_shared<ASTExpressionList>();
res->children.push_back(order_by_expr);

select->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(res));
}
}

return select;
}
Expand Down
6 changes: 6 additions & 0 deletions dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h
Expand Up @@ -47,6 +47,12 @@ class StorageFromMergeTreeDataPart : public ext::shared_ptr_helper<StorageFromMe
return part->storage.mayBenefitFromIndexForIn(left_in_operand, query_context);
}

StorageInMemoryMetadata getInMemoryMetadata() const override
{
return part->storage.getInMemoryMetadata();
}


protected:
StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_)
: IStorage(getIDFromPart(part_), part_->storage.getVirtuals())
Expand Down
16 changes: 9 additions & 7 deletions dbms/src/Storages/StorageReplicatedMergeTree.cpp
Expand Up @@ -3222,6 +3222,7 @@ void StorageReplicatedMergeTree::alter(
/// metadata alter.
if (params.isSettingsAlter())
{
lockStructureExclusively(table_lock_holder, query_context.getCurrentQueryId());
/// We don't replicate storage_settings_ptr ALTER. It's local operation.
/// Also we don't upgrade alter lock to table structure lock.
LOG_DEBUG(log, "ALTER storage_settings_ptr only");
Expand Down Expand Up @@ -3271,9 +3272,7 @@ void StorageReplicatedMergeTree::alter(
std::vector<ChangedNode> changed_nodes;

{
/// Just to read current structure. Alter will be done in separate thread.
auto table_lock = lockStructureForShare(false, query_context.getCurrentQueryId());

/// We can safely read structure, because we guarded with alter_intention_lock
if (is_readonly)
throw Exception("Can't ALTER readonly table", ErrorCodes::TABLE_IS_READ_ONLY);

Expand Down Expand Up @@ -3305,10 +3304,13 @@ void StorageReplicatedMergeTree::alter(

/// Perform settings update locally

auto old_metadata = getInMemoryMetadata();
old_metadata.settings_ast = metadata.settings_ast;
changeSettings(metadata.settings_ast, table_lock_holder);
global_context.getDatabase(table_id.database_name)->alterTable(query_context, table_id.table_name, old_metadata);
{
lockStructureExclusively(table_lock_holder, query_context.getCurrentQueryId());
auto old_metadata = getInMemoryMetadata();
old_metadata.settings_ast = metadata.settings_ast;
changeSettings(metadata.settings_ast, table_lock_holder);
global_context.getDatabase(table_id.database_name)->alterTable(query_context, table_id.table_name, old_metadata);
}

/// Modify shared metadata nodes in ZooKeeper.
Coordination::Requests ops;
Expand Down
@@ -0,0 +1,4 @@
43200
0
43200
43200
@@ -0,0 +1,60 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh


$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS movement"

$CLICKHOUSE_CLIENT -n --query "CREATE TABLE movement (date DateTime('Europe/Moscow')) Engine = MergeTree ORDER BY (toStartOfHour(date));"

$CLICKHOUSE_CLIENT --query "insert into movement select toDateTime('2020-01-22 00:00:00') + number%(23*3600) from numbers(1000000);"

$CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE movement FINAL"

$CLICKHOUSE_CLIENT -n --query "
SELECT
count(),
toStartOfHour(date) AS Hour
FROM movement
WHERE (date >= toDateTime('2020-01-22T10:00:00')) AND (date <= toDateTime('2020-01-22T23:00:00'))
GROUP BY Hour
ORDER BY Hour DESC
" | grep "16:00:00" | cut -f1


$CLICKHOUSE_CLIENT --query "alter table movement delete where date >= toDateTime('2020-01-22T16:00:00') and date < toDateTime('2020-01-22T17:00:00') SETTINGS mutations_sync = 2"

$CLICKHOUSE_CLIENT -n --query "
SELECT
count(),
toStartOfHour(date) AS Hour
FROM movement
WHERE (date >= toDateTime('2020-01-22T10:00:00')) AND (date <= toDateTime('2020-01-22T23:00:00'))
GROUP BY Hour
ORDER BY Hour DESC
" | grep "16:00:00" | wc -l


$CLICKHOUSE_CLIENT -n --query "
SELECT
count(),
toStartOfHour(date) AS Hour
FROM movement
WHERE (date >= toDateTime('2020-01-22T10:00:00')) AND (date <= toDateTime('2020-01-22T23:00:00'))
GROUP BY Hour
ORDER BY Hour DESC
" | grep "22:00:00" | cut -f1


$CLICKHOUSE_CLIENT -n --query "
SELECT
count(),
toStartOfHour(date) AS Hour
FROM movement
GROUP BY Hour
ORDER BY Hour DESC
" | grep "22:00:00" | cut -f1


$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS movement"