
Fix mismatched checksums with column TTLs #9451

Merged 1 commit on Feb 29, 2020
8 changes: 5 additions & 3 deletions dbms/src/Storages/MergeTree/MergeTreeDataPartTTLInfo.h
@@ -2,7 +2,7 @@
 #include <IO/WriteBufferFromFile.h>
 #include <IO/ReadBufferFromFile.h>
 
-#include <unordered_map>
+#include <map>
 
 namespace DB
 {
@@ -33,15 +33,17 @@ struct MergeTreeDataPartTTLInfo
 /// PartTTLInfo for all columns and table with minimal ttl for whole part
 struct MergeTreeDataPartTTLInfos
 {
-    std::unordered_map<String, MergeTreeDataPartTTLInfo> columns_ttl;
+    /// Order is important as it would be serialized and hashed for checksums
+    std::map<String, MergeTreeDataPartTTLInfo> columns_ttl;
     MergeTreeDataPartTTLInfo table_ttl;
 
     /// `part_min_ttl` and `part_max_ttl` are TTLs which are used for selecting parts
     /// to merge in order to remove expired rows.
     time_t part_min_ttl = 0;
     time_t part_max_ttl = 0;
 
-    std::unordered_map<String, MergeTreeDataPartTTLInfo> moves_ttl;
+    /// Order is important as it would be serialized and hashed for checksums
+    std::map<String, MergeTreeDataPartTTLInfo> moves_ttl;
 
     void read(ReadBuffer & in);
     void write(WriteBuffer & out) const;
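For context on why this one-line container change fixes the mismatched checksums: the per-column TTL infos are serialized and hashed into the part checksums, and `std::unordered_map` does not guarantee any iteration order, so two replicas holding the same logical TTL data could serialize it in different orders and end up with different checksums. `std::map` always iterates keys in sorted order, so the serialized bytes are identical everywhere. Below is a minimal sketch of the effect, with a toy serialize-then-hash helper (`serialize_and_hash` is made up for illustration; it is not the actual ClickHouse serialization code):

// Toy illustration only: a hash of the serialized key/value stream stands in for the
// real part checksum.
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <unordered_map>

template <typename Map>
size_t serialize_and_hash(const Map & ttl_by_column)
{
    std::string buffer;
    for (const auto & [column, ttl] : ttl_by_column)   // iteration order decides the bytes
        buffer += column + ":" + std::to_string(ttl) + ";";
    return std::hash<std::string>{}(buffer);
}

int main()
{
    // std::map visits keys in sorted order, so every replica produces the same bytes.
    std::map<std::string, long> ordered{{"a", 100}, {"_idx", 200}, {"_offset", 300}};

    // std::unordered_map iteration order is unspecified and may differ between
    // replicas or builds, so the same data can hash to a different "checksum".
    std::unordered_map<std::string, long> unordered(ordered.begin(), ordered.end());

    std::cout << "ordered map hash:   " << serialize_and_hash(ordered) << "\n";
    std::cout << "unordered map hash: " << serialize_and_hash(unordered) << "\n";
}

With `std::map`, replicas that compute the TTL info independently still write byte-identical TTL metadata, so the checksums they compare during replication always match.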
36 changes: 36 additions & 0 deletions dbms/tests/integration/test_ttl_replicated/test.py
@@ -44,7 +44,43 @@ def test_ttl_columns(started_cluster):
     expected = "1\t0\t0\n2\t0\t0\n"
     assert TSV(node1.query("SELECT id, a, b FROM test_ttl ORDER BY id")) == TSV(expected)
     assert TSV(node2.query("SELECT id, a, b FROM test_ttl ORDER BY id")) == TSV(expected)
 
+
+def test_ttl_many_columns(started_cluster):
+    drop_table([node1, node2], "test_ttl_2")
+    for node in [node1, node2]:
+        node.query(
+        '''
+            CREATE TABLE test_ttl_2(date DateTime, id UInt32,
+                a Int32 TTL date,
+                _idx Int32 TTL date,
+                _offset Int32 TTL date,
+                _partition Int32 TTL date)
+            ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/test_ttl_2', '{replica}')
+            ORDER BY id PARTITION BY toDayOfMonth(date) SETTINGS merge_with_ttl_timeout=0;
+        '''.format(replica=node.name))
+
+    node1.query("SYSTEM STOP MERGES test_ttl_2")
+    node2.query("SYSTEM STOP MERGES test_ttl_2")
+
+    node1.query("INSERT INTO test_ttl_2 VALUES (toDateTime('2000-10-10 00:00:00'), 1, 2, 3, 4, 5)")
+    node1.query("INSERT INTO test_ttl_2 VALUES (toDateTime('2100-10-10 10:00:00'), 6, 7, 8, 9, 10)")
+
+    # Check that part will appear in result of merge
+    node1.query("SYSTEM STOP FETCHES test_ttl_2")
+    node2.query("SYSTEM STOP FETCHES test_ttl_2")
+
+    node1.query("SYSTEM START MERGES test_ttl_2")
+    node2.query("SYSTEM START MERGES test_ttl_2")
+
+    time.sleep(1) # sleep to allow use ttl merge selector for second time
+    node1.query("OPTIMIZE TABLE test_ttl_2 FINAL", timeout=5)
+
+    expected = "1\t0\t0\t0\t0\n6\t7\t8\t9\t10\n"
+    assert TSV(node1.query("SELECT id, a, _idx, _offset, _partition FROM test_ttl_2 ORDER BY id")) == TSV(expected)
+    assert TSV(node2.query("SELECT id, a, _idx, _offset, _partition FROM test_ttl_2 ORDER BY id")) == TSV(expected)
+
+
 @pytest.mark.parametrize("delete_suffix", [
     "",
     "DELETE",