From b8a87abba47441d97b3c85a0473d02919e1c071b Mon Sep 17 00:00:00 2001 From: Bereng Date: Fri, 9 Dec 2022 11:13:52 +0100 Subject: [PATCH] Do not leak 2015 synthetic memtable Epoch patch by Berenguer Blasi; reviewed by Caleb Rackliffe for CASSANDRA-18118 --- build.xml | 2 ++ .../org/apache/cassandra/db/Memtable.java | 20 ++++++++++++++++++- .../db/SinglePartitionReadCommand.java | 3 ++- .../db/compaction/CompactionController.java | 16 ++++++++++----- .../cassandra/db/ColumnFamilyStoreTest.java | 9 +++++++++ 5 files changed, 43 insertions(+), 7 deletions(-) diff --git a/build.xml b/build.xml index 5e15f4923d7f..590e523fcefb 100644 --- a/build.xml +++ b/build.xml @@ -1227,6 +1227,8 @@ + + diff --git a/src/java/org/apache/cassandra/db/Memtable.java b/src/java/org/apache/cassandra/db/Memtable.java index ae8b8d3a74a6..9cfcb2f830a6 100644 --- a/src/java/org/apache/cassandra/db/Memtable.java +++ b/src/java/org/apache/cassandra/db/Memtable.java @@ -66,6 +66,7 @@ public class Memtable implements Comparable private static final Logger logger = LoggerFactory.getLogger(Memtable.class); public static final MemtablePool MEMORY_POOL = createMemtableAllocatorPool(); + public static final long NO_MIN_TIMESTAMP = -1; private static MemtablePool createMemtableAllocatorPool() { @@ -161,6 +162,16 @@ public Memtable(CFMetaData metadata) this.columnsCollector = new ColumnsCollector(metadata.partitionColumns()); } + @VisibleForTesting + public Memtable(CFMetaData metadata, long minTimestamp) + { + this.initialComparator = metadata.comparator; + this.cfs = null; + this.allocator = null; + this.columnsCollector = new ColumnsCollector(metadata.partitionColumns()); + this.minTimestamp = minTimestamp; + } + public MemtableAllocator getAllocator() { return allocator; @@ -387,9 +398,16 @@ public Partition getPartition(DecoratedKey key) return partitions.get(key); } + /** + * Returns the minTS if one available, otherwise NO_MIN_TIMESTAMP. + * + * EncodingStats uses a synthetic epoch TS at 2015. We don't want to leak that (CASSANDRA-18118) so we return NO_MIN_TIMESTAMP instead. + * + * @return The minTS or NO_MIN_TIMESTAMP if none available + */ public long getMinTimestamp() { - return minTimestamp; + return minTimestamp != EncodingStats.NO_STATS.minTimestamp ? minTimestamp : NO_MIN_TIMESTAMP; } /** diff --git a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java index a97243be2f3c..dfdcc030141e 100644 --- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java +++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java @@ -713,7 +713,8 @@ private UnfilteredRowIterator queryMemtableAndDiskInternal(ColumnFamilyStore cfs if (partition == null) continue; - minTimestamp = Math.min(minTimestamp, memtable.getMinTimestamp()); + if (memtable.getMinTimestamp() != Memtable.NO_MIN_TIMESTAMP) + minTimestamp = Math.min(minTimestamp, memtable.getMinTimestamp()); @SuppressWarnings("resource") // 'iter' is added to iterators which is closed on exception, or through the closing of the final merged iterator UnfilteredRowIterator iter = filter.getUnfilteredRowIterator(columnFilter(), partition); diff --git a/src/java/org/apache/cassandra/db/compaction/CompactionController.java b/src/java/org/apache/cassandra/db/compaction/CompactionController.java index 84aac090523c..19318ff1a901 100644 --- a/src/java/org/apache/cassandra/db/compaction/CompactionController.java +++ b/src/java/org/apache/cassandra/db/compaction/CompactionController.java @@ -207,7 +207,10 @@ public static Set getFullyExpiredSSTables(ColumnFamilyStore cfSto } for (Memtable memtable : cfStore.getTracker().getView().getAllMemtables()) - minTimestamp = Math.min(minTimestamp, memtable.getMinTimestamp()); + { + if (memtable.getMinTimestamp() != Memtable.NO_MIN_TIMESTAMP) + minTimestamp = Math.min(minTimestamp, memtable.getMinTimestamp()); + } // At this point, minTimestamp denotes the lowest timestamp of any relevant // SSTable or Memtable that contains a constructive value. candidates contains all the @@ -281,11 +284,14 @@ public Predicate getPurgeEvaluator(DecoratedKey key) for (Memtable memtable : memtables) { - Partition partition = memtable.getPartition(key); - if (partition != null) + if (memtable.getMinTimestamp() != Memtable.NO_MIN_TIMESTAMP) { - minTimestampSeen = Math.min(minTimestampSeen, partition.stats().minTimestamp); - hasTimestamp = true; + Partition partition = memtable.getPartition(key); + if (partition != null) + { + minTimestampSeen = Math.min(minTimestampSeen, partition.stats().minTimestamp); + hasTimestamp = true; + } } } diff --git a/test/unit/org/apache/cassandra/db/ColumnFamilyStoreTest.java b/test/unit/org/apache/cassandra/db/ColumnFamilyStoreTest.java index 3987a29f2dad..8e8e9c94b75f 100644 --- a/test/unit/org/apache/cassandra/db/ColumnFamilyStoreTest.java +++ b/test/unit/org/apache/cassandra/db/ColumnFamilyStoreTest.java @@ -98,6 +98,15 @@ public void truncateCFS() Keyspace.open(KEYSPACE2).getColumnFamilyStore(CF_STANDARD1).truncateBlocking(); } + @Test + public void testMemtableTimestamp() throws Throwable + { + assertEquals(Memtable.NO_MIN_TIMESTAMP, + (new Memtable(Keyspace.open(KEYSPACE1).getColumnFamilyStore(CF_STANDARD1).metadata, + EncodingStats.NO_STATS.minTimestamp)) + .getMinTimestamp()); + } + @Test // create two sstables, and verify that we only deserialize data from the most recent one public void testTimeSortedQuery()