From e2ecdf268a82fa3ac0f4c9fe77ab35bca33cc72a Mon Sep 17 00:00:00 2001 From: Sylvain Lebresne Date: Wed, 12 Aug 2020 16:29:43 +0200 Subject: [PATCH] Remove broken "defragment-on-read" optimization The read path for names queries has had a "defragment-on-read" optimization for a while whereby if too many sstables are hit by the read, the result is written back into memtable, in the hope that later reads will only read that newly written data in a single sstable (or at least fewer). The principle of that optimisation does not work however as data is written back with the same timestamp as it originally has and that means future reads cannot know to skip older sstables (at least with the metadata we currently store). As such, this optimisation never saved anything and in fact added load. The patch removes that broken code. Patch by Sylvain Lebresne, reviewed by Aleksey Yeschenko for CASSANDRA-15432 --- CHANGES.txt | 1 + .../db/SinglePartitionReadCommand.java | 28 ------------------- .../AbstractCompactionStrategy.java | 5 ---- .../compaction/CompactionStrategyManager.java | 6 ---- .../SizeTieredCompactionStrategy.java | 6 ---- 5 files changed, 1 insertion(+), 45 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 7d4b7a9fcff6..9b4f8c3e8a43 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,4 +1,5 @@ 3.0.22: + * Remove broken 'defrag-on-read' optimization (CASSANDRA-15432) * Check for endpoint collision with hibernating nodes (CASSANDRA-14599) * Operational improvements and hardening for replica filtering protection (CASSANDRA-15907) * stop_paranoid disk failure policy is ignored on CorruptSSTableException after node is up (CASSANDRA-15191) diff --git a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java index 841c3b9f992a..ca4e8e3758b4 100644 --- a/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java +++ b/src/java/org/apache/cassandra/db/SinglePartitionReadCommand.java @@ -883,7 +883,6 @@ private UnfilteredRowIterator queryMemtableAndSSTablesInTimestampOrder(ColumnFam /* add the SSTables on disk */ Collections.sort(view.sstables, SSTableReader.maxTimestampComparator); - boolean onlyUnrepaired = true; // read sorted sstables SSTableReadMetricsCollector metricsCollector = new SSTableReadMetricsCollector(); for (SSTableReader sstable : view.sstables) @@ -952,9 +951,6 @@ private UnfilteredRowIterator queryMemtableAndSSTablesInTimestampOrder(ColumnFam if (iter.isEmpty()) continue; - if (sstable.isRepaired()) - onlyUnrepaired = false; - result = add( RTBoundValidator.validate(isForThrift() ? ThriftResultsMerger.maybeWrap(iter, nowInSec()) : iter, RTBoundValidator.Stage.SSTABLE, false), result, @@ -972,30 +968,6 @@ private UnfilteredRowIterator queryMemtableAndSSTablesInTimestampOrder(ColumnFam DecoratedKey key = result.partitionKey(); cfs.metric.samplers.get(TableMetrics.Sampler.READS).addSample(key.getKey(), key.hashCode(), 1); - // "hoist up" the requested data into a more recent sstable - if (metricsCollector.getMergedSSTables() > cfs.getMinimumCompactionThreshold() - && onlyUnrepaired - && !cfs.isAutoCompactionDisabled() - && cfs.getCompactionStrategyManager().shouldDefragment()) - { - // !!WARNING!! if we stop copying our data to a heap-managed object, - // we will need to track the lifetime of this mutation as well - Tracing.trace("Defragmenting requested data"); - - try (UnfilteredRowIterator iter = result.unfilteredIterator(columnFilter(), Slices.ALL, false)) - { - final Mutation mutation = new Mutation(PartitionUpdate.fromIterator(iter)); - StageManager.getStage(Stage.MUTATION).execute(new Runnable() - { - public void run() - { - // skipping commitlog and index updates is fine since we're just de-fragmenting existing data - Keyspace.open(mutation.getKeyspaceName()).apply(mutation, false, false); - } - }); - } - } - return result.unfilteredIterator(columnFilter(), Slices.ALL, clusteringIndexFilter().isReversed()); } diff --git a/src/java/org/apache/cassandra/db/compaction/AbstractCompactionStrategy.java b/src/java/org/apache/cassandra/db/compaction/AbstractCompactionStrategy.java index 7219504e9e52..3ceda93c1571 100644 --- a/src/java/org/apache/cassandra/db/compaction/AbstractCompactionStrategy.java +++ b/src/java/org/apache/cassandra/db/compaction/AbstractCompactionStrategy.java @@ -299,11 +299,6 @@ public ScannerList getScanners(Collection sstables, Collection validateOptions(Map options) t return uncheckedOptions; } - @Override - public boolean shouldDefragment() - { - return true; - } - @Override public synchronized void addSSTable(SSTableReader added) {