From 56cb42d200dd999342c4cd5c92fbfa271ba91153 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Tue, 20 Nov 2018 12:56:20 +0100 Subject: [PATCH] LUCENE-8569: Never count soft-deletes if reader has no hard-deletes Today we count the actual soft-deletes during a merge which is unnecessary if there are no hard-deletes present. In this case, which is considered to be the common case, we can get accurate counts by subtracting the number of deleted docs in the wrapped reader from the number of soft-deletes in that reader. --- .../org/apache/lucene/index/IndexWriter.java | 84 ++++++++++++------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 028554b5eda7..f8415829cfd5 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -69,6 +69,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Constants; +import org.apache.lucene.util.Counter; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.StringHelper; @@ -4352,6 +4353,36 @@ private synchronized void closeMergeReaders(MergePolicy.OneMerge merge, boolean } } + private void countSoftDeletes(CodecReader reader, Bits wrappedLiveDocs, Bits hardLiveDocs, Counter softDeleteCounter, + Counter hardDeleteCounter) throws IOException { + int hardDeleteCount = 0; + int softDeletesCount = 0; + DocIdSetIterator softDeletedDocs = DocValuesFieldExistsQuery.getDocValuesDocIdSetIterator(config.getSoftDeletesField(), reader); + if (softDeletedDocs != null) { + int docId; + while ((docId = softDeletedDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + if (wrappedLiveDocs == null || wrappedLiveDocs.get(docId)) { + if (hardLiveDocs == null || hardLiveDocs.get(docId)) { + softDeletesCount++; + } 
else { + hardDeleteCount++; + } + } + } + } + softDeleteCounter.addAndGet(softDeletesCount); + hardDeleteCounter.addAndGet(hardDeleteCount); + } + + private boolean assertSoftDeletesCount(CodecReader reader, int expectedCount) throws IOException { + Counter count = Counter.newCounter(false); + Counter hardDeletes = Counter.newCounter(false); + countSoftDeletes(reader, reader.getLiveDocs(), null, count, hardDeletes); + assert count.get() == expectedCount : "soft-deletes count mismatch expected: " + + expectedCount + " but actual: " + count.get() ; + return true; + } + /** Does the actual (time-consuming) work of the merge, * but without holding synchronized lock on IndexWriter * instance */ @@ -4400,7 +4431,7 @@ private int mergeMiddle(MergePolicy.OneMerge merge, MergePolicy mergePolicy) thr // Let the merge wrap readers List mergeReaders = new ArrayList<>(); - int softDeleteCount = 0; + Counter softDeleteCount = Counter.newCounter(false); for (int r = 0; r < merge.readers.size(); r++) { SegmentReader reader = merge.readers.get(r); CodecReader wrappedReader = merge.wrapForMerge(reader); @@ -4408,34 +4439,31 @@ private int mergeMiddle(MergePolicy.OneMerge merge, MergePolicy mergePolicy) thr if (softDeletesEnabled) { if (reader != wrappedReader) { // if we don't have a wrapped reader we won't preserve any soft-deletes Bits hardLiveDocs = merge.hardLiveDocs.get(r); - Bits wrappedLiveDocs = wrappedReader.getLiveDocs(); - int hardDeleteCount = 0; - DocIdSetIterator softDeletedDocs = DocValuesFieldExistsQuery.getDocValuesDocIdSetIterator(config.getSoftDeletesField(), wrappedReader); - if (softDeletedDocs != null) { - int docId; - while ((docId = softDeletedDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - if (wrappedLiveDocs == null || wrappedLiveDocs.get(docId)) { - if (hardLiveDocs == null || hardLiveDocs.get(docId)) { - softDeleteCount++; - } else { - hardDeleteCount++; + if (hardLiveDocs != null) { // we only need to do this accounting if we have mixed deletes + 
Bits wrappedLiveDocs = wrappedReader.getLiveDocs(); + Counter hardDeleteCounter = Counter.newCounter(false); + countSoftDeletes(wrappedReader, wrappedLiveDocs, hardLiveDocs, softDeleteCount, hardDeleteCounter); + int hardDeleteCount = Math.toIntExact(hardDeleteCounter.get()); + // Wrap the wrapped reader again if we have excluded some hard-deleted docs + if (hardDeleteCount > 0) { + Bits liveDocs = wrappedLiveDocs == null ? hardLiveDocs : new Bits() { + @Override + public boolean get(int index) { + return hardLiveDocs.get(index) && wrappedLiveDocs.get(index); } - } + + @Override + public int length() { + return hardLiveDocs.length(); + } + }; + wrappedReader = FilterCodecReader.wrapLiveDocs(wrappedReader, liveDocs, wrappedReader.numDocs() - hardDeleteCount); } - } - // Wrap the wrapped reader again if we have excluded some hard-deleted docs - if (hardLiveDocs != null && hardDeleteCount > 0) { - Bits liveDocs = wrappedLiveDocs == null ? hardLiveDocs : new Bits() { - @Override - public boolean get(int index) { - return hardLiveDocs.get(index) && wrappedLiveDocs.get(index); - } - @Override - public int length() { - return hardLiveDocs.length(); - } - }; - wrappedReader = FilterCodecReader.wrapLiveDocs(wrappedReader, liveDocs, wrappedReader.numDocs() - hardDeleteCount); + } else { + final int carryOverSoftDeletes = reader.getSegmentInfo().getSoftDelCount() - wrappedReader.numDeletedDocs(); + assert carryOverSoftDeletes >= 0 : "carry-over soft-deletes must be positive"; + assert assertSoftDeletesCount(wrappedReader, carryOverSoftDeletes); + softDeleteCount.addAndGet(carryOverSoftDeletes); } } } @@ -4445,7 +4473,7 @@ public int length() { merge.info.info, infoStream, dirWrapper, globalFieldNumberMap, context); - merge.info.setSoftDelCount(softDeleteCount); + merge.info.setSoftDelCount(Math.toIntExact(softDeleteCount.get())); merge.checkAborted(); merge.mergeStartNS = System.nanoTime();