Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete all live docs when query matched a whole segment. #13395

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -408,10 +408,19 @@ private long applyQueryDeletes(BufferedUpdatesStream.SegmentState[] segStates)
}
}
} else {
int docID;
while ((docID = it.nextDoc()) < limit) {
if (segState.rld.delete(docID)) {
delCount++;
// TODO: Cant we just set this segment fully deleted, without keeping the correct
// deleted count?
// TODO: We just Implemented isMatchAll for PointRangeQuery, Does TermRangeQuery needs
// this?
if (scorer.isMatchAll()) {
delCount += segState.rld.deleteAll();
assert segState.rld.isFullyDeleted();
} else {
int docID;
while ((docID = it.nextDoc()) < limit) {
if (segState.rld.delete(docID)) {
delCount++;
}
}
}
}
Expand Down Expand Up @@ -468,6 +477,8 @@ private long applyTermDeletes(BufferedUpdatesStream.SegmentState[] segStates) th
final DocIdSetIterator iterator = termDocsIterator.nextTerm(iter.field(), delTerm);
if (iterator != null) {
int docID;
// It is unusual that one term matches all docs, so it is worthless to implement delete
// all.
while ((docID = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
// NOTE: there is no limit check on the docID
// when deleting by Term (unlike by Query)
Expand Down
13 changes: 13 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/PendingDeletes.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,19 @@ boolean delete(int docID) throws IOException {
return didDelete;
}

/** Delete all live documents in this segment and return deleted count. */
// TODO: Cant we just set this segment fully deleted, without keep the correct deleted count?
long deleteAll() throws IOException {
assert info.info.maxDoc() > 0;
FixedBitSet mutableBits = getMutableBits();
assert mutableBits != null;

int liveCount = mutableBits.cardinality();
mutableBits.clear();
pendingDeleteCount += liveCount;
return liveCount;
}

/** Returns a snapshot of the current live docs. */
Bits getLiveDocs() {
// Prevent modifications to the returned live docs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,13 @@ public synchronized boolean delete(int docID) throws IOException {
return pendingDeletes.delete(docID);
}

public synchronized long deleteAll() throws IOException {
if (reader == null && pendingDeletes.mustInitOnDelete()) {
getReader(IOContext.DEFAULT).decRef(); // pass a reader to initialize the pending deletes
}
return pendingDeletes.deleteAll();
}

// NOTE: removes callers ref
public synchronized void dropReaders() throws IOException {
// TODO: can we somehow use IOUtils here...? problem is
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,8 +347,11 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) {
return new ConstantScoreScorer(
weight, score(), scoreMode, DocIdSetIterator.all(reader.maxDoc()));
ConstantScoreScorer constantScoreScorer =
new ConstantScoreScorer(
weight, score(), scoreMode, DocIdSetIterator.all(reader.maxDoc()));
constantScoreScorer.setMatchAll(true);
return constantScoreScorer;
}

@Override
Expand Down
12 changes: 12 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/Scorer.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ public abstract class Scorer extends Scorable {
/** the Scorer's parent Weight */
protected final Weight weight;

private boolean isMatchAll;

/**
* Constructs a Scorer
*
Expand Down Expand Up @@ -98,4 +100,14 @@ public int advanceShallow(int target) throws IOException {
* {@link #advanceShallow(int) shallow-advanced} to included and {@code upTo} included.
*/
public abstract float getMaxScore(int upTo) throws IOException;

/** Set whether we can match all docs in this scored segment. */
public void setMatchAll(boolean isMatchAll) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm adding this API to such a widely used class (Scorer) makes me nervous -- it means consumers of Scorer can suddenly setMatchAll without being true?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it means consumers of Scorer can suddenly setMatchAll without being true?

Yes, I should found a better way to mark isMatchAll.

this.isMatchAll = isMatchAll;
}

/** Return true if we matched all docs in this scored segment. */
public boolean isMatchAll() {
return isMatchAll;
}
}
66 changes: 45 additions & 21 deletions lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,28 +58,9 @@
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.*;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
Expand Down Expand Up @@ -202,6 +183,49 @@ public boolean keepFullyDeletedSegment(
dir.close();
}

public void testDeleteByQuery() throws IOException {
Directory dir = newDirectory();
IndexWriter writer =
new IndexWriter(
dir,
new IndexWriterConfig(new MockAnalyzer(random()))
.setMergePolicy(NoMergePolicy.INSTANCE));

Document doc;
for (int i = 0; i < 10; i++) {
doc = new Document();
doc.add(new LongField("content", i, Field.Store.NO));
writer.addDocument(doc);
}
writer.flush();

for (int i = 10; i < 20; i++) {
doc = new Document();
doc.add(new LongField("content", i, Field.Store.NO));
writer.addDocument(doc);
}
writer.flush();

for (int i = 20; i < 30; i++) {
doc = new Document();
doc.add(new LongField("content", i, Field.Store.NO));
writer.addDocument(doc);
}
writer.flush();
writer.deleteDocuments(LongPoint.newRangeQuery("content", 10, 19));

DirectoryReader reader = DirectoryReader.open(writer);
IndexSearcher searcher = newSearcher(reader);
assertEquals(
0, searcher.search(LongPoint.newRangeQuery("content", 10, 19), 100).totalHits.value);
assertEquals(
20, searcher.search(LongPoint.newRangeQuery("content", 0, 30), 100).totalHits.value);

reader.close();
writer.close();
dir.close();
}

static void addDoc(IndexWriter writer) throws IOException {
Document doc = new Document();
doc.add(newTextField("content", "aaa", Field.Store.NO));
Expand Down