Revert "Add a merge policy that prunes ID postings for soft-deleted b…
Browse files Browse the repository at this point in the history
…ut retained documents (elastic#40741)"

This reverts commit 186b52c.
GitHub messed up the commit message due to a retry.
A follow-up commit will add this change again with a corrected
commit message.
s1monw authored and alpar-t committed Jun 10, 2019
1 parent 569e08a · commit c095612
Showing 5 changed files with 4 additions and 507 deletions.
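For context, the reverted change wrapped the engine's merge policy in a PrunePostingsMergePolicy keyed on the _id field, so merges could drop ID postings for documents that are soft-deleted but still retained by the retention query; the InternalEngine hunk below shows the exact wiring. A rough, self-contained sketch against plain Lucene follows — the "__soft_deletes" field name and the MatchAllDocsQuery retention supplier are taken from the removed test below, while PrunePostingsMergePolicy itself is the Elasticsearch class deleted by this revert, so it appears only in a comment:

    import java.util.function.Supplier;
    import org.apache.lucene.index.MergePolicy;
    import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
    import org.apache.lucene.index.TieredMergePolicy;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.Query;

    class MergePolicyWiring {
        static MergePolicy afterRevert() {
            // The retention query decides which soft-deleted documents must survive merges.
            Supplier<Query> retentionQuery = MatchAllDocsQuery::new;
            return new SoftDeletesRetentionMergePolicy("__soft_deletes", retentionQuery, new TieredMergePolicy());
        }
        // Before the revert, the innermost policy was additionally wrapped as
        //   new PrunePostingsMergePolicy(new TieredMergePolicy(), "_id")
        // so merged segments no longer carried _id postings for soft-deleted-but-retained docs.
    }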
@@ -74,9 +74,7 @@ final class PerThreadIDVersionAndSeqNoLookup {
 // If a segment contains only no-ops, it does not have _uid but has both _soft_deletes and _tombstone fields.
 final NumericDocValues softDeletesDV = reader.getNumericDocValues(Lucene.SOFT_DELETES_FIELD);
 final NumericDocValues tombstoneDV = reader.getNumericDocValues(SeqNoFieldMapper.TOMBSTONE_NAME);
-// this is a special case when we pruned away all IDs in a segment since all docs are deleted.
-final boolean allDocsDeleted = (softDeletesDV != null && reader.numDocs() == 0);
-if ((softDeletesDV == null || tombstoneDV == null) && allDocsDeleted == false) {
+if (softDeletesDV == null || tombstoneDV == null) {
     throw new IllegalArgumentException("reader does not have _uid terms but not a no-op segment; " +
         "_soft_deletes [" + softDeletesDV + "], _tombstone [" + tombstoneDV + "]");
 }
@@ -2176,8 +2176,7 @@ private IndexWriterConfig getIndexWriterConfig() {
 iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
 if (softDeleteEnabled) {
     mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
-        new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery,
-            new PrunePostingsMergePolicy(mergePolicy, IdFieldMapper.NAME)));
+        new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
 }
 iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
 iwc.setSimilarity(engineConfig.getSimilarity());

This file was deleted.

@@ -51,8 +51,6 @@
 import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
@@ -121,8 +119,6 @@
 import org.elasticsearch.index.mapper.RootObjectMapper;
 import org.elasticsearch.index.mapper.SeqNoFieldMapper;
 import org.elasticsearch.index.mapper.SourceFieldMapper;
-import org.elasticsearch.index.mapper.Uid;
-import org.elasticsearch.index.mapper.VersionFieldMapper;
 import org.elasticsearch.index.seqno.LocalCheckpointTracker;
 import org.elasticsearch.index.seqno.ReplicationTracker;
 import org.elasticsearch.index.seqno.RetentionLease;
@@ -1462,61 +1458,6 @@ public void testForceMergeWithoutSoftDeletes() throws IOException {
         }
     }

-    /*
-     * we are testing an edge case here where we have a fully deleted segment that is retained but has all it's IDs pruned away.
-     */
-    public void testLookupVersionWithPrunedAwayIds() throws IOException {
-        try (Directory dir = newDirectory()) {
-            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Lucene.STANDARD_ANALYZER);
-            indexWriterConfig.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
-            try (IndexWriter writer = new IndexWriter(dir,
-                indexWriterConfig.setMergePolicy(new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD,
-                    MatchAllDocsQuery::new, new PrunePostingsMergePolicy(indexWriterConfig.getMergePolicy(), "_id"))))) {
-                org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
-                doc.add(new Field(IdFieldMapper.NAME, "1", IdFieldMapper.Defaults.FIELD_TYPE));
-                doc.add(new NumericDocValuesField(VersionFieldMapper.NAME, -1));
-                doc.add(new NumericDocValuesField(SeqNoFieldMapper.NAME, 1));
-                doc.add(new NumericDocValuesField(SeqNoFieldMapper.PRIMARY_TERM_NAME, 1));
-                writer.addDocument(doc);
-                writer.flush();
-                writer.softUpdateDocument(new Term(IdFieldMapper.NAME, "1"), doc, new NumericDocValuesField(Lucene.SOFT_DELETES_FIELD, 1));
-                writer.updateNumericDocValue(new Term(IdFieldMapper.NAME, "1"), Lucene.SOFT_DELETES_FIELD, 1);
-                writer.forceMerge(1);
-                try (DirectoryReader reader = DirectoryReader.open(writer)) {
-                    assertEquals(1, reader.leaves().size());
-                    assertNull(VersionsAndSeqNoResolver.loadDocIdAndVersion(reader, new Term(IdFieldMapper.NAME, "1"), false));
-                }
-            }
-        }
-    }
-
-    public void testUpdateWithFullyDeletedSegments() throws IOException {
-        Settings.Builder settings = Settings.builder()
-            .put(defaultSettings.getSettings())
-            .put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true)
-            .put(IndexSettings.INDEX_SOFT_DELETES_RETENTION_OPERATIONS_SETTING.getKey(), Integer.MAX_VALUE);
-        final IndexMetaData indexMetaData = IndexMetaData.builder(defaultSettings.getIndexMetaData()).settings(settings).build();
-        final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(indexMetaData);
-        final AtomicLong globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED);
-        final Set<String> liveDocs = new HashSet<>();
-        try (Store store = createStore();
-             InternalEngine engine = createEngine(config(indexSettings, store, createTempDir(), newMergePolicy(), null,
-                 null, globalCheckpoint::get))) {
-            int numDocs = scaledRandomIntBetween(10, 100);
-            for (int i = 0; i < numDocs; i++) {
-                ParsedDocument doc = testParsedDocument(Integer.toString(i), null, testDocument(), B_1, null);
-                engine.index(indexForDoc(doc));
-                liveDocs.add(doc.id());
-            }
-
-            for (int i = 0; i < numDocs; i++) {
-                ParsedDocument doc = testParsedDocument(Integer.toString(i), null, testDocument(), B_1, null);
-                engine.index(indexForDoc(doc));
-                liveDocs.add(doc.id());
-            }
-        }
-    }
-
     public void testForceMergeWithSoftDeletesRetention() throws Exception {
         final long retainedExtraOps = randomLongBetween(0, 10);
         Settings.Builder settings = Settings.builder()
@@ -5793,60 +5734,8 @@ public void testGetReaderAttributes() throws IOException {
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst"));
break;
default:
fail("unknownw type");
}
}
}

public void testPruneAwayDeletedButRetainedIds() throws Exception {
IOUtils.close(engine, store);
Settings settings = Settings.builder()
.put(defaultSettings.getSettings())
.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), true).build();
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings(
IndexMetaData.builder(defaultSettings.getIndexMetaData()).settings(settings).build());
store = createStore(indexSettings, newDirectory());
LogDocMergePolicy policy = new LogDocMergePolicy();
policy.setMinMergeDocs(10000);
try (InternalEngine engine = createEngine(indexSettings, store, createTempDir(), policy)) {
int numDocs = between(1, 20);
for (int i = 0; i < numDocs; i++) {
index(engine, i);
}
engine.forceMerge(true);
engine.delete(new Engine.Delete("_doc", "0", newUid("0"), primaryTerm.get()));
engine.refresh("test");
// now we have 2 segments since we now added a tombstone plus the old segment with the delete
try (Searcher searcher = engine.acquireSearcher("test")) {
IndexReader reader = searcher.reader();
assertEquals(2, reader.leaves().size());
LeafReaderContext leafReaderContext = reader.leaves().get(0);
LeafReader leafReader = leafReaderContext.reader();
assertEquals("the delete and the tombstone", 1, leafReader.numDeletedDocs());
assertEquals(numDocs, leafReader.maxDoc());
Terms id = leafReader.terms("_id");
assertNotNull(id);
assertEquals("deleted IDs are NOT YET pruned away", reader.numDocs() + 1, id.size());
TermsEnum iterator = id.iterator();
assertTrue(iterator.seekExact(Uid.encodeId("0")));
}

// lets force merge the tombstone and the original segment and make sure the doc is still there but the ID term is gone
engine.forceMerge(true);
engine.refresh("test");
try (Searcher searcher = engine.acquireSearcher("test")) {
IndexReader reader = searcher.reader();
assertEquals(1, reader.leaves().size());
LeafReaderContext leafReaderContext = reader.leaves().get(0);
LeafReader leafReader = leafReaderContext.reader();
assertEquals("the delete and the tombstone", 2, leafReader.numDeletedDocs());
assertEquals(numDocs + 1, leafReader.maxDoc());
Terms id = leafReader.terms("_id");
assertNotNull(id);
assertEquals("deleted IDs are pruned away", reader.numDocs(), id.size());
TermsEnum iterator = id.iterator();
assertFalse(iterator.seekExact(Uid.encodeId("0")));
default:
fail("unknownw type");
}
}
}
