Skip to content

Commit

Permalink
Use forbidden APIs to help avoid bad no-merge IWCs (#72107)
Browse files Browse the repository at this point in the history
In #72084 we fixed a couple of places where we create an
`IndexWriterConfig` with a no-merge policy but the default merge
scheduler. This change fixes the other places where we do this, and also
forbids inadvertent usages of `NoMergePolicy#INSTANCE` via
forbidden-apis.
  • Loading branch information
DaveCTurner committed Apr 22, 2021
1 parent 0173ffb commit 784a2e2
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ java.nio.channels.FileChannel#read(java.nio.ByteBuffer, long)
@defaultMessage Use Lucene.parseLenient instead it strips off minor version
org.apache.lucene.util.Version#parseLeniently(java.lang.String)

org.apache.lucene.index.NoMergePolicy#INSTANCE @ explicit use of NoMergePolicy risks forgetting to configure NoMergeScheduler; use org.elasticsearch.common.lucene.Lucene#indexWriterConfigWithNoMerging() instead.

@defaultMessage Spawns a new thread which is solely under lucenes control use ThreadPool#relativeTimeInMillis instead
org.apache.lucene.search.TimeLimitingCollector#getGlobalTimerThread()
org.apache.lucene.search.TimeLimitingCollector#getGlobalCounter()
Expand Down
20 changes: 16 additions & 4 deletions server/src/main/java/org/elasticsearch/common/lucene/Lucene.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
Expand All @@ -18,6 +19,7 @@
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
Expand All @@ -36,6 +38,7 @@
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.NoMergeScheduler;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentCommitInfo;
Expand Down Expand Up @@ -71,6 +74,7 @@
import org.apache.lucene.store.Lock;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.Nullable;
Expand Down Expand Up @@ -206,11 +210,10 @@ public static SegmentInfos pruneUnreferencedFiles(String segmentsFileName, Direc
}
}
final IndexCommit cp = getIndexCommit(si, directory);
try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)
try (IndexWriter writer = new IndexWriter(directory, indexWriterConfigWithNoMerging(Lucene.STANDARD_ANALYZER)
.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
.setIndexCommit(cp)
.setCommitOnClose(false)
.setMergePolicy(NoMergePolicy.INSTANCE)
.setOpenMode(IndexWriterConfig.OpenMode.APPEND))) {
// do nothing and close this will kick off IndexFileDeleter which will remove all pending files
}
Expand All @@ -237,9 +240,8 @@ public static void cleanLuceneIndex(Directory directory) throws IOException {
}
}
}
try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)
try (IndexWriter writer = new IndexWriter(directory, indexWriterConfigWithNoMerging(Lucene.STANDARD_ANALYZER)
.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
.setMergePolicy(NoMergePolicy.INSTANCE) // no merges
.setCommitOnClose(false) // no commits
.setOpenMode(IndexWriterConfig.OpenMode.CREATE) // force creation - don't append...
))
Expand Down Expand Up @@ -1083,4 +1085,14 @@ public CacheHelper getReaderCacheHelper() {
}
};
}

/**
* Prepares a new {@link IndexWriterConfig} that does not do any merges, by setting both the merge policy and the merge scheduler.
* Setting just the merge policy means that constructing the index writer will create a {@link ConcurrentMergeScheduler} by default,
* which is quite heavyweight and in particular it can unnecessarily block on {@link IOUtils#spins}.
*/
@SuppressForbidden(reason = "NoMergePolicy#INSTANCE is safe to use since we also set NoMergeScheduler#INSTANCE")
public static IndexWriterConfig indexWriterConfigWithNoMerging(Analyzer analyzer) {
return new IndexWriterConfig(analyzer).setMergePolicy(NoMergePolicy.INSTANCE).setMergeScheduler(NoMergeScheduler.INSTANCE);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.unit.ByteSizeUnit;
Expand Down Expand Up @@ -226,6 +227,7 @@ private int adjustMaxMergeAtOnceIfNeeded(int maxMergeAtOnce, double segmentsPerT
return maxMergeAtOnce;
}

@SuppressForbidden(reason="we always use an appropriate merge scheduler alongside this policy so NoMergePolic#INSTANCE is ok")
MergePolicy getMergePolicy() {
return mergesEnabled ? mergePolicy : NoMergePolicy.INSTANCE;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Lock;
Expand Down Expand Up @@ -60,6 +59,8 @@
import java.util.Objects;
import java.util.stream.StreamSupport;

import static org.elasticsearch.common.lucene.Lucene.indexWriterConfigWithNoMerging;

public class RemoveCorruptedShardDataCommand extends ElasticsearchNodeCommand {

private static final Logger logger = LogManager.getLogger(RemoveCorruptedShardDataCommand.class);
Expand Down Expand Up @@ -384,13 +385,9 @@ protected void addNewHistoryCommit(Directory indexDirectory, Terminal terminal,

terminal.println("Marking index with the new history uuid : " + historyUUID);
// commit the new history id
final IndexWriterConfig iwc = new IndexWriterConfig(null)
// we don't want merges to happen here - we call maybe merge on the engine
// later once we stared it up otherwise we would need to wait for it here
// we also don't specify a codec here and merges should use the engines for this index
final IndexWriterConfig iwc = indexWriterConfigWithNoMerging(null)
.setCommitOnClose(false)
.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
.setMergePolicy(NoMergePolicy.INSTANCE)
.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
// IndexWriter acquires directory lock by its own
try (IndexWriter indexWriter = new IndexWriter(indexDirectory, iwc)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
Expand Down Expand Up @@ -52,6 +51,7 @@
import java.util.function.BiConsumer;
import java.util.stream.Collectors;

import static org.elasticsearch.common.lucene.Lucene.indexWriterConfigWithNoMerging;
import static org.elasticsearch.common.unit.TimeValue.timeValueMillis;

/**
Expand Down Expand Up @@ -149,13 +149,9 @@ void addIndices(final RecoveryState.Index indexRecoveryStats, final Directory ta

final Directory hardLinkOrCopyTarget = new org.apache.lucene.store.HardlinkCopyDirectoryWrapper(target);

IndexWriterConfig iwc = new IndexWriterConfig(null)
IndexWriterConfig iwc = indexWriterConfigWithNoMerging(null)
.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
.setCommitOnClose(false)
// we don't want merges to happen here - we call maybe merge on the engine
// later once we stared it up otherwise we would need to wait for it here
// we also don't specify a codec here and merges should use the engines for this index
.setMergePolicy(NoMergePolicy.INSTANCE)
.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
.setIndexCreatedVersionMajor(luceneIndexCreatedVersionMajor);
if (indexSort != null) {
Expand Down
15 changes: 5 additions & 10 deletions server/src/main/java/org/elasticsearch/index/store/Store.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
import org.apache.lucene.index.IndexNotFoundException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.NoMergeScheduler;
import org.apache.lucene.index.SegmentCommitInfo;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.AlreadyClosedException;
Expand Down Expand Up @@ -97,6 +95,7 @@

import static java.util.Collections.emptyMap;
import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.common.lucene.Lucene.indexWriterConfigWithNoMerging;

/**
* A Store provides plain access to files written by an elasticsearch index shard. Each shard
Expand Down Expand Up @@ -1615,14 +1614,10 @@ private static IndexWriter newTemporaryEmptyIndexWriter(final Directory dir, fin
}

private static IndexWriterConfig newTemporaryIndexWriterConfig() {
return new IndexWriterConfig(null)
// this config is only used for temporary IndexWriter instances, used to initialize the index or update the commit data,
// so we don't want any merges to happen
return indexWriterConfigWithNoMerging(null)
.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD)
.setCommitOnClose(false)
// this config is only used for temporary IndexWriter instances, used to initialize the index or update the commit data,
// so we don't want any merges to happen
.setMergePolicy(NoMergePolicy.INSTANCE)
// also override the default of ConcurrentMergeScheduler which calls IOUtils#spins on all mount points, which is problematic
// if the system has an otherwise-irrelevant mount point that's not responding
.setMergeScheduler(NoMergeScheduler.INSTANCE);
.setCommitOnClose(false);
}
}

0 comments on commit 784a2e2

Please sign in to comment.