- 
                Notifications
    You must be signed in to change notification settings 
- Fork 25.6k
Decouple recoveries from engine flush #10624
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -73,5 +73,11 @@ public int refCount() { | |
| return this.refCount.get(); | ||
| } | ||
|  | ||
|  | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. javadocs? | ||
| /** gets the name of this instance */ | ||
| public String getName() { | ||
| return name; | ||
| } | ||
|  | ||
| protected abstract void closeInternal(); | ||
| } | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -30,17 +30,48 @@ | |
| public class ReleasableLock implements Releasable { | ||
| private final Lock lock; | ||
|  | ||
| /* a per thread boolean indicating the lock is held by it. only works when assertions are enabled */ | ||
| private final ThreadLocal<Boolean> holdingThreads; | ||
|  | ||
| public ReleasableLock(Lock lock) { | ||
| this.lock = lock; | ||
| boolean useHoldingThreads = false; | ||
| assert (useHoldingThreads = true); | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe you can leave a comment here about what the holdingThreads ThreadLocal is used for, that way it doesn't get accidentally changed during a cleanup at a later time. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added java docs on the holdingThreads field | ||
| if (useHoldingThreads) { | ||
| holdingThreads = new ThreadLocal<>(); | ||
| } else { | ||
| holdingThreads = null; | ||
| } | ||
| } | ||
|  | ||
| @Override | ||
| public void close() { | ||
| lock.unlock(); | ||
| assert removeCurrentThread(); | ||
| } | ||
|  | ||
|  | ||
| public ReleasableLock acquire() throws EngineException { | ||
| lock.lock(); | ||
| assert addCurrentThread(); | ||
| return this; | ||
| } | ||
|  | ||
| private boolean addCurrentThread() { | ||
| holdingThreads.set(true); | ||
| return true; | ||
| } | ||
|  | ||
| private boolean removeCurrentThread() { | ||
| holdingThreads.remove(); | ||
| return true; | ||
| } | ||
|  | ||
| public Boolean isHeldByCurrentThread() { | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we have to make this a public API? and if so can we return  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the whole point here is to be able to ask a lock if it's held by the current thread so it has to be public. I made it a Boolean because it may not know (if assertions are not enabled).  can change to  | ||
| if (holdingThreads == null) { | ||
| throw new UnsupportedOperationException("asserts must be enabled"); | ||
| } | ||
| Boolean b = holdingThreads.get(); | ||
| return b != null && b.booleanValue(); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -65,8 +65,6 @@ | |
| import org.elasticsearch.index.store.StoreModule; | ||
| import org.elasticsearch.index.suggest.SuggestShardModule; | ||
| import org.elasticsearch.index.termvectors.ShardTermVectorsModule; | ||
| import org.elasticsearch.index.translog.Translog; | ||
| import org.elasticsearch.index.translog.TranslogModule; | ||
| import org.elasticsearch.index.translog.TranslogService; | ||
| import org.elasticsearch.indices.IndicesLifecycle; | ||
| import org.elasticsearch.indices.IndicesService; | ||
|  | @@ -187,6 +185,7 @@ public IndexShard shard(int shardId) { | |
| } | ||
| return null; | ||
| } | ||
|  | ||
| /** | ||
| * Return the shard with the provided id, or throw an exception if it doesn't exist. | ||
| */ | ||
|  | @@ -320,7 +319,6 @@ public synchronized IndexShard createShard(int sShardId, boolean primary) { | |
| modules.add(new ShardQueryCacheModule()); | ||
| modules.add(new ShardBitsetFilterCacheModule()); | ||
| modules.add(new ShardFieldDataModule()); | ||
| modules.add(new TranslogModule(indexSettings)); | ||
| modules.add(new IndexShardGatewayModule()); | ||
| modules.add(new PercolatorShardModule()); | ||
| modules.add(new ShardTermVectorsModule()); | ||
|  | @@ -386,7 +384,8 @@ private void closeShardInjector(String reason, ShardId sId, Injector shardInject | |
| } | ||
| } | ||
| // now we can close the translog service, we need to close it before the we close the shard | ||
| closeInjectorResource(sId, shardInjector, TranslogService.class); | ||
| // note the that the translog service is not there for shadow replicas | ||
| closeInjectorOptionalResource(sId, shardInjector, TranslogService.class); | ||
| // this logic is tricky, we want to close the engine so we rollback the changes done to it | ||
| // and close the shard so no operations are allowed to it | ||
| if (indexShard != null) { | ||
|  | @@ -402,7 +401,6 @@ private void closeShardInjector(String reason, ShardId sId, Injector shardInject | |
| MergeSchedulerProvider.class, | ||
| MergePolicyProvider.class, | ||
| IndexShardGatewayService.class, | ||
| Translog.class, | ||
| PercolatorQueriesRegistry.class); | ||
|  | ||
| // call this before we close the store, so we can release resources for it | ||
|  | @@ -423,18 +421,30 @@ private void closeShardInjector(String reason, ShardId sId, Injector shardInject | |
| */ | ||
| private void closeInjectorResource(ShardId shardId, Injector shardInjector, Class<? extends Closeable>... toClose) { | ||
| for (Class<? extends Closeable> closeable : toClose) { | ||
| try { | ||
| final Closeable instance = shardInjector.getInstance(closeable); | ||
| if (instance == null) { | ||
| throw new NullPointerException("No instance available for " + closeable.getName()); | ||
| } | ||
| IOUtils.close(instance); | ||
| } catch (Throwable t) { | ||
| logger.debug("{} failed to close {}", t, shardId, Strings.toUnderscoreCase(closeable.getSimpleName())); | ||
| if (closeInjectorOptionalResource(shardId, shardInjector, closeable) == false) { | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm really? NPE was good no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well, it was caught anyway and logged under debug? I made it a warn now.. | ||
| logger.warn("[{}] no instance available for [{}], ignoring... ", shardId, closeable.getSimpleName()); | ||
| } | ||
| } | ||
| } | ||
|  | ||
| /** | ||
| * Closes an optional resource. Returns true if the resource was found; | ||
| * NOTE: this method swallows all exceptions thrown from the close method of the injector and logs them as debug log | ||
| */ | ||
| private boolean closeInjectorOptionalResource(ShardId shardId, Injector shardInjector, Class<? extends Closeable> toClose) { | ||
| try { | ||
| final Closeable instance = shardInjector.getInstance(toClose); | ||
| if (instance == null) { | ||
| return false; | ||
| } | ||
| IOUtils.close(instance); | ||
| } catch (Throwable t) { | ||
| logger.debug("{} failed to close {}", t, shardId, Strings.toUnderscoreCase(toClose.getSimpleName())); | ||
| } | ||
| return true; | ||
| } | ||
|  | ||
|  | ||
| private void onShardClose(ShardLock lock, boolean ownsShard) { | ||
| if (deleted.get()) { // we remove that shards content if this index has been deleted | ||
| try { | ||
|  | @@ -464,7 +474,7 @@ public StoreCloseListener(ShardId shardId, boolean ownsShard) { | |
|  | ||
| @Override | ||
| public void handle(ShardLock lock) { | ||
| assert lock.getShardId().equals(shardId) : "shard Id mismatch, expected: " + shardId + " but got: " + lock.getShardId(); | ||
| assert lock.getShardId().equals(shardId) : "shard id mismatch, expected: " + shardId + " but got: " + lock.getShardId(); | ||
| onShardClose(lock, ownsShard); | ||
| } | ||
| } | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -82,7 +82,6 @@ public abstract class Engine implements Closeable { | |
| protected Engine(EngineConfig engineConfig) { | ||
| Preconditions.checkNotNull(engineConfig.getStore(), "Store must be provided to the engine"); | ||
| Preconditions.checkNotNull(engineConfig.getDeletionPolicy(), "Snapshot deletion policy must be provided to the engine"); | ||
| Preconditions.checkNotNull(engineConfig.getTranslog(), "Translog must be provided to the engine"); | ||
|  | ||
| this.engineConfig = engineConfig; | ||
| this.shardId = engineConfig.getShardId(); | ||
|  | @@ -278,6 +277,9 @@ public final Searcher acquireSearcher(String source) throws EngineException { | |
| } | ||
| } | ||
|  | ||
| /** returns the translog for this engine */ | ||
| public abstract Translog translog(); | ||
|  | ||
| protected void ensureOpen() { | ||
| if (isClosed.get()) { | ||
| throw new EngineClosedException(shardId, failedEngine); | ||
|  | @@ -449,12 +451,12 @@ public void forceMerge(boolean flush) { | |
| public abstract void forceMerge(boolean flush, int maxNumSegments, boolean onlyExpungeDeletes, boolean upgrade, boolean upgradeOnlyAncientSegments) throws EngineException; | ||
|  | ||
| /** | ||
| * Snapshots the index and returns a handle to it. Will always try and "commit" the | ||
| * Snapshots the index and returns a handle to it. If needed will try and "commit" the | ||
| * lucene index to make sure we have a "fresh" copy of the files to snapshot. | ||
| * | ||
| * @param flushFirst indicates whether the engine should flush before returning the snapshot | ||
| */ | ||
| public abstract SnapshotIndexCommit snapshotIndex() throws EngineException; | ||
|  | ||
| public abstract void recover(RecoveryHandler recoveryHandler) throws EngineException; | ||
| public abstract SnapshotIndexCommit snapshotIndex(boolean flushFirst) throws EngineException; | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we separate the flush and make this a  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm I see it's to opt out... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, I can unbundle this and change those who use true to make a separate flush call, but we'll need a wait to not commit/open a new translog. I think we can probably always commit the translog now (one parameter less) but didn't want to go on that adventure now. Rather do it in another iteration. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok | ||
|  | ||
| /** fail engine due to some error. the engine will also be closed. */ | ||
| public void failEngine(String reason, Throwable failure) { | ||
|  | @@ -1048,7 +1050,7 @@ public void release() { | |
| public void flushAndClose() throws IOException { | ||
| if (isClosed.get() == false) { | ||
| logger.trace("flushAndClose now acquire writeLock"); | ||
| try (ReleasableLock _ = writeLock.acquire()) { | ||
| try (ReleasableLock lock = writeLock.acquire()) { | ||
| logger.trace("flushAndClose now acquired writeLock"); | ||
| try { | ||
| logger.debug("flushing shard on close - this might take some time to sync files to disk"); | ||
|  | @@ -1070,7 +1072,7 @@ public void flushAndClose() throws IOException { | |
| public void close() throws IOException { | ||
| if (isClosed.get() == false) { // don't acquire the write lock if we are already closed | ||
| logger.debug("close now acquiring writeLock"); | ||
| try (ReleasableLock _ = writeLock.acquire()) { | ||
| try (ReleasableLock lock = writeLock.acquire()) { | ||
| logger.debug("close acquired writeLock"); | ||
| closeNoLock("api"); | ||
| } | ||
|  | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can we have a shorter name here maybe
forceReadFromFileChannelThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we already have
readFromFileChannelWithEofException. agreed it's long but rather do it in another change..There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
not sure if we should make such a big deal out of it