Skip to content

Commit

Permalink
Suppress lease background sync failures if stopping (#40902)
Browse files Browse the repository at this point in the history
If the transport service is stopped, likely because we are shutting
down, and a retention lease background sync fires the logs will display
a warn message and stacktrace. Yet, this situaton is harmless and can
happen as a normal course of business when shutting down. This commit
suppresses the log messages in this case.
  • Loading branch information
jasontedor committed Apr 6, 2019
1 parent aa30c2e commit 781c31e
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 10 deletions.
9 changes: 9 additions & 0 deletions server/src/main/java/org/elasticsearch/ExceptionsHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
import org.elasticsearch.index.Index;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.transport.TransportException;

import java.io.IOException;
import java.io.PrintWriter;
Expand Down Expand Up @@ -193,6 +194,14 @@ public static Throwable unwrap(Throwable t, Class<?>... clazzes) {
return null;
}

public static boolean isTransportStoppedForAction(final Throwable t, final String action) {
final TransportException maybeTransport =
(TransportException) ExceptionsHelper.unwrap(t, TransportException.class);
return maybeTransport != null
&& (maybeTransport.getMessage().equals("TransportService is closed stopped can't send request")
|| maybeTransport.getMessage().equals("transport stopped, action: " + action));
}

/**
* Throws the specified exception. If null if specified then <code>true</code> is returned.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.node.NodeClosedException;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.transport.TransportException;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -200,10 +199,9 @@ public void onFailure(Exception replicaException) {

private void onNoLongerPrimary(Exception failure) {
final Throwable cause = ExceptionsHelper.unwrapCause(failure);
final boolean nodeIsClosing = cause instanceof NodeClosedException
|| (cause instanceof TransportException &&
("TransportService is closed stopped can't send request".equals(cause.getMessage())
|| "transport stopped, action: internal:cluster/shard/failure".equals(cause.getMessage())));
final boolean nodeIsClosing =
cause instanceof NodeClosedException
|| ExceptionsHelper.isTransportStoppedForAction(cause, "internal:cluster/shard/failure");
final String message;
if (nodeIsClosing) {
message = String.format(Locale.ROOT,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,15 @@ public void backgroundSync(
ActionListener.wrap(
r -> {},
e -> {
if (ExceptionsHelper.unwrap(e, AlreadyClosedException.class, IndexShardClosedException.class) == null) {
getLogger().warn(new ParameterizedMessage("{} retention lease background sync failed", shardId), e);
if (ExceptionsHelper.isTransportStoppedForAction(e, ACTION_NAME + "[p]")) {
// we are likely shutting down
return;
}
if (ExceptionsHelper.unwrap(e, AlreadyClosedException.class, IndexShardClosedException.class) != null) {
// the shard is closed
return;
}
getLogger().warn(new ParameterizedMessage("{} retention lease background sync failed", shardId), e);
}));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ public void onFailure(Exception e) {
}
@Override
public void doRun() {
// cf. ExceptionsHelper#isTransportStoppedForAction
TransportException ex = new TransportException("transport stopped, action: " + holderToNotify.action());
holderToNotify.handler().handleException(ex);
}
Expand Down Expand Up @@ -615,8 +616,13 @@ private <T extends TransportResponse> void sendRequestInternal(final Transport.C
}
try {
if (lifecycle.stoppedOrClosed()) {
// if we are not started the exception handling will remove the RequestHolder again and calls the handler to notify
// the caller. It will only notify if the toStop code hasn't done the work yet.
/*
* If we are not started the exception handling will remove the request holder again and calls the handler to notify the
* caller. It will only notify if toStop hasn't done the work yet.
*
* Do not edit this exception message, it is currently relied upon in production code!
*/
// TODO: make a dedicated exception for a stopped transport service? cf. ExceptionsHelper#isTransportStoppedForAction
throw new TransportException("TransportService is closed stopped can't send request");
}
if (timeoutHandler != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import org.elasticsearch.test.transport.CapturingTransport;
import org.elasticsearch.threadpool.TestThreadPool;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportException;
import org.elasticsearch.transport.TransportService;
import org.mockito.ArgumentCaptor;

Expand Down Expand Up @@ -204,9 +205,13 @@ protected void doExecute(Task task, Request request, ActionListener<ReplicationR
final Exception e = randomFrom(
new AlreadyClosedException("closed"),
new IndexShardClosedException(indexShard.shardId()),
new TransportException(randomFrom(
"failed",
"TransportService is closed stopped can't send request",
"transport stopped, action: indices:admin/seq_no/retention_lease_background_sync[p]")),
new RuntimeException("failed"));
listener.onFailure(e);
if (e instanceof AlreadyClosedException == false && e instanceof IndexShardClosedException == false) {
if (e.getMessage().equals("failed")) {
final ArgumentCaptor<ParameterizedMessage> captor = ArgumentCaptor.forClass(ParameterizedMessage.class);
verify(retentionLeaseSyncActionLogger).warn(captor.capture(), same(e));
final ParameterizedMessage message = captor.getValue();
Expand Down

0 comments on commit 781c31e

Please sign in to comment.