diff --git a/docs/changelog/44536.yaml b/docs/changelog/44536.yaml new file mode 100644 index 0000000000000..ddae6474d3d91 --- /dev/null +++ b/docs/changelog/44536.yaml @@ -0,0 +1,5 @@ +pr: 44536 +summary: Log summarised description of `StartupExceptions` +area: Infra/Logging +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandler.java b/server/src/main/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandler.java index 1c3c0ccf6b61f..c60a0d6015255 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandler.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandler.java @@ -22,6 +22,8 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.common.SuppressForbidden; import java.io.IOError; @@ -53,6 +55,10 @@ public void uncaughtException(Thread t, Throwable e) { halt(1); } } + } else if (e instanceof StartupException) { + // StartupException means that this server didn't start, and we want to do everything we can to make that + // error clear to anyone who consults the logs so that they're not simply overwhelmed by a stack trace. + onStartupException(t.getName(), (StartupException) e); } else { onNonFatalUncaught(t.getName(), e); } @@ -70,6 +76,41 @@ void onNonFatalUncaught(final String threadName, final Throwable t) { logger.warn(() -> new ParameterizedMessage("uncaught exception in thread [{}]", threadName), t); } + void onStartupException(final String threadName, final StartupException e) { + String bannerMessage = describeStartupException(e); + logger.error(bannerMessage); + logger.warn(() -> new ParameterizedMessage("uncaught exception in thread [{}]", threadName), e); + // Log the error message twice (before and after the stack trace) so that it is super-obvious to anyone reading the logs + logger.error(bannerMessage); + } + + // accessible for testing + static String describeStartupException(StartupException e) { + StringBuilder bannerMessage = new StringBuilder("an exception was thrown that prevented this node from starting (") + // Append the top message so that it as clear as possible that this message is just a summary of the stacktrace next to it. + .append(e.getMessage()) + .append(")"); + // Find the first elasticsearch exception, that message is the most likely to provide a helpful explanation + ElasticsearchException esCause = (ElasticsearchException) ExceptionsHelper.unwrap(e, ElasticsearchException.class); + if (esCause != null) { + bannerMessage.append("\nthis was caused by:"); + // Allow the elasticsearch exception to decide on the best root cause(s to report) + for (ElasticsearchException root : esCause.guessRootCauses()) { + bannerMessage.append("\n * ").append(root.getMessage()) + .append(" (").append(ElasticsearchException.getExceptionName(root)).append(")"); + String indent = " "; + Throwable cause = root.getCause(); + for (int counter = 0; counter < 3 && cause != null; counter++) { + bannerMessage.append('\n').append(indent).append("- caused by: ") + .append(cause.getMessage()).append(" (").append(ElasticsearchException.getExceptionName(cause)).append(")"); + cause = cause.getCause(); + indent += " "; + } + } + } + return bannerMessage.toString(); + } + void halt(int status) { AccessController.doPrivileged(new PrivilegedHaltAction(status)); } diff --git a/server/src/test/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandlerTests.java b/server/src/test/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandlerTests.java index df415d713d103..f0c7b4ed4ed2e 100644 --- a/server/src/test/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandlerTests.java +++ b/server/src/test/java/org/elasticsearch/bootstrap/ElasticsearchUncaughtExceptionHandlerTests.java @@ -19,6 +19,8 @@ package org.elasticsearch.bootstrap; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ElasticsearchSecurityException; import org.elasticsearch.test.ESTestCase; import java.io.IOError; @@ -129,6 +131,38 @@ public void testIsFatalCause() { assertNonFatal(new UncheckedIOException(new IOException())); } + public void testStartupExceptionMessageWithoutElasticsearchException() { + final StartupException exception = new StartupException(new IndexOutOfBoundsException("test")); + final String message = ElasticsearchUncaughtExceptionHandler.describeStartupException(exception); + assertThat(message, equalTo("an exception was thrown that prevented this node from starting" + + " (java.lang.IndexOutOfBoundsException: test)")); + } + + public void testStartupExceptionMessageWithElasticsearchException() { + final StartupException exception = new StartupException(new RuntimeException("test", + new ElasticsearchException("es-exception", new RuntimeException("the-cause", new IndexOutOfBoundsException("root-cause"))))); + final String message = ElasticsearchUncaughtExceptionHandler.describeStartupException(exception); + assertThat(message, equalTo( + "an exception was thrown that prevented this node from starting (java.lang.RuntimeException: test)\n" + + "this was caused by:\n" + + " * es-exception (exception)\n" + + " - caused by: the-cause (runtime_exception)\n" + + " - caused by: root-cause (index_out_of_bounds_exception)")); + } + + public void testStartupExceptionMessageWithChainOfElasticsearchExceptions() { + final StartupException exception = new StartupException(new RuntimeException("test", new ElasticsearchException("es-exception-1", + new ElasticsearchSecurityException("es-exception-2", new RuntimeException("the-cause", + new IndexOutOfBoundsException("root-cause")))))); + final String message = ElasticsearchUncaughtExceptionHandler.describeStartupException(exception); + assertThat(message, equalTo( + "an exception was thrown that prevented this node from starting (java.lang.RuntimeException: test)\n" + + "this was caused by:\n" + + " * es-exception-2 (security_exception)\n" + + " - caused by: the-cause (runtime_exception)\n" + + " - caused by: root-cause (index_out_of_bounds_exception)")); + } + private void assertFatal(Throwable cause) { assertTrue(ElasticsearchUncaughtExceptionHandler.isFatalUncaught(cause)); }