From 79d9f5b77596a19ef47aaa149a2479c55ef888ec Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 11 Aug 2015 13:13:43 -0700 Subject: [PATCH] Logging: Log less source in slowlog Instead of logging the entire `_source` in the indexing slowlog we log by default just the first 1000 characters - this is controlled by the `index.indexing.slowlog.source` settings and can be set to `true` to log the whole `_source`, `false` to log none of it, and a number to log at most that many characters. Closes #4485 --- .../cluster/metadata/IndexMetaData.java | 1 + .../org/elasticsearch/common/Strings.java | 24 ++++++ .../index/indexing/IndexingSlowLog.java | 81 ++++++++++++++----- .../settings/IndexDynamicSettingsModule.java | 2 +- .../elasticsearch/common/StringsTests.java | 31 ++++++- .../index/indexing/IndexingSlowLogTests.java | 54 +++++++++++++ docs/reference/index-modules/slowlog.asciidoc | 7 ++ 7 files changed, 175 insertions(+), 25 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/index/indexing/IndexingSlowLogTests.java diff --git a/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java b/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java index 83fbf03941444..aa2fc96a9b5ba 100644 --- a/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java +++ b/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java @@ -175,6 +175,7 @@ public static State fromString(String state) { public static final String SETTING_SHARED_FS_ALLOW_RECOVERY_ON_ANY_NODE = "index.shared_filesystem.recover_on_any_node"; public static final String INDEX_UUID_NA_VALUE = "_na_"; + // hard-coded hash function as of 2.0 // older indices will read which hash function to use in their index settings private static final HashFunction MURMUR3_HASH_FUNCTION = new Murmur3HashFunction(); diff --git a/core/src/main/java/org/elasticsearch/common/Strings.java b/core/src/main/java/org/elasticsearch/common/Strings.java index 7aff86bc2cd61..58210e0fe5ca4 100644 --- a/core/src/main/java/org/elasticsearch/common/Strings.java +++ b/core/src/main/java/org/elasticsearch/common/Strings.java @@ -1093,4 +1093,28 @@ public static String toString(ToXContent toXContent) { throw new AssertionError("Cannot happen", e); } } + + /** + * Truncates string to a length less than length. Backtracks to throw out + * high surrogates. + */ + public static String cleanTruncate(String s, int length) { + if (s == null) { + return s; + } + /* + * Its pretty silly for you to truncate to 0 length but just in case + * someone does this shouldn't break. + */ + if (length == 0) { + return ""; + } + if (length >= s.length()) { + return s; + } + if (Character.isHighSurrogate(s.charAt(length - 1))) { + length--; + } + return s.substring(0, length); + } } diff --git a/core/src/main/java/org/elasticsearch/index/indexing/IndexingSlowLog.java b/core/src/main/java/org/elasticsearch/index/indexing/IndexingSlowLog.java index 2729e163d150e..ea45db2e91266 100644 --- a/core/src/main/java/org/elasticsearch/index/indexing/IndexingSlowLog.java +++ b/core/src/main/java/org/elasticsearch/index/indexing/IndexingSlowLog.java @@ -19,6 +19,8 @@ package org.elasticsearch.index.indexing; +import org.elasticsearch.common.Booleans; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; @@ -26,7 +28,6 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.ParsedDocument; -import org.elasticsearch.index.settings.IndexSettingsService; import java.io.IOException; import java.util.Locale; @@ -42,6 +43,12 @@ public final class IndexingSlowLog { private long indexInfoThreshold; private long indexDebugThreshold; private long indexTraceThreshold; + /** + * How much of the source to log in the slowlog - 0 means log none and + * anything greater than 0 means log at least that many characters + * of the source. + */ + private int maxSourceCharsToLog; private String level; @@ -55,18 +62,27 @@ public final class IndexingSlowLog { public static final String INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_TRACE = INDEX_INDEXING_SLOWLOG_PREFIX +".threshold.index.trace"; public static final String INDEX_INDEXING_SLOWLOG_REFORMAT = INDEX_INDEXING_SLOWLOG_PREFIX +".reformat"; public static final String INDEX_INDEXING_SLOWLOG_LEVEL = INDEX_INDEXING_SLOWLOG_PREFIX +".level"; + public static final String INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG = INDEX_INDEXING_SLOWLOG_PREFIX + ".source"; IndexingSlowLog(Settings indexSettings) { + this(indexSettings, Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX + ".index"), + Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX + ".delete")); + } + + /** + * Build with the specified loggers. Only used to testing. + */ + IndexingSlowLog(Settings indexSettings, ESLogger indexLogger, ESLogger deleteLogger) { + this.indexLogger = indexLogger; + this.deleteLogger = deleteLogger; + this.reformat = indexSettings.getAsBoolean(INDEX_INDEXING_SLOWLOG_REFORMAT, true); this.indexWarnThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_WARN, TimeValue.timeValueNanos(-1)).nanos(); this.indexInfoThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_INFO, TimeValue.timeValueNanos(-1)).nanos(); this.indexDebugThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_DEBUG, TimeValue.timeValueNanos(-1)).nanos(); this.indexTraceThreshold = indexSettings.getAsTime(INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_TRACE, TimeValue.timeValueNanos(-1)).nanos(); - this.level = indexSettings.get(INDEX_INDEXING_SLOWLOG_LEVEL, "TRACE").toUpperCase(Locale.ROOT); - - this.indexLogger = Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX +".index"); - this.deleteLogger = Loggers.getLogger(INDEX_INDEXING_SLOWLOG_PREFIX +".delete"); + this.maxSourceCharsToLog = readSourceToLog(indexSettings); indexLogger.setLevel(level); deleteLogger.setLevel(level); @@ -101,6 +117,11 @@ synchronized void onRefreshSettings(Settings settings) { if (reformat != this.reformat) { this.reformat = reformat; } + + int maxSourceCharsToLog = readSourceToLog(settings); + if (maxSourceCharsToLog != this.maxSourceCharsToLog) { + this.maxSourceCharsToLog = maxSourceCharsToLog; + } } void postIndex(Engine.Index index, long tookInNanos) { @@ -111,27 +132,44 @@ void postCreate(Engine.Create create, long tookInNanos) { postIndexing(create.parsedDoc(), tookInNanos); } + /** + * Reads how much of the source to log. The user can specify any value they + * like and numbers are interpreted the maximum number of characters to log + * and everything else is interpreted as Elasticsearch interprets booleans + * which is then converted to 0 for false and Integer.MAX_VALUE for true. + */ + private int readSourceToLog(Settings settings) { + String sourceToLog = settings.get(INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG, "1000"); + try { + return Integer.parseInt(sourceToLog, 10); + } catch (NumberFormatException e) { + return Booleans.parseBoolean(sourceToLog, true) ? Integer.MAX_VALUE : 0; + } + } + private void postIndexing(ParsedDocument doc, long tookInNanos) { if (indexWarnThreshold >= 0 && tookInNanos > indexWarnThreshold) { - indexLogger.warn("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat)); + indexLogger.warn("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog)); } else if (indexInfoThreshold >= 0 && tookInNanos > indexInfoThreshold) { - indexLogger.info("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat)); + indexLogger.info("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog)); } else if (indexDebugThreshold >= 0 && tookInNanos > indexDebugThreshold) { - indexLogger.debug("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat)); + indexLogger.debug("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog)); } else if (indexTraceThreshold >= 0 && tookInNanos > indexTraceThreshold) { - indexLogger.trace("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat)); + indexLogger.trace("{}", new SlowLogParsedDocumentPrinter(doc, tookInNanos, reformat, maxSourceCharsToLog)); } } - final static class SlowLogParsedDocumentPrinter { + static final class SlowLogParsedDocumentPrinter { private final ParsedDocument doc; private final long tookInNanos; private final boolean reformat; + private final int maxSourceCharsToLog; - SlowLogParsedDocumentPrinter(ParsedDocument doc, long tookInNanos, boolean reformat) { + SlowLogParsedDocumentPrinter(ParsedDocument doc, long tookInNanos, boolean reformat, int maxSourceCharsToLog) { this.doc = doc; this.tookInNanos = tookInNanos; this.reformat = reformat; + this.maxSourceCharsToLog = maxSourceCharsToLog; } @Override @@ -141,18 +179,19 @@ public String toString() { sb.append("type[").append(doc.type()).append("], "); sb.append("id[").append(doc.id()).append("], "); if (doc.routing() == null) { - sb.append("routing[], "); + sb.append("routing[] "); } else { - sb.append("routing[").append(doc.routing()).append("], "); + sb.append("routing[").append(doc.routing()).append("] "); } - if (doc.source() != null && doc.source().length() > 0) { - try { - sb.append("source[").append(XContentHelper.convertToJson(doc.source(), reformat)).append("]"); - } catch (IOException e) { - sb.append("source[_failed_to_convert_]"); - } - } else { - sb.append("source[]"); + + if (maxSourceCharsToLog == 0 || doc.source() == null || doc.source().length() == 0) { + return sb.toString(); + } + try { + String source = XContentHelper.convertToJson(doc.source(), reformat); + sb.append(", source[").append(Strings.cleanTruncate(source, maxSourceCharsToLog)).append("]"); + } catch (IOException e) { + sb.append(", source[_failed_to_convert_]"); } return sb.toString(); } diff --git a/core/src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java b/core/src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java index 804e64568dacf..c9fe0b4207fd3 100644 --- a/core/src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java +++ b/core/src/main/java/org/elasticsearch/index/settings/IndexDynamicSettingsModule.java @@ -30,7 +30,6 @@ import org.elasticsearch.cluster.settings.DynamicSettings; import org.elasticsearch.cluster.settings.Validator; import org.elasticsearch.common.inject.AbstractModule; -import org.elasticsearch.gateway.GatewayAllocator; import org.elasticsearch.index.engine.EngineConfig; import org.elasticsearch.index.indexing.IndexingSlowLog; import org.elasticsearch.index.search.stats.SearchSlowLog; @@ -86,6 +85,7 @@ public IndexDynamicSettingsModule() { indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_THRESHOLD_INDEX_TRACE, Validator.TIME); indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_REFORMAT); indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_LEVEL); + indexDynamicSettings.addDynamicSetting(IndexingSlowLog.INDEX_INDEXING_SLOWLOG_MAX_SOURCE_CHARS_TO_LOG); indexDynamicSettings.addDynamicSetting(SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_WARN, Validator.TIME); indexDynamicSettings.addDynamicSetting(SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_INFO, Validator.TIME); indexDynamicSettings.addDynamicSetting(SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_QUERY_DEBUG, Validator.TIME); diff --git a/core/src/test/java/org/elasticsearch/common/StringsTests.java b/core/src/test/java/org/elasticsearch/common/StringsTests.java index 611894c6a642c..584428541e45e 100644 --- a/core/src/test/java/org/elasticsearch/common/StringsTests.java +++ b/core/src/test/java/org/elasticsearch/common/StringsTests.java @@ -20,11 +20,8 @@ package org.elasticsearch.common; import org.elasticsearch.test.ESTestCase; -import org.junit.Test; public class StringsTests extends ESTestCase { - - @Test public void testToCamelCase() { assertEquals("foo", Strings.toCamelCase("foo")); assertEquals("fooBar", Strings.toCamelCase("fooBar")); @@ -33,4 +30,32 @@ public void testToCamelCase() { assertEquals("fooBarFooBar", Strings.toCamelCase("foo_bar_foo_bar")); assertEquals("fooBar", Strings.toCamelCase("foo_bar_")); } + + public void testSubstring() { + assertEquals(null, Strings.substring(null, 0, 1000)); + assertEquals("foo", Strings.substring("foo", 0, 1000)); + assertEquals("foo", Strings.substring("foo", 0, 3)); + assertEquals("oo", Strings.substring("foo", 1, 3)); + assertEquals("oo", Strings.substring("foo", 1, 100)); + assertEquals("f", Strings.substring("foo", 0, 1)); + } + + public void testCleanTruncate() { + assertEquals(null, Strings.cleanTruncate(null, 10)); + assertEquals("foo", Strings.cleanTruncate("foo", 10)); + assertEquals("foo", Strings.cleanTruncate("foo", 3)); + // Throws out high surrogates + assertEquals("foo", Strings.cleanTruncate("foo\uD83D\uDEAB", 4)); + // But will keep the whole character + assertEquals("foo\uD83D\uDEAB", Strings.cleanTruncate("foo\uD83D\uDEAB", 5)); + /* + * Doesn't take care around combining marks. This example has its + * meaning changed because that last codepoint is supposed to combine + * backwards into the find "o" and be represented as the "o" with a + * circle around it with a slash through it. As in "no 'o's allowed + * here. + */ + assertEquals("o", Strings.cleanTruncate("o\uD83D\uDEAB", 1)); + assertEquals("", Strings.cleanTruncate("foo", 0)); + } } diff --git a/core/src/test/java/org/elasticsearch/index/indexing/IndexingSlowLogTests.java b/core/src/test/java/org/elasticsearch/index/indexing/IndexingSlowLogTests.java new file mode 100644 index 0000000000000..ccbef6837c98a --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/indexing/IndexingSlowLogTests.java @@ -0,0 +1,54 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.indexing; + +import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.StringField; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.index.indexing.IndexingSlowLog.SlowLogParsedDocumentPrinter; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.not; + +public class IndexingSlowLogTests extends ESTestCase { + public void testSlowLogParsedDocumentPrinterSourceToLog() throws IOException { + BytesReference source = JsonXContent.contentBuilder().startObject().field("foo", "bar").endObject().bytes(); + ParsedDocument pd = new ParsedDocument(new StringField("uid", "test:id", Store.YES), new IntField("version", 1, Store.YES), "id", + "test", null, 0, -1, null, source, null); + + // Turning off document logging doesn't log source[] + SlowLogParsedDocumentPrinter p = new SlowLogParsedDocumentPrinter(pd, 10, true, 0); + assertThat(p.toString(), not(containsString("source["))); + + // Turning on document logging logs the whole thing + p = new SlowLogParsedDocumentPrinter(pd, 10, true, Integer.MAX_VALUE); + assertThat(p.toString(), containsString("source[{\"foo\":\"bar\"}]")); + + // And you can truncate the source + p = new SlowLogParsedDocumentPrinter(pd, 10, true, 3); + assertThat(p.toString(), containsString("source[{\"f]")); + } +} diff --git a/docs/reference/index-modules/slowlog.asciidoc b/docs/reference/index-modules/slowlog.asciidoc index 15c1b217577af..9ae78b36d2898 100644 --- a/docs/reference/index-modules/slowlog.asciidoc +++ b/docs/reference/index-modules/slowlog.asciidoc @@ -68,10 +68,17 @@ index.indexing.slowlog.threshold.index.warn: 10s index.indexing.slowlog.threshold.index.info: 5s index.indexing.slowlog.threshold.index.debug: 2s index.indexing.slowlog.threshold.index.trace: 500ms +index.indexing.slowlog.level: info +index.indexing.slowlog.source: 1000 -------------------------------------------------- All of the above settings are _dynamic_ and can be set per-index. +By default Elasticsearch will log the first 1000 characters of the _source in +the slowlog. You can change that with `index.indexing.slowlog.source`. Setting +it to `false` or `0` will skip logging the source entirely an setting it to +`true` will log the entire source regardless of size. + The index slow log file is configured by default in the `logging.yml` file: