Skip to content

Commit

Permalink
[7.7] Ensure Joni warning are logged at debug (#57302) (#57895)
Browse files Browse the repository at this point in the history
When Joni, the regex engine that powers grok emits a warning it
does so by default to System.err. System.err logs are all bucketed
together in the server log at WARN level. When Joni emits a warning,
it can be extremely verbose, logging a message for each execution
again that pattern. For ingest node that means for every document
that is run that Grok filter. Fortunately, Joni provides a call
back hook to push these warnings to a custom location.

This commit implements Joni's callback hook to push the Joni warning
to the Elasticsearch server logger (logger.org.elasticsearch.ingest.common.GrokProcessor)
at debug level. A warning will be emitted on processor creation at WARN
level. Generally these warning indicate a possible issue with
the regular expression. Since Grok is an abstraction on top of the regex,
these warning may not provide much value outside of a troubleshooting
scenario.

Additionally, the documentation is updated with instructions for how
to set the logger to debug level.
  • Loading branch information
jakelandis committed Jun 9, 2020
1 parent 71564c4 commit a767730
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 49 deletions.
20 changes: 20 additions & 0 deletions docs/reference/ingest/processors/grok.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -324,3 +324,23 @@ settings:
| `ingest.grok.watchdog.interval` | 1s | How often to check whether there are grok evaluations that take longer than the maximum allowed execution time.
| `ingest.grok.watchdog.max_execution_time` | 1s | The maximum allowed execution of a grok expression evaluation.
|======

[[grok-debugging]]
==== Grok debugging

It is advised to use the {kibana-ref}/xpack-grokdebugger.html[Grok Debugger] to debug grok patterns. From there you can test one or more
patterns in the UI against sample data. Under the covers it uses the same engine as ingest node processor.

Additionally, it is recommended to enable debug logging for Grok so that any additional messages may also be seen in the Elasticsearch
server log.

[source,js]
--------------------------------------------------
PUT _cluster/settings
{
"transient": {
"logger.org.elasticsearch.ingest.common.GrokProcessor": "debug"
}
}
--------------------------------------------------
// NOTCONSOLE
19 changes: 11 additions & 8 deletions libs/grok/src/main/java/org/elasticsearch/grok/Grok.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Consumer;

public final class Grok {

Expand Down Expand Up @@ -77,19 +78,20 @@ public final class Grok {
private final Regex compiledExpression;
private final MatcherWatchdog matcherWatchdog;

public Grok(Map<String, String> patternBank, String grokPattern) {
this(patternBank, grokPattern, true, MatcherWatchdog.noop());
public Grok(Map<String, String> patternBank, String grokPattern, Consumer<String> logCallBack) {
this(patternBank, grokPattern, true, MatcherWatchdog.noop(), logCallBack);
}

public Grok(Map<String, String> patternBank, String grokPattern, MatcherWatchdog matcherWatchdog) {
this(patternBank, grokPattern, true, matcherWatchdog);
public Grok(Map<String, String> patternBank, String grokPattern, MatcherWatchdog matcherWatchdog, Consumer<String> logCallBack) {
this(patternBank, grokPattern, true, matcherWatchdog, logCallBack);
}

Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures) {
this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop());
Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, Consumer<String> logCallBack) {
this(patternBank, grokPattern, namedCaptures, MatcherWatchdog.noop(), logCallBack);
}

private Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, MatcherWatchdog matcherWatchdog) {
private Grok(Map<String, String> patternBank, String grokPattern, boolean namedCaptures, MatcherWatchdog matcherWatchdog,
Consumer<String> logCallBack) {
this.patternBank = patternBank;
this.namedCaptures = namedCaptures;
this.matcherWatchdog = matcherWatchdog;
Expand All @@ -102,7 +104,8 @@ private Grok(Map<String, String> patternBank, String grokPattern, boolean namedC

String expression = toRegex(grokPattern);
byte[] expressionBytes = expression.getBytes(StandardCharsets.UTF_8);
this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE);
this.compiledExpression = new Regex(expressionBytes, 0, expressionBytes.length, Option.DEFAULT, UTF8Encoding.INSTANCE,
message -> logCallBack.accept(message));
}

/**
Expand Down
73 changes: 42 additions & 31 deletions libs/grok/src/test/java/org/elasticsearch/grok/GrokTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
Expand All @@ -40,19 +42,19 @@ public class GrokTests extends ESTestCase {

public void testMatchWithoutCaptures() {
String line = "value";
Grok grok = new Grok(basePatterns, "value");
Grok grok = new Grok(basePatterns, "value", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals(0, matches.size());
}

public void testNoMatchingPatternInDictionary() {
Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}"));
Exception e = expectThrows(IllegalArgumentException.class, () -> new Grok(Collections.emptyMap(), "%{NOTFOUND}", logger::warn));
assertThat(e.getMessage(), equalTo("Unable to find pattern [NOTFOUND] in Grok's pattern dictionary"));
}

public void testSimpleSyslogLine() {
String line = "Mar 16 00:01:25 evita postfix/smtpd[1713]: connect from camomile.cloud9.net[168.100.1.3]";
Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}");
Grok grok = new Grok(basePatterns, "%{SYSLOGLINE}", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals("evita", matches.get("logsource"));
assertEquals("Mar 16 00:01:25", matches.get("timestamp"));
Expand All @@ -64,7 +66,7 @@ public void testSimpleSyslogLine() {
public void testSyslog5424Line() {
String line = "<191>1 2009-06-30T18:30:00+02:00 paxton.local grokdebug 4123 - [id1 foo=\\\"bar\\\"][id2 baz=\\\"something\\\"] " +
"Hello, syslog.";
Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}");
Grok grok = new Grok(basePatterns, "%{SYSLOG5424LINE}", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals("191", matches.get("syslog5424_pri"));
assertEquals("1", matches.get("syslog5424_ver"));
Expand All @@ -79,21 +81,21 @@ public void testSyslog5424Line() {

public void testDatePattern() {
String line = "fancy 12-12-12 12:12:12";
Grok grok = new Grok(basePatterns, "(?<timestamp>%{DATE_EU} %{TIME})");
Grok grok = new Grok(basePatterns, "(?<timestamp>%{DATE_EU} %{TIME})", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals("12-12-12 12:12:12", matches.get("timestamp"));
}

public void testNilCoercedValues() {
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)");
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration:float}ms)", logger::warn);
Map<String, Object> matches = grok.captures("test 28.4ms");
assertEquals(28.4f, matches.get("duration"));
matches = grok.captures("test N/A");
assertEquals(null, matches.get("duration"));
}

public void testNilWithNoCoercion() {
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)");
Grok grok = new Grok(basePatterns, "test (N/A|%{BASE10NUM:duration}ms)", logger::warn);
Map<String, Object> matches = grok.captures("test 28.4ms");
assertEquals("28.4", matches.get("duration"));
matches = grok.captures("test N/A");
Expand All @@ -103,7 +105,7 @@ public void testNilWithNoCoercion() {
public void testUnicodeSyslog() {
Grok grok = new Grok(basePatterns, "<%{POSINT:syslog_pri}>%{SPACE}%{SYSLOGTIMESTAMP:syslog_timestamp} " +
"%{SYSLOGHOST:syslog_hostname} %{PROG:syslog_program}(:?)(?:\\[%{GREEDYDATA:syslog_pid}\\])?(:?) " +
"%{GREEDYDATA:syslog_message}");
"%{GREEDYDATA:syslog_message}", logger::warn);
Map<String, Object> matches = grok.captures("<22>Jan 4 07:50:46 mailmaster postfix/policy-spf[9454]: : " +
"SPF permerror (Junk encountered in record 'v=spf1 mx a:mail.domain.no ip4:192.168.0.4 �all'): Envelope-from: " +
"email@domain.no");
Expand All @@ -113,19 +115,19 @@ public void testUnicodeSyslog() {
}

public void testNamedFieldsWithWholeTextMatch() {
Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}");
Grok grok = new Grok(basePatterns, "%{DATE_EU:stimestamp}", logger::warn);
Map<String, Object> matches = grok.captures("11/01/01");
assertThat(matches.get("stimestamp"), equalTo("11/01/01"));
}

public void testWithOniguramaNamedCaptures() {
Grok grok = new Grok(basePatterns, "(?<foo>\\w+)");
Grok grok = new Grok(basePatterns, "(?<foo>\\w+)", logger::warn);
Map<String, Object> matches = grok.captures("hello world");
assertThat(matches.get("foo"), equalTo("hello"));
}

public void testISO8601() {
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$");
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn);
List<String> timeMessages = Arrays.asList(
"2001-01-01T00:00:00",
"1974-03-02T04:09:09",
Expand All @@ -149,7 +151,7 @@ public void testISO8601() {
}

public void testNotISO8601() {
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$");
Grok grok = new Grok(basePatterns, "^%{TIMESTAMP_ISO8601}$", logger::warn);
List<String> timeMessages = Arrays.asList(
"2001-13-01T00:00:00", // invalid month
"2001-00-01T00:00:00", // invalid month
Expand Down Expand Up @@ -188,7 +190,7 @@ public void testNoNamedCaptures() {

String text = "wowza !!!Tal!!! - Tal";
String pattern = "%{EXCITED_NAME} - %{NAME}";
Grok g = new Grok(bank, pattern, false);
Grok g = new Grok(bank, pattern, false, logger::warn);

assertEquals("(?<EXCITED_NAME_0>!!!(?<NAME_21>Tal)!!!) - (?<NAME_22>Tal)", g.toRegex(pattern));
assertEquals(true, g.match(text));
Expand All @@ -206,23 +208,23 @@ public void testCircularReference() {
Map<String, String> bank = new HashMap<>();
bank.put("NAME", "!!!%{NAME}!!!");
String pattern = "%{NAME}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("circular reference in pattern [NAME][!!!%{NAME}!!!]", e.getMessage());

e = expectThrows(IllegalArgumentException.class, () -> {
Map<String, String> bank = new HashMap<>();
bank.put("NAME", "!!!%{NAME:name}!!!");
String pattern = "%{NAME}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("circular reference in pattern [NAME][!!!%{NAME:name}!!!]", e.getMessage());

e = expectThrows(IllegalArgumentException.class, () -> {
Map<String, String> bank = new HashMap<>();
bank.put("NAME", "!!!%{NAME:name:int}!!!");
String pattern = "%{NAME}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("circular reference in pattern [NAME][!!!%{NAME:name:int}!!!]", e.getMessage());

Expand All @@ -231,7 +233,7 @@ public void testCircularReference() {
bank.put("NAME1", "!!!%{NAME2}!!!");
bank.put("NAME2", "!!!%{NAME1}!!!");
String pattern = "%{NAME1}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("circular reference in pattern [NAME2][!!!%{NAME1}!!!] back to pattern [NAME1]", e.getMessage());

Expand All @@ -241,7 +243,7 @@ public void testCircularReference() {
bank.put("NAME2", "!!!%{NAME3}!!!");
bank.put("NAME3", "!!!%{NAME1}!!!");
String pattern = "%{NAME1}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn);
});
assertEquals("circular reference in pattern [NAME3][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2]",
e.getMessage());
Expand All @@ -254,15 +256,15 @@ public void testCircularReference() {
bank.put("NAME4", "!!!%{NAME5}!!!");
bank.put("NAME5", "!!!%{NAME1}!!!");
String pattern = "%{NAME1}";
new Grok(bank, pattern, false);
new Grok(bank, pattern, false, logger::warn );
});
assertEquals("circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] " +
"via patterns [NAME2=>NAME3=>NAME4]", e.getMessage());
}

public void testBooleanCaptures() {
String pattern = "%{WORD:name}=%{WORD:status:boolean}";
Grok g = new Grok(basePatterns, pattern);
Grok g = new Grok(basePatterns, pattern, logger::warn);

String text = "active=true";
Map<String, Object> expected = new HashMap<>();
Expand All @@ -279,7 +281,7 @@ public void testNumericCaptures() {
bank.put("NUMBER", "(?:%{BASE10NUM})");

String pattern = "%{NUMBER:bytes:float} %{NUMBER:id:long} %{NUMBER:rating:double}";
Grok g = new Grok(bank, pattern);
Grok g = new Grok(bank, pattern, logger::warn);

String text = "12009.34 20000000000 4820.092";
Map<String, Object> expected = new HashMap<>();
Expand All @@ -297,7 +299,7 @@ public void testNumericCapturesCoercion() {
bank.put("NUMBER", "(?:%{BASE10NUM})");

String pattern = "%{NUMBER:bytes:float} %{NUMBER:status} %{NUMBER}";
Grok g = new Grok(bank, pattern);
Grok g = new Grok(bank, pattern, logger::warn);

String text = "12009.34 200 9032";
Map<String, Object> expected = new HashMap<>();
Expand All @@ -312,7 +314,7 @@ public void testApacheLog() {
String logLine = "31.184.238.164 - - [24/Jul/2014:05:35:37 +0530] \"GET /logs/access.log HTTP/1.0\" 200 69849 " +
"\"http://8rursodiol.enjin.com\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " +
"Chrome/30.0.1599.12785 YaBrowser/13.12.1599.12785 Safari/537.36\" \"www.dlwindianrailways.com\"";
Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}");
Grok grok = new Grok(basePatterns, "%{COMBINEDAPACHELOG}", logger::warn);
Map<String, Object> matches = grok.captures(logLine);

assertEquals("31.184.238.164", matches.get("clientip"));
Expand Down Expand Up @@ -372,7 +374,7 @@ public void testComplete() {
String pattern = "%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \\[%{HTTPDATE:timestamp}\\] \"%{WORD:verb} %{DATA:request} " +
"HTTP/%{NUMBER:httpversion}\" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}";

Grok grok = new Grok(bank, pattern);
Grok grok = new Grok(bank, pattern, logger::warn);

Map<String, Object> expected = new HashMap<>();
expected.put("clientip", "83.149.9.216");
Expand All @@ -396,14 +398,14 @@ public void testComplete() {
public void testNoMatch() {
Map<String, String> bank = new HashMap<>();
bank.put("MONTHDAY", "(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])");
Grok grok = new Grok(bank, "%{MONTHDAY:greatday}");
Grok grok = new Grok(bank, "%{MONTHDAY:greatday}", logger::warn);
assertThat(grok.captures("nomatch"), nullValue());
}

public void testMultipleNamedCapturesWithSameName() {
Map<String, String> bank = new HashMap<>();
bank.put("SINGLEDIGIT", "[0-9]");
Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}");
Grok grok = new Grok(bank, "%{SINGLEDIGIT:num}%{SINGLEDIGIT:num}", logger::warn);

Map<String, Object> expected = new HashMap<>();
expected.put("num", "1");
Expand All @@ -430,7 +432,8 @@ public void testExponentialExpressions() {
});
t.start();
};
Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler));
Grok grok = new Grok(basePatterns, grokPattern, MatcherWatchdog.newInstance(10, 200, System::currentTimeMillis, scheduler),
logger::warn);
Exception e = expectThrows(RuntimeException.class, () -> grok.captures(logLine));
run.set(false);
assertThat(e.getMessage(), equalTo("grok pattern matching was interrupted after [200] ms"));
Expand Down Expand Up @@ -470,24 +473,32 @@ public void testAlphanumericFieldName() {
}

public void testUnsupportedBracketsInFieldName() {
Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}");
Grok grok = new Grok(basePatterns, "%{WORD:unsuppo(r)ted}", logger::warn);
Map<String, Object> matches = grok.captures("line");
assertNull(matches);
}

public void testJavaClassPatternWithUnderscore() {
Grok grok = new Grok(basePatterns, "%{JAVACLASS}");
Grok grok = new Grok(basePatterns, "%{JAVACLASS}", logger::warn);
assertThat(grok.match("Test_Class.class"), is(true));
}

public void testJavaFilePatternWithSpaces() {
Grok grok = new Grok(basePatterns, "%{JAVAFILE}");
Grok grok = new Grok(basePatterns, "%{JAVAFILE}", logger::warn);
assertThat(grok.match("Test Class.java"), is(true));
}

public void testLogCallBack(){
AtomicReference<String> message = new AtomicReference<>();
Grok grok = new Grok(basePatterns, ".*\\[.*%{SPACE}*\\].*", message::set);
grok.match("[foo]");
//this message comes from Joni, so updates to Joni may change the expectation
assertThat(message.get(), containsString("regular expression has redundant nested repeat operator"));
}

private void assertGrokedField(String fieldName) {
String line = "foo";
Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}");
Grok grok = new Grok(basePatterns, "%{WORD:" + fieldName + "}", logger::warn);
Map<String, Object> matches = grok.captures(line);
assertEquals(line, matches.get(fieldName));
}
Expand Down
13 changes: 13 additions & 0 deletions modules/ingest-common/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,16 @@ testClusters.integTest {
// (this is because the integTest node is not using default distribution, but only the minimal number of required modules)
module project(':modules:lang-mustache').tasks.bundlePlugin.archiveFile
}

thirdPartyAudit.ignoreMissingClasses(
// from log4j
'org.osgi.framework.AdaptPermission',
'org.osgi.framework.AdminPermission',
'org.osgi.framework.Bundle',
'org.osgi.framework.BundleActivator',
'org.osgi.framework.BundleContext',
'org.osgi.framework.BundleEvent',
'org.osgi.framework.SynchronousBundleListener',
'org.osgi.framework.wiring.BundleWire',
'org.osgi.framework.wiring.BundleWiring'
)
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.elasticsearch.ingest.common;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.grok.Grok;
import org.elasticsearch.grok.MatcherWatchdog;
import org.elasticsearch.ingest.AbstractProcessor;
Expand All @@ -36,6 +38,7 @@ public final class GrokProcessor extends AbstractProcessor {

public static final String TYPE = "grok";
private static final String PATTERN_MATCH_KEY = "_ingest._grok_match_index";
private static final Logger logger = LogManager.getLogger(GrokProcessor.class);

private final String matchField;
private final List<String> matchPatterns;
Expand All @@ -48,9 +51,12 @@ public final class GrokProcessor extends AbstractProcessor {
super(tag);
this.matchField = matchField;
this.matchPatterns = matchPatterns;
this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog);
this.grok = new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::debug);
this.traceMatch = traceMatch;
this.ignoreMissing = ignoreMissing;
// Joni warnings are only emitted on an attempt to match, and the warning emitted for every call to match which is too verbose
// so here we emit a warning (if there is one) to the logfile at warn level on construction / processor creation.
new Grok(patternBank, combinePatterns(matchPatterns, traceMatch), matcherWatchdog, logger::warn).match("___nomatch___");
}

@Override
Expand Down

0 comments on commit a767730

Please sign in to comment.