Skip to content

Commit

Permalink
[ML] Fix custom timestamp override with dot-separated fractional seconds (#44127)
Browse files Browse the repository at this point in the history

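Custom timestamp overrides provided to the find_file_structure
endpoint produced an invalid Grok pattern if the fractional
seconds separator was a dot rather than a comma or colon.
A dot has to be escaped in the pattern, so the separator occupies
two characters, but only one character was being removed when the
separator was stripped from the end of the Grok pattern.
This commit fixes that problem and adds tests for this sort
of timestamp override.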

Fixes #44110
  • Loading branch information
droberts195 committed Jul 10, 2019
1 parent 913b6a6 commit 853ddb5
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -279,9 +279,10 @@ static Tuple<String, String> overrideFormatToGrokAndRegex(String overrideFormat)
}
throw new IllegalArgumentException(msg);
}
// No need to append to the Grok pattern as %{SECOND} already allows for an optional
// fraction, but we need to remove the separator that's included in %{SECOND}
grokPatternBuilder.deleteCharAt(grokPatternBuilder.length() - 1);
// No need to append to the Grok pattern as %{SECOND} already allows for an optional fraction,
// but we need to remove the separator that's included in %{SECOND} (and that might be escaped)
int numCharsToDelete = (PUNCTUATION_THAT_NEEDS_ESCAPING_IN_REGEX.indexOf(prevChar) >= 0) ? 2 : 1;
grokPatternBuilder.delete(grokPatternBuilder.length() - numCharsToDelete, grokPatternBuilder.length());
regexBuilder.append("\\d{").append(endPos - startPos).append('}');
} else {
grokPatternBuilder.append(grokPatternAndRegexForGroup.v1());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -737,16 +737,27 @@ public void testCustomOverrideMatchingBuiltInFormat() {
assertEquals(1, lenientTimestampFormatFinder.getNumMatchedFormats());
}

public void testCustomOverrideNotMatchingBuiltInFormat() {
public void testCustomOverridesNotMatchingBuiltInFormat() {

String overrideFormat = "MM/dd HH.mm.ss,SSSSSS 'in' yyyy";
String text = "05/15 17.14.56,374946 in 2018";
String expectedSimpleRegex = "\\b\\d{2}/\\d{2} \\d{2}\\.\\d{2}\\.\\d{2},\\d{6} in \\d{4}\\b";
String expectedGrokPatternName = "CUSTOM_TIMESTAMP";
Map<String, String> expectedCustomGrokPatternDefinitions =
validateCustomOverrideNotMatchingBuiltInFormat("MM/dd HH.mm.ss,SSSSSS 'in' yyyy", "05/15 17.14.56,374946 in 2018",
"\\b\\d{2}/\\d{2} \\d{2}\\.\\d{2}\\.\\d{2},\\d{6} in \\d{4}\\b", "CUSTOM_TIMESTAMP",
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
"%{MONTHNUM2}/%{MONTHDAY} %{HOUR}\\.%{MINUTE}\\.%{SECOND} in %{YEAR}");
"%{MONTHNUM2}/%{MONTHDAY} %{HOUR}\\.%{MINUTE}\\.%{SECOND} in %{YEAR}"));

validateCustomOverrideNotMatchingBuiltInFormat("'some_prefix 'dd.MM.yyyy HH:mm:ss.SSSSSS", "some_prefix 06.01.2018 16:56:14.295748",
"some_prefix \\d{2}\\.\\d{2}\\.\\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{6}\\b", "CUSTOM_TIMESTAMP",
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
"some_prefix %{MONTHDAY}\\.%{MONTHNUM2}\\.%{YEAR} %{HOUR}:%{MINUTE}:%{SECOND}"));

validateCustomOverrideNotMatchingBuiltInFormat("dd.MM. yyyy HH:mm:ss.SSSSSS", "06.01. 2018 16:56:14.295748",
"\\b\\d{2}\\.\\d{2}\\. \\d{4} \\d{2}:\\d{2}:\\d{2}\\.\\d{6}\\b", "CUSTOM_TIMESTAMP",
Collections.singletonMap(TimestampFormatFinder.CUSTOM_TIMESTAMP_GROK_NAME,
"%{MONTHDAY}\\.%{MONTHNUM2}\\. %{YEAR} %{HOUR}:%{MINUTE}:%{SECOND}"));
}

private void validateCustomOverrideNotMatchingBuiltInFormat(String overrideFormat, String text, String expectedSimpleRegex,
String expectedGrokPatternName,
Map<String, String> expectedCustomGrokPatternDefinitions) {
TimestampFormatFinder strictTimestampFormatFinder = new TimestampFormatFinder(explanation, overrideFormat, true, true, true,
NOOP_TIMEOUT_CHECKER);
strictTimestampFormatFinder.addSample(text);
Expand Down

0 comments on commit 853ddb5

Please sign in to comment.