Skip to content

Commit

Permalink
[7.17] Enhance regex performance with duplicate wildcards (#98176) (#…
Browse files Browse the repository at this point in the history
…98277)

This change avoids unnecessary substring allocations and recursive calls
when more than two consecutive wildcards (`*`) are detected. Instead of
skipping a single `*` and calling the method recursively, we now skip all
consecutive `*` chars at once.
  • Loading branch information
slobodanadamovic committed Aug 8, 2023
1 parent 842e8a4 commit 6f66c75
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 2 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/98176.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 98176
summary: Enhance regex performance with duplicate wildcards
area: Infra/Core
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,12 @@ private static boolean simpleMatchWithNormalizedStrings(String pattern, String s
// str.endsWith(pattern.substring(1)), but avoiding the construction of pattern.substring(1):
return str.regionMatches(str.length() - pattern.length() + 1, pattern, 1, pattern.length() - 1);
} else if (nextIndex == 1) {
// Double wildcard "**" - skipping the first "*"
return simpleMatchWithNormalizedStrings(pattern.substring(1), str);
// Double wildcard "**" detected - skipping all "*"
int wildcards = nextIndex + 1;
while (wildcards < pattern.length() && pattern.charAt(wildcards) == '*') {
wildcards++;
}
return simpleMatchWithNormalizedStrings(pattern.substring(wildcards - 1), str);
}
final String part = pattern.substring(1, nextIndex);
int partIndex = str.indexOf(part);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,28 @@ public void testDoubleWildcardMatch() {
assertFalse(Regex.simpleMatch("fff******ddd", "fffabcdd"));
}

/**
 * Checks that {@code Regex.simpleMatch} accepts patterns containing runs of
 * consecutive {@code *} characters of random length (1 to 1000) placed after a
 * literal, before a literal, between two literals, and twice within one pattern.
 * The third argument of every call is a random boolean, so both values of that
 * flag are exercised across runs.
 */
public void testArbitraryWildcardMatch() {
    final String head = randomAlphaOfLengthBetween(1, 20);
    final String tail = randomAlphaOfLengthBetween(1, 20);
    final String firstStars = repeat("*", randomIntBetween(1, 1000));

    // Literal followed by a wildcard run, e.g. "dd***".
    final String trailingInput = head + randomAlphaOfLengthBetween(10, 20);
    assertTrue(Regex.simpleMatch(head + firstStars, trailingInput, randomBoolean()));

    // Wildcard run followed by a literal, e.g. "***dd".
    final String leadingInput = randomAlphaOfLengthBetween(10, 20) + tail;
    assertTrue(Regex.simpleMatch(firstStars + tail, leadingInput, randomBoolean()));

    // Wildcard run enclosed by literals, e.g. "dd***dd".
    final String enclosedInput = head + randomAlphaOfLengthBetween(10, 20) + tail;
    assertTrue(Regex.simpleMatch(head + firstStars + tail, enclosedInput, randomBoolean()));

    // Two independent wildcard runs separated by a literal, e.g. "dd***dd***dd".
    final String center = randomAlphaOfLengthBetween(1, 20);
    final String secondStars = repeat("*", randomIntBetween(1, 1000));
    final String twoRunPattern = head + firstStars + center + secondStars + tail;
    final String twoRunInput = head + randomAlphaOfLengthBetween(10, 20) + center + randomAlphaOfLengthBetween(10, 20) + tail;
    assertTrue(Regex.simpleMatch(twoRunPattern, twoRunInput, randomBoolean()));
}

public void testSimpleMatch() {
for (int i = 0; i < 1000; i++) {
final String matchingString = randomAlphaOfLength(between(0, 50));
Expand Down Expand Up @@ -189,4 +211,12 @@ private void assertMatchesNone(Automaton automaton, String... strings) {
assertFalse(run.run(s));
}
}

/**
 * Concatenates {@code count} copies of {@code str} into a single string.
 *
 * @param str   the text to repeat
 * @param count how many copies to append; zero yields an empty string
 * @return the concatenation of {@code count} copies of {@code str}
 */
private String repeat(String str, int count) {
    // Pre-size the buffer to the exact final length so it never has to grow.
    final StringBuilder repeated = new StringBuilder(str.length() * count);
    int remaining = count;
    while (remaining > 0) {
        repeated.append(str);
        remaining--;
    }
    return repeated.toString();
}
}

0 comments on commit 6f66c75

Please sign in to comment.