diff --git a/java/com/google/re2j/Matcher.java b/java/com/google/re2j/Matcher.java
index eb848610..55b0ac4f 100644
--- a/java/com/google/re2j/Matcher.java
+++ b/java/com/google/re2j/Matcher.java
@@ -2,6 +2,8 @@
 
 package com.google.re2j;
 
+import java.util.Map;
+
 /**
  * A stateful iterator that interprets a regex {@code Pattern} on a specific input. Its interface
  * mimics the JDK 1.4.2 {@code java.util.regex.Matcher}.
@@ -37,6 +39,8 @@ public final class Matcher {
   // The group indexes, in [start, end) pairs. Zeroth pair is overall match.
   private final int[] groups;
 
+  private final Map<String, Integer> namedGroups;
+
   // The number of submatches (groups) in the pattern.
   private final int groupCount;
 
@@ -66,6 +70,7 @@ private Matcher(Pattern pattern) {
     RE2 re2 = pattern.re2();
     groupCount = re2.numberOfCapturingGroups();
     groups = new int[2 + 2 * groupCount];
+    namedGroups = re2.namedGroups;
   }
 
   /** Creates a new {@code Matcher} with the given pattern and input. */
@@ -137,6 +142,21 @@ public int start(int group) {
     return groups[2 * group];
   }
 
+  /**
+   * Returns the start of the named group of the most recent match, or -1 if the group was not
+   * matched.
+   *
+   * @param group the group name
+   * @throws IllegalArgumentException if no group with that name exists
+   */
+  public int start(String group) {
+    Integer g = namedGroups.get(group);
+    if (g == null) {
+      throw new IllegalArgumentException("group '" + group + "' not found");
+    }
+    return start(g);
+  }
+
   /**
    * Returns the end position of a subgroup of the most recent match.
    *
@@ -149,6 +169,21 @@ public int end(int group) {
     return groups[2 * group + 1];
   }
 
+  /**
+   * Returns the end of the named group of the most recent match, or -1 if the group was not
+   * matched.
+   *
+   * @param group the group name
+   * @throws IllegalArgumentException if no group with that name exists
+   */
+  public int end(String group) {
+    Integer g = namedGroups.get(group);
+    if (g == null) {
+      throw new IllegalArgumentException("group '" + group + "' not found");
+    }
+    return end(g);
+  }
+
   /**
    * Returns the most recent match.
    *
@@ -174,6 +209,20 @@ public String group(int group) {
     return substring(start, end);
   }
 
+  /**
+   * Returns the named group of the most recent match, or {@code null} if the group was not matched.
+   *
+   * @param group the group name
+   * @throws IllegalArgumentException if no group with that name exists
+   */
+  public String group(String group) {
+    Integer g = namedGroups.get(group);
+    if (g == null) {
+      throw new IllegalArgumentException("group '" + group + "' not found");
+    }
+    return group(g);
+  }
+
   /**
    * Returns the number of subgroups in this pattern.
    *
diff --git a/java/com/google/re2j/Parser.java b/java/com/google/re2j/Parser.java
index 2b965052..5cbc8b2d 100644
--- a/java/com/google/re2j/Parser.java
+++ b/java/com/google/re2j/Parser.java
@@ -13,7 +13,9 @@
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 /**
  * A parser of regular expression patterns.
@@ -38,6 +40,7 @@ class Parser {
   private static final String ERR_MISSING_REPEAT_ARGUMENT =
       "missing argument to repetition operator";
   private static final String ERR_TRAILING_BACKSLASH = "trailing backslash at end of expression";
+  private static final String ERR_DUPLICATE_NAMED_CAPTURE = "duplicate capture group name";
 
   // Hack to expose ArrayList.removeRange().
   private static class Stack extends ArrayList<Regexp> {
@@ -56,6 +59,7 @@ public void removeRange(int fromIndex, int toIndex) {
   private final Stack stack = new Stack();
   private Regexp free;
   private int numCap = 0; // number of capturing groups seen
+  private Map<String, Integer> namedGroups = new HashMap<String, Integer>();
 
   Parser(String wholeRegexp, int flags) {
     this.wholeRegexp = wholeRegexp;
@@ -972,6 +976,7 @@ private Regexp parseInternal() throws PatternSyntaxException {
     if (n != 1) {
       throw new PatternSyntaxException(ERR_MISSING_PAREN, wholeRegexp);
     }
+    stack.get(0).namedGroups = namedGroups;
     return stack.get(0);
   }
 
@@ -1062,6 +1067,9 @@ private void parsePerlFlags(StringIterator t) throws PatternSyntaxException {
       // Like ordinary capture, but named.
       Regexp re = op(Regexp.Op.LEFT_PAREN);
       re.cap = ++numCap;
+      if (namedGroups.put(name, numCap) != null) {
+        throw new PatternSyntaxException(ERR_DUPLICATE_NAMED_CAPTURE, name);
+      }
       re.name = name;
       return;
     }
diff --git a/java/com/google/re2j/RE2.java b/java/com/google/re2j/RE2.java
index a20bd4d2..980fbd71 100644
--- a/java/com/google/re2j/RE2.java
+++ b/java/com/google/re2j/RE2.java
@@ -23,6 +23,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
 import java.util.Queue;
 
 /**
@@ -116,6 +117,7 @@ class RE2 {
   // Accesses must be serialized using |this| monitor.
   // @GuardedBy("this")
   private final Queue machine = new ArrayDeque();
+  public Map<String, Integer> namedGroups;
 
   // This is visible for testing.
   RE2(String expr) {
@@ -195,6 +197,7 @@ static RE2 compileImpl(String expr, int mode, boolean longest) throws PatternSyn
     if (!re2.prefix.isEmpty()) {
       re2.prefixRune = re2.prefix.codePointAt(0);
     }
+    re2.namedGroups = re.namedGroups;
     return re2;
   }
 
diff --git a/java/com/google/re2j/Regexp.java b/java/com/google/re2j/Regexp.java
index 26f0bbe3..a4f36947 100644
--- a/java/com/google/re2j/Regexp.java
+++ b/java/com/google/re2j/Regexp.java
@@ -8,6 +8,7 @@
 package com.google.re2j;
 
 import java.util.Arrays;
+import java.util.Map;
 
 /**
  * Regular expression abstract syntax tree. Produced by parser, used by compiler. NB, this
@@ -56,6 +57,7 @@ boolean isPseudo() {
   int min, max; // min, max for REPEAT
   int cap; // capturing index, for CAPTURE
   String name; // capturing name, for CAPTURE
+  Map<String, Integer> namedGroups; // map of group name -> capturing index
   // Do update copy ctor when adding new fields!
 
   Regexp(Op op) {
@@ -72,6 +74,7 @@ boolean isPseudo() {
     this.max = that.max;
     this.cap = that.cap;
     this.name = that.name;
+    this.namedGroups = that.namedGroups;
   }
 
   void reinit() {
diff --git a/javatests/com/google/re2j/MatcherTest.java b/javatests/com/google/re2j/MatcherTest.java
index 7d624e9f..443009ac 100644
--- a/javatests/com/google/re2j/MatcherTest.java
+++ b/javatests/com/google/re2j/MatcherTest.java
@@ -431,4 +431,34 @@ public void testMutableCharSequence() {
     b.replace(b.indexOf("ban"), start + 3, "b");
     assertTrue(m.find(b.indexOf("ban")));
   }
+
+  @Test
+  public void testNamedGroups() {
+    Pattern p =
+        Pattern.compile(
+            "(?P<baz>f(?P<foo>b*a(?P<another>r+)){0,10})" + "(?P<bag>bag)?(?P<nomatch>zzz)?");
+    Matcher m = p.matcher("fbbarrrrrbag");
+    assertTrue(m.matches());
+    assertEquals("fbbarrrrr", m.group("baz"));
+    assertEquals("bbarrrrr", m.group("foo"));
+    assertEquals("rrrrr", m.group("another"));
+    assertEquals(0, m.start("baz"));
+    assertEquals(1, m.start("foo"));
+    assertEquals(4, m.start("another"));
+    assertEquals(9, m.end("baz"));
+    assertEquals(9, m.end("foo"));
+    assertEquals("bag", m.group("bag"));
+    assertEquals(9, m.start("bag"));
+    assertEquals(12, m.end("bag"));
+    assertEquals(null, m.group("nomatch"));
+    assertEquals(-1, m.start("nomatch"));
+    assertEquals(-1, m.end("nomatch"));
+
+    try {
+      m.group("nonexistent");
+      fail("Should have thrown IllegalArgumentException");
+    } catch (IllegalArgumentException expected) {
+      // Expected
+    }
+  }
 }
diff --git a/javatests/com/google/re2j/ParserTest.java b/javatests/com/google/re2j/ParserTest.java
index 3536c3ee..2317ee66 100644
--- a/javatests/com/google/re2j/ParserTest.java
+++ b/javatests/com/google/re2j/ParserTest.java
@@ -528,6 +528,8 @@ private static String runesToString(int[] runes) {
     "(?i)[a-Z]",
     "a{100000}",
     "a{100000,}",
+    // Group names may not be repeated
+    "(?P<foo>bar)(?P<foo>baz)",
   };
 
   private static final String[] ONLY_PERL = {