Skip to content

Commit

Permalink
regexp/syntax: accept (?<name>...) syntax as valid capture
Browse files Browse the repository at this point in the history
Currently the only named capture syntax supported by regexp is (?P<name>a).

The syntax (?<name>a) is also widely used, and there is currently an effort from
the Rust regex and RE2 teams to accept this syntax as well.

Fixes #58458

Change-Id: If22d44d3a5c4e8133ec68238ab130c151ca7c5c5
GitHub-Last-Rev: 31b50e6
GitHub-Pull-Request: #61624
Reviewed-on: https://go-review.googlesource.com/c/go/+/513838
Auto-Submit: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
  • Loading branch information
mauri870 authored and gopherbot committed Jul 31, 2023
1 parent 977e23a commit ee61186
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 5 deletions.
1 change: 1 addition & 0 deletions src/regexp/syntax/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ Grouping:
(re) numbered capturing group (submatch)
(?P<name>re) named & numbered capturing group (submatch)
(?<name>re) named & numbered capturing group (submatch)
(?:re) non-capturing group
(?flags) set flags within current group; non-capturing
(?flags:re) set flags during re; non-capturing
Expand Down
19 changes: 14 additions & 5 deletions src/regexp/syntax/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -1159,9 +1159,18 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) {
// support all three as well. EcmaScript 4 uses only the Python form.
//
// In both the open source world (via Code Search) and the
// Google source tree, (?P<expr>name) is the dominant form,
// so that's the one we implement. One is enough.
if len(t) > 4 && t[2] == 'P' && t[3] == '<' {
// Google source tree, (?P<name>expr) and (?<name>expr) are the
// dominant forms of named captures and both are supported.
startsWithP := len(t) > 4 && t[2] == 'P' && t[3] == '<'
startsWithName := len(t) > 3 && t[2] == '<'

if startsWithP || startsWithName {
// position in t where the capture name starts (just past "(?P<" or "(?<")
exprStartPos := 4
if startsWithName {
exprStartPos = 3
}

// Pull out name.
end := strings.IndexRune(t, '>')
if end < 0 {
Expand All @@ -1171,8 +1180,8 @@ func (p *parser) parsePerlFlags(s string) (rest string, err error) {
return "", &Error{ErrInvalidNamedCapture, s}
}

capture := t[:end+1] // "(?P<name>"
name := t[4:end] // "name"
capture := t[:end+1] // "(?P<name>" or "(?<name>"
name := t[exprStartPos:end] // "name"
if err = checkUTF8(name); err != nil {
return "", err
}
Expand Down
6 changes: 6 additions & 0 deletions src/regexp/syntax/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ var parseTests = []parseTest{

// Test named captures
{`(?P<name>a)`, `cap{name:lit{a}}`},
{`(?<name>a)`, `cap{name:lit{a}}`},

// Case-folded literals
{`[Aa]`, `litfold{A}`},
Expand Down Expand Up @@ -482,6 +483,11 @@ var invalidRegexps = []string{
`(?P<name`,
`(?P<x y>a)`,
`(?P<>a)`,
`(?<name>a`,
`(?<name>`,
`(?<name`,
`(?<x y>a)`,
`(?<>a)`,
`[a-Z]`,
`(?i)[a-Z]`,
`\Q\E*`,
Expand Down

0 comments on commit ee61186

Please sign in to comment.