Skip to content

Commit 2b65cde

Browse files
rsc authored and dmitshur committed
[release-branch.go1.16] regexp/syntax: reject very deeply nested regexps in Parse
The regexp code assumes it can recurse over the structure of a regexp safely. Go's growable stacks make that reasonable for all plausible regexps, but implausible ones can reach the “infinite recursion?” stack limit. This CL limits the depth of any parsed regexp to 1000. That is, the depth of the parse tree is required to be ≤ 1000. Regexps that require deeper parse trees will return ErrInternalError. A future CL will change the error to ErrInvalidDepth, but using ErrInternalError for now avoids introducing new API in point releases when this is backported. Fixes #51112. Fixes #51117. Change-Id: I97d2cd82195946eb43a4ea8561f5b95f91fb14c5 Reviewed-on: https://go-review.googlesource.com/c/go/+/384616 Trust: Russ Cox <rsc@golang.org> Run-TryBot: Russ Cox <rsc@golang.org> Reviewed-by: Ian Lance Taylor <iant@golang.org> Reviewed-on: https://go-review.googlesource.com/c/go/+/384855
1 parent 0a6cf87 commit 2b65cde

File tree

2 files changed

+77
-2
lines changed

2 files changed

+77
-2
lines changed

src/regexp/syntax/parse.go

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,29 @@ const (
7676
opVerticalBar
7777
)
7878

79+
// maxHeight is the maximum height of a regexp parse tree.
80+
// It is somewhat arbitrarily chosen, but the idea is to be large enough
81+
// that no one will actually hit it in real use but at the same time small enough
82+
// that recursion on the Regexp tree will not hit the 1GB Go stack limit.
83+
// The maximum amount of stack for a single recursive frame is probably
84+
// closer to 1kB, so this could potentially be raised, but it seems unlikely
85+
// that people have regexps nested even this deeply.
86+
// We ran a test on Google's C++ code base and turned up only
87+
// a single use case with depth > 100; it had depth 128.
88+
// Using depth 1000 should be plenty of margin.
89+
// As an optimization, we don't even bother calculating heights
90+
// until we've allocated at least maxHeight Regexp structures.
91+
const maxHeight = 1000
92+
7993
type parser struct {
8094
flags Flags // parse mode flags
8195
stack []*Regexp // stack of parsed expressions
8296
free *Regexp
8397
numCap int // number of capturing groups seen
8498
wholeRegexp string
85-
tmpClass []rune // temporary char class work space
99+
tmpClass []rune // temporary char class work space
100+
numRegexp int // number of regexps allocated
101+
height map[*Regexp]int // regexp height for height limit check
86102
}
87103

88104
func (p *parser) newRegexp(op Op) *Regexp {
@@ -92,16 +108,52 @@ func (p *parser) newRegexp(op Op) *Regexp {
92108
*re = Regexp{}
93109
} else {
94110
re = new(Regexp)
111+
p.numRegexp++
95112
}
96113
re.Op = op
97114
return re
98115
}
99116

100117
// reuse returns re to the parser's free list: re is linked in front of the
// current list head through re.Sub0[0] and becomes the new p.free.
func (p *parser) reuse(re *Regexp) {
118+
// Drop any height recorded for re. Presumably this keeps a stale entry
// from being picked up if the struct is later handed out again for a
// different node (TODO confirm against newRegexp).
if p.height != nil {
119+
delete(p.height, re)
120+
}
101121
re.Sub0[0] = p.free
102122
p.free = re
103123
}
104124

125+
// checkHeight panics with ErrInternalError if the parse tree rooted at re
// is taller than maxHeight. The panic is recovered by the deferred function
// in parse, which turns it into a returned *Error.
//
// As an optimization, nothing is computed until at least maxHeight Regexp
// structures have been allocated (tracked in p.numRegexp).
func (p *parser) checkHeight(re *Regexp) {
126+
if p.numRegexp < maxHeight {
127+
return
128+
}
129+
// First call past the threshold: create the height table and run the
// check on every expression already on the parse stack. The nested
// calls see a non-nil p.height and therefore skip this initialization.
if p.height == nil {
130+
p.height = make(map[*Regexp]int)
131+
for _, re := range p.stack {
132+
p.checkHeight(re)
133+
}
134+
}
135+
// force=true: recompute re's own height instead of trusting a cached
// entry; the heights of its subexpressions may still come from the cache.
if p.calcHeight(re, true) > maxHeight {
136+
panic(ErrInternalError)
137+
}
138+
}
139+
140+
// calcHeight returns the height of the parse tree rooted at re and records
// it in p.height[re]. A node with no entries in re.Sub has height 1;
// otherwise the height is 1 plus the largest height among re.Sub.
//
// If force is false and a height for re is already recorded, that value is
// returned without recomputation. If force is true, re's recorded value is
// ignored and overwritten; subexpressions are always looked up with
// force=false, so their recorded heights are reused.
func (p *parser) calcHeight(re *Regexp, force bool) int {
141+
if !force {
142+
if h, ok := p.height[re]; ok {
143+
return h
144+
}
145+
}
146+
h := 1
147+
for _, sub := range re.Sub {
148+
hsub := p.calcHeight(sub, false)
149+
// Keep the maximum of 1+height over all subexpressions.
if h < 1+hsub {
150+
h = 1 + hsub
151+
}
152+
}
153+
p.height[re] = h
154+
return h
155+
}
156+
105157
// Parse stack manipulation.
106158

107159
// push pushes the regexp re onto the parse stack and returns the regexp.
@@ -137,6 +189,7 @@ func (p *parser) push(re *Regexp) *Regexp {
137189
}
138190

139191
p.stack = append(p.stack, re)
192+
p.checkHeight(re)
140193
return re
141194
}
142195

@@ -246,6 +299,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
246299
re.Sub = re.Sub0[:1]
247300
re.Sub[0] = sub
248301
p.stack[n-1] = re
302+
p.checkHeight(re)
249303

250304
if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
251305
return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
@@ -693,6 +747,21 @@ func literalRegexp(s string, flags Flags) *Regexp {
693747
// Flags, and returns a regular expression parse tree. The syntax is
694748
// described in the top-level comment.
695749
func Parse(s string, flags Flags) (*Regexp, error) {
750+
// All work happens in parse, whose deferred recover turns an
// ErrInternalError panic into a returned *Error.
return parse(s, flags)
751+
}
752+
753+
func parse(s string, flags Flags) (_ *Regexp, err error) {
754+
defer func() {
755+
switch r := recover(); r {
756+
default:
757+
panic(r)
758+
case nil:
759+
// ok
760+
case ErrInternalError:
761+
err = &Error{Code: ErrInternalError, Expr: s}
762+
}
763+
}()
764+
696765
if flags&Literal != 0 {
697766
// Trivial parser for literal string.
698767
if err := checkUTF8(s); err != nil {
@@ -704,7 +773,6 @@ func Parse(s string, flags Flags) (*Regexp, error) {
704773
// Otherwise, must do real work.
705774
var (
706775
p parser
707-
err error
708776
c rune
709777
op Op
710778
lastRepeat string

src/regexp/syntax/parse_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,11 @@ var parseTests = []parseTest{
207207
// Valid repetitions.
208208
{`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
209209
{`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},
210+
211+
// Valid nesting.
212+
{strings.Repeat("(", 999) + strings.Repeat(")", 999), ``},
213+
{strings.Repeat("(?:", 999) + strings.Repeat(")*", 999), ``},
214+
{"(" + strings.Repeat("|", 12345) + ")", ``}, // not nested at all
210215
}
211216

212217
const testFlags = MatchNL | PerlX | UnicodeGroups
@@ -482,6 +487,8 @@ var invalidRegexps = []string{
482487
`a{100000}`,
483488
`a{100000,}`,
484489
"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
490+
strings.Repeat("(", 1000) + strings.Repeat(")", 1000),
491+
strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000),
485492
`\Q\E*`,
486493
}
487494

0 commit comments

Comments
 (0)