Skip to content

Commit 71d038e

Browse files
authored
fix: skip parsing of JavaScript regexp literals: /test/ (#1245)
1 parent 18fb42a commit 71d038e

File tree

3 files changed

+278
-61
lines changed

3 files changed

+278
-61
lines changed

parser/v2/scriptparser.go

Lines changed: 136 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -32,30 +32,29 @@ func (p scriptElementParser) Parse(pi *parse.Input) (n Node, ok bool, err error)
3232
var name string
3333
if name, ok, err = elementNameParser.Parse(pi); err != nil || !ok {
3434
pi.Seek(start)
35-
return
35+
return n, false, err
3636
}
3737

3838
if name != "script" {
3939
pi.Seek(start)
40-
ok = false
41-
return
40+
return n, false, nil
4241
}
4342

4443
if e.Attributes, ok, err = (attributesParser{}).Parse(pi); err != nil || !ok {
4544
pi.Seek(start)
46-
return
45+
return n, false, err
4746
}
4847

4948
// Optional whitespace.
5049
if _, _, err = parse.OptionalWhitespace.Parse(pi); err != nil {
5150
pi.Seek(start)
52-
return
51+
return n, false, err
5352
}
5453

5554
// >
5655
if _, ok, err = gt.Parse(pi); err != nil || !ok {
5756
pi.Seek(start)
58-
return
57+
return n, false, parse.Error("<script>: unclosed element - missing '>'", pi.Position())
5958
}
6059

6160
// If there's a type attribute and it's not a JS attribute (e.g. text/javascript), we need to parse the contents as raw text.
@@ -89,18 +88,25 @@ loop:
8988
// - \ - Start of an escape sequence, we can just take the value.
9089
// - Anything else - Add it to the script.
9190

92-
if _, ok, err = jsEndTag.Parse(pi); err != nil || ok {
91+
_, ok, err = jsEndTag.Parse(pi)
92+
if err != nil {
93+
return nil, false, err
94+
}
95+
if ok {
9396
// We've reached the end of the script.
9497
break loop
9598
}
9699

97-
if _, ok, err = endTagStart.Parse(pi); err != nil || ok {
98-
// We've reached the end of the script, but the end tag is probably invalid.
99-
break loop
100+
_, ok, err = endTagStart.Parse(pi)
101+
if err != nil {
102+
return nil, false, err
103+
}
104+
if ok {
105+
return nil, false, parse.Error("<script>: invalid end tag, expected </script> not found", pi.Position())
100106
}
101107

102-
var code Node
103-
code, ok, err = goCodeInJavaScript.Parse(pi)
108+
// Try for a Go code expression, i.e. {{ goCode }}.
109+
code, ok, err := goCodeInJavaScript.Parse(pi)
104110
if err != nil {
105111
return nil, false, err
106112
}
@@ -110,8 +116,7 @@ loop:
110116
}
111117

112118
// Try for a comment.
113-
var comment string
114-
comment, ok, err = jsComment.Parse(pi)
119+
comment, ok, err := jsComment.Parse(pi)
115120
if err != nil {
116121
return nil, false, err
117122
}
@@ -120,46 +125,62 @@ loop:
120125
continue loop
121126
}
122127

123-
// Read JavaScript chracaters.
128+
// Read JavaScript characters.
129+
charLoop:
124130
for {
125131
before := pi.Index()
126-
var c string
127-
c, ok, err := jsCharacter.Parse(pi)
132+
133+
// Check for a regular expression literal.
134+
r, ok, err := regexpLiteral.Parse(pi)
128135
if err != nil {
129136
return nil, false, err
130137
}
131138
if ok {
132-
_, isEOF, _ := parse.EOF[string]().Parse(pi)
133-
if c == `"` || c == "'" || c == "`" {
134-
// Start or exit a string literal.
135-
if stringLiteralDelimiter == jsQuoteNone {
136-
stringLiteralDelimiter = jsQuote(c)
137-
} else if stringLiteralDelimiter == jsQuote(c) {
138-
stringLiteralDelimiter = jsQuoteNone
139-
}
140-
}
141-
peeked, _ := pi.Peek(1)
142-
peeked = c + peeked
143-
144-
breakForGo := peeked == "{{"
145-
breakForHTML := stringLiteralDelimiter == jsQuoteNone && (peeked == "</" || peeked == "//" || peeked == "/*")
146-
147-
if isEOF || breakForGo || breakForHTML {
148-
if sb.Len() > 0 {
149-
e.Contents = append(e.Contents, NewScriptContentsScriptCode(sb.String()))
150-
sb.Reset()
151-
}
152-
if isEOF {
153-
break loop
154-
}
155-
pi.Seek(before)
156-
continue loop
157-
}
158-
sb.WriteString(c)
139+
sb.WriteString(r)
140+
continue charLoop
159141
}
142+
143+
// Check for EOF.
160144
if _, ok, _ = parse.EOF[string]().Parse(pi); ok {
161145
return nil, false, parse.Error("script: unclosed <script> element", pi.Position())
162146
}
147+
148+
// Check for a character.
149+
c, ok, err := jsCharacter.Parse(pi)
150+
if err != nil {
151+
return nil, false, err
152+
}
153+
if !ok {
154+
return nil, false, parse.Error("script: expected to parse a character, but didn't", pi.Position())
155+
}
156+
if c == string(jsQuoteDouble) || c == string(jsQuoteSingle) || c == string(jsQuoteBacktick) {
157+
// Start or exit a string literal.
158+
if stringLiteralDelimiter == jsQuoteNone {
159+
stringLiteralDelimiter = jsQuote(c)
160+
} else if stringLiteralDelimiter == jsQuote(c) {
161+
stringLiteralDelimiter = jsQuoteNone
162+
}
163+
}
164+
165+
peeked, peekOK := pi.Peek(1)
166+
isEOF := !peekOK
167+
peeked = c + peeked
168+
breakForGo := peeked == "{{"
169+
breakForHTML := stringLiteralDelimiter == jsQuoteNone && peeked == "</"
170+
breakForComment := stringLiteralDelimiter == jsQuoteNone && (peeked == "//" || peeked == "/*")
171+
if isEOF || breakForGo || breakForHTML || breakForComment {
172+
if sb.Len() > 0 {
173+
e.Contents = append(e.Contents, NewScriptContentsScriptCode(sb.String()))
174+
sb.Reset()
175+
}
176+
if isEOF {
177+
break loop
178+
}
179+
pi.Seek(before)
180+
continue loop
181+
}
182+
183+
sb.WriteString(c)
163184
}
164185
}
165186

@@ -238,3 +259,74 @@ var (
238259
jsEndOfMultiLineComment = parse.StringFrom(parse.Or(parse.String("*/"), parse.EOF[string]()))
239260
jsMultiLineComment = parse.StringFrom(jsStartMultiLineComment, parse.StringUntil(jsEndOfMultiLineComment), jsEndOfMultiLineComment, parse.OptionalWhitespace)
240261
)
262+
263+
var regexpLiteral = parse.Func(func(in *parse.Input) (regexp string, ok bool, err error) {
264+
startIndex := in.Index()
265+
266+
// Take the initial '/'.
267+
s, ok := in.Take(1)
268+
if !ok || s != "/" {
269+
in.Seek(startIndex)
270+
return "", false, nil
271+
}
272+
// Peek the next char. If it's also a '/', then this is not a regex literal, but the start of a comment.
273+
p, ok := in.Peek(1)
274+
if !ok || p == "/" {
275+
in.Seek(startIndex)
276+
return "", false, nil
277+
}
278+
var literal strings.Builder
279+
literal.WriteString(s)
280+
281+
var inClass, escaped bool
282+
283+
for {
284+
s, ok := in.Take(1)
285+
if !ok {
286+
// Restore position if no closing '/'.
287+
in.Seek(startIndex)
288+
return "", false, nil
289+
}
290+
291+
literal.WriteString(s)
292+
293+
if escaped {
294+
escaped = false
295+
continue
296+
}
297+
298+
switch s {
299+
case "\n", "\r":
300+
// Newline in a regex is not allowed, so we restore the position and return false.
301+
in.Seek(startIndex)
302+
return "", false, nil
303+
case "\\":
304+
escaped = true
305+
case "[":
306+
inClass = true
307+
case "]":
308+
inClass = false
309+
case "/":
310+
if !inClass {
311+
// We've reached the end of the regex, but there may be flags after it.
312+
// Read flags until we hit a non-flag character.
313+
flags, ok, err := regexpFlags.Parse(in)
314+
if err != nil {
315+
return "", false, err
316+
}
317+
if ok {
318+
literal.WriteString(flags)
319+
}
320+
output := literal.String()
321+
if strings.Contains(output, "{{") && strings.Contains(output, "}}") {
322+
// If the regex contains a Go expression, don't treat it as a regex literal.
323+
in.Seek(startIndex)
324+
return "", false, nil
325+
}
326+
return output, true, nil
327+
}
328+
}
329+
}
330+
})
331+
332+
// regexpFlags parses the flag characters that may follow the closing '/' of a
// JavaScript regular expression literal and returns them as a string.
// ECMAScript defines eight flags: d (hasIndices), g (global), i (ignoreCase),
// m (multiline), s (dotAll), u (unicode), v (unicodeSets) and y (sticky), so
// up to eight flag characters are accepted.
var regexpFlags = parse.StringFrom(parse.Repeat(0, 8, parse.RuneIn("dgimsuvy")))

0 commit comments

Comments
 (0)