Skip to content

Commit

Permalink
remove use of go's "unsafe" package in the parser
Browse files Browse the repository at this point in the history
  • Loading branch information
evanw committed Feb 10, 2022
1 parent 84a237e commit 4db57c1
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 140 deletions.
81 changes: 56 additions & 25 deletions internal/js_lexer/js_lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"unicode"
"unicode/utf8"

"github.com/evanw/esbuild/internal/ast"
"github.com/evanw/esbuild/internal/config"
"github.com/evanw/esbuild/internal/helpers"
"github.com/evanw/esbuild/internal/js_ast"
Expand Down Expand Up @@ -216,10 +217,37 @@ type json struct {
allowComments bool
}

// This represents a string that is maybe a substring of the current file's
// "source.Contents" string. The point of doing this is that if it is a
// substring (the common case), then we can represent it more efficiently.
//
// For compactness and performance, the JS AST represents identifiers as a
// symbol reference instead of as a string. However, we need to track the
// string between the first pass and the second pass because the string is only
// resolved to a symbol in the second pass. To avoid allocating extra memory
// to store the string, we instead use an index+length slice of the original JS
// source code. That index is what "Start" represents here. The length is just
// "len(String)".
//
// Set "Start" to invalid (the zero value) if "String" is not a substring of
// "source.Contents". This is the case for escaped identifiers. For example,
// the identifier "fo\u006f" would be "MaybeSubstring{String: "foo"}". It's
// critical that any code changing the "String" also set "Start" to the zero
// value, which is best done by just overwriting the whole "MaybeSubstring".
//
// The substring range used to be recovered automatically from the string but
// that relied on the Go "unsafe" package which can hypothetically break under
// certain Go compiler optimization passes, so it has been removed and replaced
// with this more error-prone approach that doesn't use "unsafe".
type MaybeSubstring struct {
	// The text itself. This may be a slice of "source.Contents" (when "Start"
	// is valid) or a freshly-allocated string (when "Start" is the zero value).
	String string

	// Byte offset of "String" within "source.Contents", or the invalid zero
	// value when "String" is not a substring of the source (e.g. identifiers
	// containing escape sequences).
	Start ast.Index32
}

type Lexer struct {
CommentsToPreserveBefore []js_ast.Comment
AllOriginalComments []js_ast.Comment
Identifier string
Identifier MaybeSubstring
log logger.Log
source logger.Source
JSXFactoryPragmaComment logger.Span
Expand Down Expand Up @@ -323,6 +351,10 @@ func (lexer *Lexer) Raw() string {
return lexer.source.Contents[lexer.start:lexer.end]
}

// rawIdentifier wraps the raw text of the current token in a MaybeSubstring,
// recording the token's starting byte offset so that later passes can recover
// the substring range within "source.Contents" without pointer arithmetic.
func (lexer *Lexer) rawIdentifier() MaybeSubstring {
	return MaybeSubstring{
		String: lexer.Raw(),
		Start:  ast.MakeIndex32(uint32(lexer.start)),
	}
}

func (lexer *Lexer) StringLiteral() []uint16 {
if lexer.decodedStringLiteralOrNil == nil {
// Lazily decode escape sequences if needed
Expand Down Expand Up @@ -1143,7 +1175,7 @@ func (lexer *Lexer) NextInsideJSXElement() {
lexer.step()
}

lexer.Identifier = lexer.Raw()
lexer.Identifier = lexer.rawIdentifier()
lexer.Token = TIdentifier
break
}
Expand Down Expand Up @@ -1185,7 +1217,7 @@ func (lexer *Lexer) Next() {
break hashbang
}
}
lexer.Identifier = lexer.Raw()
lexer.Identifier = lexer.rawIdentifier()
} else {
// "#foo"
lexer.step()
Expand All @@ -1202,7 +1234,7 @@ func (lexer *Lexer) Next() {
if lexer.codePoint == '\\' {
lexer.Identifier, _ = lexer.scanIdentifierWithEscapes(privateIdentifier)
} else {
lexer.Identifier = lexer.Raw()
lexer.Identifier = lexer.rawIdentifier()
}
}
lexer.Token = TPrivateIdentifier
Expand Down Expand Up @@ -1717,9 +1749,8 @@ func (lexer *Lexer) Next() {
if lexer.codePoint == '\\' {
lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier)
} else {
contents := lexer.Raw()
lexer.Identifier = contents
lexer.Token = Keywords[contents]
lexer.Identifier = lexer.rawIdentifier()
lexer.Token = Keywords[lexer.Raw()]
if lexer.Token == 0 {
lexer.Token = TIdentifier
}
Expand Down Expand Up @@ -1747,7 +1778,7 @@ func (lexer *Lexer) Next() {
lexer.Identifier, lexer.Token = lexer.scanIdentifierWithEscapes(normalIdentifier)
} else {
lexer.Token = TIdentifier
lexer.Identifier = lexer.Raw()
lexer.Identifier = lexer.rawIdentifier()
}
break
}
Expand All @@ -1769,7 +1800,7 @@ const (

// This is an edge case that doesn't really exist in the wild, so it doesn't
// need to be as fast as possible.
func (lexer *Lexer) scanIdentifierWithEscapes(kind identifierKind) (string, T) {
func (lexer *Lexer) scanIdentifierWithEscapes(kind identifierKind) (MaybeSubstring, T) {
// First pass: scan over the identifier to see how long it is
for {
// Scan a unicode escape sequence. There is at least one because that's
Expand Down Expand Up @@ -1848,9 +1879,9 @@ func (lexer *Lexer) scanIdentifierWithEscapes(kind identifierKind) (string, T) {
// foo.\u0076\u0061\u0072;
//
if Keywords[text] != 0 {
return text, TEscapedKeyword
return MaybeSubstring{String: text}, TEscapedKeyword
} else {
return text, TIdentifier
return MaybeSubstring{String: text}, TIdentifier
}
}

Expand Down Expand Up @@ -1978,7 +2009,7 @@ func (lexer *Lexer) parseNumericLiteralOrDot() {

// Slow path: do we need to re-scan the input as text?
if isBigIntegerLiteral || isInvalidLegacyOctalLiteral {
text := lexer.Raw()
text := lexer.rawIdentifier()

// Can't use a leading zero for bigint literals
if isBigIntegerLiteral && lexer.IsLegacyOctalLiteral {
Expand All @@ -1987,22 +2018,22 @@ func (lexer *Lexer) parseNumericLiteralOrDot() {

// Filter out underscores
if underscoreCount > 0 {
bytes := make([]byte, 0, len(text)-underscoreCount)
for i := 0; i < len(text); i++ {
c := text[i]
bytes := make([]byte, 0, len(text.String)-underscoreCount)
for i := 0; i < len(text.String); i++ {
c := text.String[i]
if c != '_' {
bytes = append(bytes, c)
}
}
text = string(bytes)
text = MaybeSubstring{String: string(bytes)}
}

// Store bigints as text to avoid precision loss
if isBigIntegerLiteral {
lexer.Identifier = text
} else if isInvalidLegacyOctalLiteral {
// Legacy octal literals may turn out to be a base 10 literal after all
value, _ := strconv.ParseFloat(text, 64)
value, _ := strconv.ParseFloat(text.String, 64)
lexer.Number = value
}
}
Expand Down Expand Up @@ -2099,23 +2130,23 @@ func (lexer *Lexer) parseNumericLiteralOrDot() {
}

// Take a slice of the text to parse
text := lexer.Raw()
text := lexer.rawIdentifier()

// Filter out underscores
if underscoreCount > 0 {
bytes := make([]byte, 0, len(text)-underscoreCount)
for i := 0; i < len(text); i++ {
c := text[i]
bytes := make([]byte, 0, len(text.String)-underscoreCount)
for i := 0; i < len(text.String); i++ {
c := text.String[i]
if c != '_' {
bytes = append(bytes, c)
}
}
text = string(bytes)
text = MaybeSubstring{String: string(bytes)}
}

if lexer.codePoint == 'n' && !hasDotOrExponent {
// The only bigint literal that can start with 0 is "0n"
if len(text) > 1 && first == '0' {
if len(text.String) > 1 && first == '0' {
lexer.SyntaxError()
}

Expand All @@ -2124,13 +2155,13 @@ func (lexer *Lexer) parseNumericLiteralOrDot() {
} else if !hasDotOrExponent && lexer.end-lexer.start < 10 {
// Parse a 32-bit integer (very fast path)
var number uint32 = 0
for _, c := range text {
for _, c := range text.String {
number = number*10 + uint32(c-'0')
}
lexer.Number = float64(number)
} else {
// Parse a double-precision floating-point number
value, _ := strconv.ParseFloat(text, 64)
value, _ := strconv.ParseFloat(text.String, 64)
lexer.Number = value
}
}
Expand Down
6 changes: 3 additions & 3 deletions internal/js_lexer/js_lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func expectHashbang(t *testing.T, contents string, expected string) {
msgs := log.Done()
test.AssertEqual(t, len(msgs), 0)
test.AssertEqual(t, lexer.Token, THashbang)
test.AssertEqual(t, lexer.Identifier, expected)
test.AssertEqual(t, lexer.Identifier.String, expected)
})
}

Expand Down Expand Up @@ -113,7 +113,7 @@ func expectIdentifier(t *testing.T, contents string, expected string) {
msgs := log.Done()
test.AssertEqual(t, len(msgs), 0)
test.AssertEqual(t, lexer.Token, TIdentifier)
test.AssertEqual(t, lexer.Identifier, expected)
test.AssertEqual(t, lexer.Identifier.String, expected)
})
}

Expand Down Expand Up @@ -359,7 +359,7 @@ func expectBigInteger(t *testing.T, contents string, expected string) {
msgs := log.Done()
test.AssertEqual(t, len(msgs), 0)
test.AssertEqual(t, lexer.Token, TBigIntegerLiteral)
test.AssertEqual(t, lexer.Identifier, expected)
test.AssertEqual(t, lexer.Identifier.String, expected)
})
}

Expand Down
4 changes: 2 additions & 2 deletions internal/js_parser/global_name_parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func ParseGlobalName(log logger.Log, source logger.Source) (result []string, ok
lexer := js_lexer.NewLexerGlobalName(log, source)

// Start off with an identifier
result = append(result, lexer.Identifier)
result = append(result, lexer.Identifier.String)
lexer.Expect(js_lexer.TIdentifier)

// Follow with dot or index expressions
Expand All @@ -31,7 +31,7 @@ func ParseGlobalName(log logger.Log, source logger.Source) (result []string, ok
if !lexer.IsIdentifierOrKeyword() {
lexer.Expect(js_lexer.TIdentifier)
}
result = append(result, lexer.Identifier)
result = append(result, lexer.Identifier.String)
lexer.Next()

case js_lexer.TOpenBracket:
Expand Down
Loading

0 comments on commit 4db57c1

Please sign in to comment.