Skip to content
This repository has been archived by the owner on Mar 8, 2020. It is now read-only.

Fix js-specific escape sequence handling #79

Merged
merged 5 commits into from
Mar 29, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion driver/normalizer/normalizer.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package normalizer

import (
"strconv"
"strings"

"gopkg.in/bblfsh/sdk.v2/uast"
Expand Down Expand Up @@ -145,7 +146,7 @@ var Normalizers = []Mapping{
)),
MapSemantic("StringLiteral", uast.String{}, MapObj(
Fields{
{Name: "value", Op: Quote(Var("val"))},
{Name: "value", Op: doubleQuote(Var("val"))},
},
Obj{
"Value": Var("val"),
Expand Down Expand Up @@ -393,3 +394,12 @@ func (op singleQuote) Construct(st *State, n nodes.Node) (nodes.Node, error) {
s := quoteSingle(string(sn))
return nodes.String(s), nil
}

// doubleQuote behaves like transformer.Quote, with additional handling of
// JS-specific escape sequences. It wraps op in a StringConv whose first
// conversion is unquoteDouble and whose second conversion re-quotes the
// string with strconv.Quote.
func doubleQuote(op Op) Op {
	// unquoteDouble already has the signature StringConv expects, so it is
	// passed directly instead of through a forwarding closure.
	decode := unquoteDouble
	// strconv.Quote cannot fail; the nil error only satisfies the signature.
	encode := func(raw string) (string, error) {
		return strconv.Quote(raw), nil
	}
	return StringConv(op, decode, encode)
}
16 changes: 15 additions & 1 deletion driver/normalizer/strconv.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ func contains(s string, c byte) bool {
// current implementation and found this was fastest.
func replaceEscapedMaybe(s, old, repl string) string {
var out strings.Builder
oldFirstRune, _ := utf8.DecodeRuneInString(old)
for s != "" {
pos := strings.Index(s, old)
if pos < 0 {
Expand All @@ -75,12 +76,15 @@ func replaceEscapedMaybe(s, old, repl string) string {
out.WriteString(s[:pos])
s = s[pos+len(old):]
r, n := utf8.DecodeRuneInString(s)
s = s[n:]
if r >= '0' && r <= '9' {
out.WriteString(old)
} else {
out.WriteString(repl)
}
if oldFirstRune == r {
continue
}
s = s[n:]
if n != 0 {
out.WriteRune(r)
}
Expand Down Expand Up @@ -167,3 +171,13 @@ func appendEscapedRune(buf []byte, r rune, quote byte) []byte {
}
return buf
}

// unquoteDouble is strconv.Unquote with JS-specific escape sequence handling
// layered on top.
//
// The JS escape `\0` is first rewritten to a raw NUL byte by
// replaceEscapedMaybe, while the literal still carries its surrounding
// quotes; the result is then decoded by strconv.Unquote. On failure it
// returns an empty string and an error that carries both the underlying
// message and the original, unmodified input s.
func unquoteDouble(s string) (string, error) {
	// Rewrite the JS-only escape before the quotes are stripped.
	prepared := replaceEscapedMaybe(s, "\\0", "\x00")

	unquoted, err := strconv.Unquote(prepared)
	if err == nil {
		return unquoted, nil
	}
	// Report the input as it was given, not the rewritten form.
	return "", fmt.Errorf("%v (%s)", err, s)
}
19 changes: 13 additions & 6 deletions driver/normalizer/strconv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"github.com/stretchr/testify/require"
)

var testCasesUnquote = []struct {
var testCasesUnquoteSingle = []struct {
quoted string
unquoted string
// In some cases unquoting and then re-quoting a quoted string does not produce a
Expand All @@ -17,16 +17,23 @@ var testCasesUnquote = []struct {
// to be decoded via Go's native rules to the byte sequence we want.
canonicalQuoted string
}{
{`'a'`, "a", `'a'`},
{`'\x00'`, "\u0000", `'\x00'`},
{`'a'`, "a", "'a'"},
{`'\x00'`, "\u0000", "'\\x00'"},
{`'\0'`, "\u0000", "'\\x00'"},
{`'\0something\0'`, "\u0000something\u0000", "'\\x00something\\x00'"},
{`'\0something\0else'`, "\u0000something\u0000else", "'\\x00something\\x00else'"},
{`'\u0000123\0s'`, "\u0000123\u0000s", "'\\x00123\\x00s'"},
{`'\0\0'`, "\u0000\u0000", "'\\x00\\x00'"},
}

// TestUnquoteDouble checks that unquoteDouble decodes a double-quoted literal
// consisting of eight consecutive `\0` escapes into eight NUL bytes without
// returning an error.
func TestUnquoteDouble(t *testing.T) {
s, err := unquoteDouble(`"\0\0\0\0\0\0\0\0"`)
// NOTE(review): the next two lines look like review-page residue captured
// with the diff rather than Go code; confirm against the real file.
bzz marked this conversation as resolved.
Show resolved Hide resolved
require.NoError(t, err)
// Each `\0` escape is expected to decode to a single NUL byte.
require.Equal(t, "\x00\x00\x00\x00\x00\x00\x00\x00", s)
}

func TestUnquoteSingle(t *testing.T) {
for _, test := range testCasesUnquote {
for _, test := range testCasesUnquoteSingle {
t.Run("", func(t *testing.T) {
s, err := unquoteSingle(test.quoted)
require.NoError(t, err)
Expand All @@ -36,7 +43,7 @@ func TestUnquoteSingle(t *testing.T) {
}

func TestUnquoteSingleAndQuoteBack(t *testing.T) {
for _, test := range testCasesUnquote {
for _, test := range testCasesUnquoteSingle {
t.Run("", func(t *testing.T) {
u, err := unquoteSingle(test.quoted)
require.NoError(t, err)
Expand Down Expand Up @@ -66,7 +73,7 @@ func printDebug(t *testing.T, quoted, actual string) {
}

func BenchmarkReplacingNullEscape(b *testing.B) {
for _, test := range testCasesUnquote {
for _, test := range testCasesUnquoteSingle {
b.Run("", func(b *testing.B) {
b.ReportAllocs()
for n := 0; n < b.N; n++ {
Expand Down
2 changes: 2 additions & 0 deletions fixtures/string-literal.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ var escOpen = '\0OPEN'+Math.random()+'\0';
var escClose = '\0CLOSE'+Math.random()+'\0';
var escComma = '\0COMMA'+Math.random()+'\0';
var escPeriod = '\0PERIOD'+Math.random()+'\0';

d = "\0\0\0\0\0\0\0\0"
80 changes: 74 additions & 6 deletions fixtures/string-literal.js.native
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
comments: [],
end: 255,
end: 278,
loc: {
end: {
column: 0,
line: 10,
column: 22,
line: 11,
},
start: {
column: 0,
Expand Down Expand Up @@ -1157,13 +1157,81 @@
start: 208,
type: "VariableDeclaration",
},
{
end: 278,
expression: {
end: 278,
left: {
end: 257,
loc: {
end: {
column: 1,
line: 11,
},
identifierName: "d",
start: {
column: 0,
line: 11,
},
},
name: "d",
start: 256,
type: "Identifier",
},
loc: {
end: {
column: 22,
line: 11,
},
start: {
column: 0,
line: 11,
},
},
operator: "=",
right: {
end: 278,
extra: {
raw: "\"\\0\\0\\0\\0\\0\\0\\0\\0\"",
rawValue: "\x00\x00\x00\x00\x00\x00\x00\x00",
},
loc: {
end: {
column: 22,
line: 11,
},
start: {
column: 4,
line: 11,
},
},
start: 260,
type: "StringLiteral",
value: "\x00\x00\x00\x00\x00\x00\x00\x00",
},
start: 256,
type: "AssignmentExpression",
},
loc: {
end: {
column: 22,
line: 11,
},
start: {
column: 0,
line: 11,
},
},
start: 256,
type: "ExpressionStatement",
},
],
directives: [],
end: 255,
end: 278,
loc: {
end: {
column: 0,
line: 10,
column: 22,
line: 11,
},
start: {
column: 0,
Expand Down
78 changes: 72 additions & 6 deletions fixtures/string-literal.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
col: 1,
},
end: { '@type': "uast:Position",
offset: 256,
line: 10,
col: 1,
offset: 279,
line: 11,
col: 23,
},
},
comments: [],
Expand All @@ -22,9 +22,9 @@
col: 1,
},
end: { '@type': "uast:Position",
offset: 256,
line: 10,
col: 1,
offset: 279,
line: 11,
col: 23,
},
},
body: [
Expand Down Expand Up @@ -1141,6 +1141,72 @@
],
kind: "var",
},
{ '@type': "ExpressionStatement",
'@role': [Statement],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
offset: 257,
line: 11,
col: 1,
},
end: { '@type': "uast:Position",
offset: 279,
line: 11,
col: 23,
},
},
expression: { '@type': "AssignmentExpression",
'@role': [Assignment, Binary, Expression, Operator],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
offset: 257,
line: 11,
col: 1,
},
end: { '@type': "uast:Position",
offset: 279,
line: 11,
col: 23,
},
},
left: { '@type': "Identifier",
'@token': "d",
'@role': [Assignment, Binary, Expression, Identifier, Left],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
offset: 257,
line: 11,
col: 1,
},
end: { '@type': "uast:Position",
offset: 258,
line: 11,
col: 2,
},
},
},
operator: { '@type': "uast:Operator",
'@token': "=",
'@role': [Assignment, Binary, Expression, Operator],
},
right: { '@type': "StringLiteral",
'@token': "\"\\0\\0\\0\\0\\0\\0\\0\\0\"",
'@role': [Assignment, Binary, Expression, Literal, Right, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
offset: 261,
line: 11,
col: 5,
},
end: { '@type': "uast:Position",
offset: 279,
line: 11,
col: 23,
},
},
},
},
},
],
directives: [],
sourceType: "module",
Expand Down