Skip to content
This repository has been archived by the owner on Mar 8, 2020. It is now read-only.

Commit

Permalink
Merge pull request #79 from bzz/fix/escape-in-doublequote-75
Browse files Browse the repository at this point in the history
Fix js-specific escape sequence handling
  • Loading branch information
bzz committed Mar 29, 2019
2 parents e8ad291 + 06a6ffd commit 65c306a
Show file tree
Hide file tree
Showing 7 changed files with 254 additions and 20 deletions.
12 changes: 11 additions & 1 deletion driver/normalizer/normalizer.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package normalizer

import (
"strconv"
"strings"

"gopkg.in/bblfsh/sdk.v2/uast"
Expand Down Expand Up @@ -145,7 +146,7 @@ var Normalizers = []Mapping{
)),
MapSemantic("StringLiteral", uast.String{}, MapObj(
Fields{
{Name: "value", Op: Quote(Var("val"))},
{Name: "value", Op: doubleQuote(Var("val"))},
},
Obj{
"Value": Var("val"),
Expand Down Expand Up @@ -393,3 +394,12 @@ func (op singleQuote) Construct(st *State, n nodes.Node) (nodes.Node, error) {
s := quoteSingle(string(sn))
return nodes.String(s), nil
}

// doubleQuote is a transformer.Quote + JS-specific escape sequence handling.
//
// It wraps op in a StringConv whose first conversion removes the surrounding
// double quotes with unquoteDouble and whose second conversion puts them back
// with strconv.Quote.
func doubleQuote(op Op) Op {
	requote := func(raw string) (string, error) {
		// strconv.Quote cannot fail, so the error is always nil.
		return strconv.Quote(raw), nil
	}
	return StringConv(op, unquoteDouble, requote)
}
16 changes: 15 additions & 1 deletion driver/normalizer/strconv.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ func contains(s string, c byte) bool {
// current implementation and found this was fastest.
func replaceEscapedMaybe(s, old, repl string) string {
var out strings.Builder
oldFirstRune, _ := utf8.DecodeRuneInString(old)
for s != "" {
pos := strings.Index(s, old)
if pos < 0 {
Expand All @@ -75,12 +76,15 @@ func replaceEscapedMaybe(s, old, repl string) string {
out.WriteString(s[:pos])
s = s[pos+len(old):]
r, n := utf8.DecodeRuneInString(s)
s = s[n:]
if r >= '0' && r <= '9' {
out.WriteString(old)
} else {
out.WriteString(repl)
}
if oldFirstRune == r {
continue
}
s = s[n:]
if n != 0 {
out.WriteRune(r)
}
Expand Down Expand Up @@ -167,3 +171,13 @@ func appendEscapedRune(buf []byte, r rune, quote byte) []byte {
}
return buf
}

// unquoteDouble is strconv.Unquote + JS-specific escape sequence handling.
//
// s must be a complete double-quoted JS string literal, quotes included.
// The JS-only `\0` escape (NUL) is translated before the literal is handed to
// strconv.Unquote, which does not understand it. On failure the returned
// string is empty and the error carries the original literal for context.
func unquoteDouble(s string) (string, error) {
	// Handle JS-specific escape sequences before removing quotes.
	ns, err := strconv.Unquote(replaceNullEscapes(s))
	if err != nil {
		return "", fmt.Errorf("%v (%s)", err, s)
	}
	return ns, nil
}

// replaceNullEscapes replaces every JS `\0` escape in s with a raw NUL byte.
//
// Escape sequences are consumed as backslash+character pairs, so the `0` in
// `\\0` (an escaped backslash followed by a literal zero) is never mistaken
// for the start of a `\0` escape. A `\0` that is directly followed by another
// decimal digit is left untouched so that strconv.Unquote can interpret it as
// an octal escape such as `\012`.
func replaceNullEscapes(s string) string {
	if !strings.Contains(s, `\0`) {
		// Fast path: nothing to translate.
		return s
	}
	var out strings.Builder
	out.Grow(len(s))
	// Byte-wise scanning is safe: '\\' and '0' are ASCII and can never occur
	// inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c != '\\' || i+1 >= len(s) {
			out.WriteByte(c)
			continue
		}
		next := s[i+1]
		followedByDigit := i+2 < len(s) && s[i+2] >= '0' && s[i+2] <= '9'
		if next == '0' && !followedByDigit {
			out.WriteByte(0)
		} else {
			// Any other escape (including `\\`) is copied verbatim.
			out.WriteByte(c)
			out.WriteByte(next)
		}
		i++ // skip the escaped character as well
	}
	return out.String()
}
19 changes: 13 additions & 6 deletions driver/normalizer/strconv_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"github.com/stretchr/testify/require"
)

var testCasesUnquote = []struct {
var testCasesUnquoteSingle = []struct {
quoted string
unquoted string
// In some cases unquoting and then re-quoting a quoted string does not produce a
Expand All @@ -17,16 +17,23 @@ var testCasesUnquote = []struct {
// to be decoded via Go's native rules to the byte sequence we want.
canonicalQuoted string
}{
{`'a'`, "a", `'a'`},
{`'\x00'`, "\u0000", `'\x00'`},
{`'a'`, "a", "'a'"},
{`'\x00'`, "\u0000", "'\\x00'"},
{`'\0'`, "\u0000", "'\\x00'"},
{`'\0something\0'`, "\u0000something\u0000", "'\\x00something\\x00'"},
{`'\0something\0else'`, "\u0000something\u0000else", "'\\x00something\\x00else'"},
{`'\u0000123\0s'`, "\u0000123\u0000s", "'\\x00123\\x00s'"},
{`'\0\0'`, "\u0000\u0000", "'\\x00\\x00'"},
}

// TestUnquoteDouble checks that a double-quoted literal made of eight
// consecutive JS `\0` escapes is unquoted into eight NUL bytes.
func TestUnquoteDouble(t *testing.T) {
	const (
		quoted = `"\0\0\0\0\0\0\0\0"`
		want   = "\x00\x00\x00\x00\x00\x00\x00\x00"
	)

	got, err := unquoteDouble(quoted)

	require.NoError(t, err)
	require.Equal(t, want, got)
}

func TestUnquoteSingle(t *testing.T) {
for _, test := range testCasesUnquote {
for _, test := range testCasesUnquoteSingle {
t.Run("", func(t *testing.T) {
s, err := unquoteSingle(test.quoted)
require.NoError(t, err)
Expand All @@ -36,7 +43,7 @@ func TestUnquoteSingle(t *testing.T) {
}

func TestUnquoteSingleAndQuoteBack(t *testing.T) {
for _, test := range testCasesUnquote {
for _, test := range testCasesUnquoteSingle {
t.Run("", func(t *testing.T) {
u, err := unquoteSingle(test.quoted)
require.NoError(t, err)
Expand Down Expand Up @@ -66,7 +73,7 @@ func printDebug(t *testing.T, quoted, actual string) {
}

func BenchmarkReplacingNullEscape(b *testing.B) {
for _, test := range testCasesUnquote {
for _, test := range testCasesUnquoteSingle {
b.Run("", func(b *testing.B) {
b.ReportAllocs()
for n := 0; n < b.N; n++ {
Expand Down
2 changes: 2 additions & 0 deletions fixtures/string-literal.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ var escOpen = '\0OPEN'+Math.random()+'\0';
var escClose = '\0CLOSE'+Math.random()+'\0';
var escComma = '\0COMMA'+Math.random()+'\0';
var escPeriod = '\0PERIOD'+Math.random()+'\0';

d = "\0\0\0\0\0\0\0\0"
76 changes: 72 additions & 4 deletions fixtures/string-literal.js.native
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
comments: [],
end: 255,
end: 279,
loc: {
end: {
column: 0,
line: 10,
line: 12,
},
start: {
column: 0,
Expand Down Expand Up @@ -1157,13 +1157,81 @@
start: 208,
type: "VariableDeclaration",
},
{
end: 278,
expression: {
end: 278,
left: {
end: 257,
loc: {
end: {
column: 1,
line: 11,
},
identifierName: "d",
start: {
column: 0,
line: 11,
},
},
name: "d",
start: 256,
type: "Identifier",
},
loc: {
end: {
column: 22,
line: 11,
},
start: {
column: 0,
line: 11,
},
},
operator: "=",
right: {
end: 278,
extra: {
raw: "\"\\0\\0\\0\\0\\0\\0\\0\\0\"",
rawValue: "\x00\x00\x00\x00\x00\x00\x00\x00",
},
loc: {
end: {
column: 22,
line: 11,
},
start: {
column: 4,
line: 11,
},
},
start: 260,
type: "StringLiteral",
value: "\x00\x00\x00\x00\x00\x00\x00\x00",
},
start: 256,
type: "AssignmentExpression",
},
loc: {
end: {
column: 22,
line: 11,
},
start: {
column: 0,
line: 11,
},
},
start: 256,
type: "ExpressionStatement",
},
],
directives: [],
end: 255,
end: 279,
loc: {
end: {
column: 0,
line: 10,
line: 12,
},
start: {
column: 0,
Expand Down
75 changes: 71 additions & 4 deletions fixtures/string-literal.js.sem.uast
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
col: 1,
},
end: { '@type': "uast:Position",
offset: 256,
line: 10,
offset: 280,
line: 12,
col: 1,
},
},
Expand All @@ -22,8 +22,8 @@
col: 1,
},
end: { '@type': "uast:Position",
offset: 256,
line: 10,
offset: 280,
line: 12,
col: 1,
},
},
Expand Down Expand Up @@ -1139,6 +1139,73 @@
],
kind: "var",
},
{ '@type': "javascript:ExpressionStatement",
'@role': [Statement],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
offset: 257,
line: 11,
col: 1,
},
end: { '@type': "uast:Position",
offset: 279,
line: 11,
col: 23,
},
},
expression: { '@type': "javascript:AssignmentExpression",
'@role': [Assignment, Binary, Expression, Operator],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
offset: 257,
line: 11,
col: 1,
},
end: { '@type': "uast:Position",
offset: 279,
line: 11,
col: 23,
},
},
left: { '@type': "uast:Identifier",
'@role': [Assignment, Binary, Left],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
offset: 257,
line: 11,
col: 1,
},
end: { '@type': "uast:Position",
offset: 258,
line: 11,
col: 2,
},
},
Name: "d",
},
operator: { '@type': "uast:Operator",
'@token': "=",
'@role': [Assignment, Binary, Expression, Operator],
},
right: { '@type': "uast:String",
'@role': [Assignment, Binary, Right],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
offset: 261,
line: 11,
col: 5,
},
end: { '@type': "uast:Position",
offset: 279,
line: 11,
col: 23,
},
},
Format: "",
Value: "\x00\x00\x00\x00\x00\x00\x00\x00",
},
},
},
],
directives: [],
sourceType: "module",
Expand Down
Loading

0 comments on commit 65c306a

Please sign in to comment.