Skip to content
This repository has been archived by the owner on Mar 8, 2020. It is now read-only.

Preserve raw tokens for strings and regexps #50

Merged
merged 2 commits into from
Jan 22, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion driver/normalizer/annotation.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ var Annotations = []Mapping{
// Literals
AnnotateType("RegExpLiteral",
FieldRoles{
"pattern": {Rename: uast.KeyToken},
"raw": {Rename: uast.KeyToken},
},
role.Expression, role.Literal, role.Regexp,
),
Expand Down
80 changes: 79 additions & 1 deletion driver/normalizer/normalizer.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package normalizer

import (
"strings"

"gopkg.in/bblfsh/sdk.v2/uast"
"gopkg.in/bblfsh/sdk.v2/uast/nodes"
. "gopkg.in/bblfsh/sdk.v2/uast/transformer"
)

Expand Down Expand Up @@ -29,6 +32,34 @@ var Preprocessors = []Mapping{
Part("_", Obj{"loc": AnyNode(nil)}),
Part("_", Obj{}),
),
// preserve raw string and regexp literals
Map(
Part("_", Obj{
uast.KeyType: String("StringLiteral"),
"value": AnyNode(nil),
"extra": Obj{
"raw": Var("raw"),
"rawValue": AnyNode(nil),
},
}),
Part("_", Obj{
uast.KeyType: String("StringLiteral"),
"value": Var("raw"),
}),
),
Map(
Part("_", Obj{
uast.KeyType: String("RegExpLiteral"),
"extra": Obj{
"raw": Var("raw"),
},
}),
Part("_", Obj{
uast.KeyType: String("RegExpLiteral"),
"raw": Var("raw"),
}),
),
// drop extra info for other nodes (it duplicates other node fields)
Map(
Part("_", Obj{"extra": AnyNode(nil)}),
Part("_", Obj{}),
Expand All @@ -47,7 +78,16 @@ var Normalizers = []Mapping{
)),
MapSemantic("StringLiteral", uast.String{}, MapObj(
Obj{
"value": Var("val"),
"value": singleQuote{Var("val")},
},
Obj{
"Value": Var("val"),
"Format": String("single"),
},
)),
MapSemantic("StringLiteral", uast.String{}, MapObj(
Obj{
"value": Quote(Var("val")),
},
Obj{
"Value": Var("val"),
Expand Down Expand Up @@ -243,3 +283,41 @@ var Normalizers = []Mapping{
},
)),
}

// singleQuote is an operation that wraps another Op and adapts it to
// single-quoted string literals: Check strips the surrounding quotes and
// unescapes the content before delegating to the wrapped op, and Construct
// quotes the string produced by the wrapped op.
type singleQuote struct {
op Op
}

// Kinds reports the node kinds this operation applies to: strings only.
func (singleQuote) Kinds() nodes.Kind {
	return nodes.KindString
}

// Check reports whether n is a single-quoted string literal whose unescaped
// content is accepted by the wrapped op.
//
// It returns (false, nil) when n is not a nodes.String or is not enclosed in
// a pair of single quotes. It returns a non-nil error when the quoted content
// cannot be unescaped, or when the wrapped op's Check fails.
func (q singleQuote) Check(st *State, n nodes.Node) (bool, error) {
	sn, ok := n.(nodes.String)
	if !ok {
		return false, nil
	}
	s := string(sn)
	// Both an opening and a closing quote are required. The length guard is
	// essential: a lone "'" satisfies HasPrefix and HasSuffix at the same
	// time, and slicing it as s[1:len(s)-1] would panic.
	if len(s) < 2 || !strings.HasPrefix(s, `'`) || !strings.HasSuffix(s, `'`) {
		return false, nil
	}
	s, err := unquoteSingle(s[1 : len(s)-1])
	if err != nil {
		return false, err
	}
	return q.op.Check(st, nodes.String(s))
}

// Construct runs the wrapped op's Construct and re-encodes its string result
// as a single-quoted literal. It fails with ErrUnexpectedType when the wrapped
// op produces anything other than a nodes.String.
func (q singleQuote) Construct(st *State, n nodes.Node) (nodes.Node, error) {
	built, err := q.op.Construct(st, n)
	if err != nil {
		return nil, err
	}
	if str, ok := built.(nodes.String); ok {
		return nodes.String(quoteSingle(string(str))), nil
	}
	return nil, ErrUnexpectedType.New(nodes.String(""), built)
}
109 changes: 109 additions & 0 deletions driver/normalizer/strconv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package normalizer

import (
"strconv"
"unicode/utf8"
)

// The functions below are adapted from strconv.Unquote and strconv.Quote.
// The originals cannot be used directly: in Go, single quotes delimit a rune
// literal, so strconv rejects single-quoted values longer than one character.

// unquoteSingle decodes the body of a single-quoted literal (s must not
// include the surrounding quotes) using the escape rules of
// strconv.UnquoteChar with ' as the quote character. An unescaped ' or any
// escape sequence rejected by strconv.UnquoteChar yields an error.
func unquoteSingle(s string) (string, error) {
	out := make([]byte, 0, 3*len(s)/2)
	var enc [utf8.UTFMax]byte
	for rest := s; rest != ""; {
		r, multibyte, tail, err := strconv.UnquoteChar(rest, '\'')
		if err != nil {
			return "", err
		}
		rest = tail
		if multibyte && r >= utf8.RuneSelf {
			// A real multi-byte character: store its UTF-8 encoding.
			n := utf8.EncodeRune(enc[:], r)
			out = append(out, enc[:n]...)
			continue
		}
		// ASCII, or a raw byte produced by an escape such as \xff.
		out = append(out, byte(r))
	}
	return string(out), nil
}

// lowerhex maps a nibble value to its lowercase hexadecimal digit; it is used
// when emitting \x, \u and \U escapes.
const lowerhex = "0123456789abcdef"

// quoteSingle returns s wrapped in single quotes, escaping the quote
// character, backslashes and non-printable characters in the same way
// strconv.Quote does for double-quoted strings. Bytes that are not valid
// UTF-8 are written as \xNN escapes.
func quoteSingle(s string) string {
	const quote = '\''

	out := make([]byte, 0, 3*len(s)/2)
	out = append(out, quote)
	for len(s) > 0 {
		r, size := rune(s[0]), 1
		if r >= utf8.RuneSelf {
			r, size = utf8.DecodeRuneInString(s)
		}
		if size == 1 && r == utf8.RuneError {
			// Invalid UTF-8: keep the raw byte as a hex escape.
			out = append(out, '\\', 'x', lowerhex[s[0]>>4], lowerhex[s[0]&0xF])
		} else {
			out = appendEscapedRune(out, r, quote)
		}
		s = s[size:]
	}
	out = append(out, quote)
	return string(out)
}

// appendEscapedRune appends the escaped form of r to buf and returns the
// extended slice. The quote character and the backslash are always
// backslash-escaped, printable runes are written as UTF-8, and everything
// else becomes a named escape (\n, \t, ...) or a \x, \u or \U escape.
func appendEscapedRune(buf []byte, r rune, quote byte) []byte {
	switch {
	case r == rune(quote), r == '\\':
		return append(buf, '\\', byte(r))
	case strconv.IsPrint(r):
		var enc [utf8.UTFMax]byte
		n := utf8.EncodeRune(enc[:], r)
		return append(buf, enc[:n]...)
	}

	// Control characters that have a dedicated short escape.
	switch r {
	case '\a':
		return append(buf, '\\', 'a')
	case '\b':
		return append(buf, '\\', 'b')
	case '\f':
		return append(buf, '\\', 'f')
	case '\n':
		return append(buf, '\\', 'n')
	case '\r':
		return append(buf, '\\', 'r')
	case '\t':
		return append(buf, '\\', 't')
	case '\v':
		return append(buf, '\\', 'v')
	}

	if r < ' ' {
		return append(buf, '\\', 'x', lowerhex[byte(r)>>4], lowerhex[byte(r)&0xF])
	}

	// Out-of-range values are replaced by U+FFFD before being escaped.
	if r > utf8.MaxRune {
		r = 0xFFFD
	}
	marker, digits := byte('U'), 8
	if r < 0x10000 {
		marker, digits = 'u', 4
	}
	buf = append(buf, '\\', marker)
	for shift := 4 * (digits - 1); shift >= 0; shift -= 4 {
		buf = append(buf, lowerhex[r>>uint(shift)&0xF])
	}
	return buf
}
2 changes: 1 addition & 1 deletion fixtures/call-expression.js.sem.uast
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@
col: 10,
},
},
Format: "",
Format: "single",
Value: "",
},
],
Expand Down
2 changes: 1 addition & 1 deletion fixtures/call-expression.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@
},
arguments: [
{ '@type': "StringLiteral",
'@token': "",
'@token': "''",
'@role': [Argument, Call, Expression, Literal, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
2 changes: 1 addition & 1 deletion fixtures/export-declaration.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@
},
exportKind: "value",
source: { '@type': "StringLiteral",
'@token': "mod",
'@token': "\"mod\"",
'@role': [Expression, Literal, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
6 changes: 3 additions & 3 deletions fixtures/ext_typedecl.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,7 @@
},
},
expression: { '@type': "StringLiteral",
'@token': "babel",
'@token': "\"babel\"",
'@role': [Expression, Literal, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -905,7 +905,7 @@
'@role': [Binary, Expression, Identical, Operator, Relational],
},
right: { '@type': "StringLiteral",
'@token': "undefined",
'@token': "\"undefined\"",
'@role': [Binary, Expression, Literal, Right, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -1185,7 +1185,7 @@
'@role': [Binary, Expression, Identical, Not, Operator, Relational],
},
right: { '@type': "StringLiteral",
'@token': "object",
'@token': "\"object\"",
'@role': [Binary, Expression, Literal, Right, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
2 changes: 1 addition & 1 deletion fixtures/flow-annotations.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -1634,7 +1634,7 @@
},
},
value: { '@type': "StringLiteral",
'@token': "foo",
'@token': "\"foo\"",
'@role': [Expression, Initialization, Literal, String, Value],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
2 changes: 1 addition & 1 deletion fixtures/hello.js.sem.uast
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
col: 26,
},
},
Format: "",
Format: "single",
Value: "Hello World",
},
],
Expand Down
2 changes: 1 addition & 1 deletion fixtures/hello.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
},
arguments: [
{ '@type': "StringLiteral",
'@token': "Hello World",
'@token': "'Hello World'",
'@role': [Argument, Call, Expression, Literal, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
18 changes: 9 additions & 9 deletions fixtures/import-declaration.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -109,7 +109,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -174,7 +174,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -256,7 +256,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -338,7 +338,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -468,7 +468,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -598,7 +598,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -711,7 +711,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -806,7 +806,7 @@
},
},
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
2 changes: 1 addition & 1 deletion fixtures/issue24.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@
},
],
source: { '@type': "StringLiteral",
'@token': "./file/file",
'@token': "\"./file/file\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
Loading