Skip to content
This repository has been archived by the owner on Mar 8, 2020. It is now read-only.

Commit

Permalink
preserve raw string literals in the native ast; fixes #32
Browse files Browse the repository at this point in the history
Signed-off-by: Denys Smirnov <denys@sourced.tech>
  • Loading branch information
Denys Smirnov committed Jan 3, 2019
1 parent aebcf20 commit 676f7ea
Show file tree
Hide file tree
Showing 41 changed files with 464 additions and 86 deletions.
69 changes: 68 additions & 1 deletion driver/normalizer/normalizer.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package normalizer

import (
"strings"

"gopkg.in/bblfsh/sdk.v2/uast"
"gopkg.in/bblfsh/sdk.v2/uast/nodes"
. "gopkg.in/bblfsh/sdk.v2/uast/transformer"
)

Expand Down Expand Up @@ -29,6 +32,22 @@ var Preprocessors = []Mapping{
Part("_", Obj{"loc": AnyNode(nil)}),
Part("_", Obj{}),
),
// preserve raw string literal
Map(
Part("_", Obj{
uast.KeyType: String("StringLiteral"),
"value": AnyNode(nil),
"extra": Obj{
"raw": Var("raw"),
"rawValue": AnyNode(nil),
},
}),
Part("_", Obj{
uast.KeyType: String("StringLiteral"),
"value": Var("raw"),
}),
),
// drop extra info for other nodes (it duplicates other node fields)
Map(
Part("_", Obj{"extra": AnyNode(nil)}),
Part("_", Obj{}),
Expand All @@ -47,7 +66,16 @@ var Normalizers = []Mapping{
)),
MapSemantic("StringLiteral", uast.String{}, MapObj(
Obj{
"value": Var("val"),
"value": singleQuote{Var("val")},
},
Obj{
"Value": Var("val"),
"Format": String("single"),
},
)),
MapSemantic("StringLiteral", uast.String{}, MapObj(
Obj{
"value": Quote(Var("val")),
},
Obj{
"Value": Var("val"),
Expand Down Expand Up @@ -243,3 +271,42 @@ var Normalizers = []Mapping{
},
)),
}

// singleQuote is a transformer operation that wraps another Op and converts
// between a raw single-quoted string literal and its unescaped contents.
// Check strips the surrounding quotes and unescapes the text before handing
// it to the wrapped op; Construct takes the wrapped op's string result and
// quotes/escapes it again.
type singleQuote struct {
// op is the wrapped operation that sees the unquoted string value.
op Op
}

// Kinds always returns nodes.KindString.
// NOTE(review): presumably this tells the transformer that the operation only
// applies to string nodes — confirm against the Op interface contract.
func (op singleQuote) Kinds() nodes.Kind {
return nodes.KindString
}

// Check reports whether n is a string node holding a single-quoted literal
// and, if so, passes the unquoted and unescaped contents to the wrapped op.
//
// It returns (false, nil) when n is not a nodes.String or is not enclosed in
// a pair of single quotes. An error is returned when the text between the
// quotes cannot be unescaped, or when the wrapped op's Check fails.
func (op singleQuote) Check(st *State, n nodes.Node) (bool, error) {
	sn, ok := n.(nodes.String)
	if !ok {
		return false, nil
	}
	s := string(sn)
	// A lone "'" satisfies both the prefix and the suffix test, which would
	// make the slice below s[1:0] and panic. Require room for two distinct
	// quote characters first.
	if len(s) < 2 || !strings.HasPrefix(s, `'`) || !strings.HasSuffix(s, `'`) {
		return false, nil
	}
	s, err := unquoteSingle(s[1 : len(s)-1])
	if err != nil {
		return false, err
	}
	return op.op.Check(st, nodes.String(s))
}

// Construct builds a node with the wrapped op and, when the result is a
// string, returns it wrapped in single quotes with its contents escaped.
// A non-string result from the wrapped op yields ErrUnexpectedType.
func (op singleQuote) Construct(st *State, n nodes.Node) (nodes.Node, error) {
	inner, err := op.op.Construct(st, n)
	if err != nil {
		return nil, err
	}
	if str, ok := inner.(nodes.String); ok {
		return nodes.String(quoteSingle(string(str))), nil
	}
	return nil, ErrUnexpectedType.New(nodes.String(""), inner)
}
109 changes: 109 additions & 0 deletions driver/normalizer/strconv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
package normalizer

import (
"strconv"
"unicode/utf8"
)

// The functions below are adapted from strconv.Unquote and strconv.Quote.
// The original functions cannot escape/unescape single-quoted values that
// contain multiple characters, since in Go single quotes denote a rune literal.

// unquoteSingle interprets s as the body of a single-quoted string literal
// (the surrounding quotes must already be removed) and returns the string it
// denotes. Escape sequences are decoded with strconv.UnquoteChar using ' as
// the quote character, so an unescaped ' or an escaped " inside s is reported
// as an error, as is any malformed escape.
func unquoteSingle(s string) (string, error) {
	out := make([]byte, 0, 3*len(s)/2)
	var enc [utf8.UTFMax]byte
	for rest := s; rest != ""; {
		r, multi, tail, err := strconv.UnquoteChar(rest, '\'')
		if err != nil {
			return "", err
		}
		rest = tail
		if multi && r >= utf8.RuneSelf {
			// A genuine multi-byte character: store its UTF-8 encoding.
			size := utf8.EncodeRune(enc[:], r)
			out = append(out, enc[:size]...)
			continue
		}
		// ASCII, or a single raw byte produced by a \x / octal escape.
		out = append(out, byte(r))
	}
	return string(out), nil
}

// lowerhex maps a 4-bit value to its lowercase hexadecimal digit.
const lowerhex = "0123456789abcdef"

// quoteSingle returns s as a single-quoted string literal. Single quotes and
// backslashes are backslash-escaped, printable characters are copied as-is,
// and everything else is written as an escape sequence, following the rules
// of strconv.Quote with ' in place of ". Bytes that are not valid UTF-8 are
// emitted as \xNN.
func quoteSingle(s string) string {
	const quote = '\''

	out := make([]byte, 0, 3*len(s)/2)
	out = append(out, quote)
	for i := 0; i < len(s); {
		b := s[i]
		if b < utf8.RuneSelf {
			// Plain ASCII byte: no decoding needed.
			out = appendEscapedRune(out, rune(b), quote)
			i++
			continue
		}
		r, size := utf8.DecodeRuneInString(s[i:])
		if r == utf8.RuneError && size == 1 {
			// Invalid UTF-8: keep the raw byte as a hex escape.
			out = append(out, '\\', 'x', lowerhex[b>>4], lowerhex[b&0xF])
		} else {
			out = appendEscapedRune(out, r, quote)
		}
		i += size
	}
	out = append(out, quote)
	return string(out)
}

// appendEscapedRune appends the escaped form of r to buf and returns the
// extended slice. The quote character and the backslash are always
// backslash-escaped; printable runes are appended UTF-8 encoded; the common
// control characters use their short escapes; remaining runes are written as
// \xNN, \uNNNN or \UNNNNNNNN depending on their value.
func appendEscapedRune(buf []byte, r rune, quote byte) []byte {
	switch {
	case r == rune(quote), r == '\\':
		return append(buf, '\\', byte(r))
	case strconv.IsPrint(r):
		var enc [utf8.UTFMax]byte
		size := utf8.EncodeRune(enc[:], r)
		return append(buf, enc[:size]...)
	}

	// Control characters that have a dedicated one-letter escape.
	var short byte
	switch r {
	case '\a':
		short = 'a'
	case '\b':
		short = 'b'
	case '\f':
		short = 'f'
	case '\n':
		short = 'n'
	case '\r':
		short = 'r'
	case '\t':
		short = 't'
	case '\v':
		short = 'v'
	}
	if short != 0 {
		return append(buf, '\\', short)
	}

	if r < ' ' {
		return append(buf, '\\', 'x', lowerhex[byte(r)>>4], lowerhex[byte(r)&0xF])
	}
	if r > utf8.MaxRune {
		// Out-of-range values are written as the replacement character.
		r = 0xFFFD
	}
	marker, shift := byte('U'), 28
	if r < 0x10000 {
		marker, shift = 'u', 12
	}
	buf = append(buf, '\\', marker)
	for ; shift >= 0; shift -= 4 {
		buf = append(buf, lowerhex[(r>>uint(shift))&0xF])
	}
	return buf
}
2 changes: 1 addition & 1 deletion fixtures/call-expression.js.sem.uast
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@
col: 10,
},
},
Format: "",
Format: "single",
Value: "",
},
],
Expand Down
2 changes: 1 addition & 1 deletion fixtures/call-expression.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,7 @@
},
arguments: [
{ '@type': "StringLiteral",
'@token': "",
'@token': "''",
'@role': [Argument, Call, Expression, Literal, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
2 changes: 1 addition & 1 deletion fixtures/export-declaration.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@
},
exportKind: "value",
source: { '@type': "StringLiteral",
'@token': "mod",
'@token': "\"mod\"",
'@role': [Expression, Literal, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
6 changes: 3 additions & 3 deletions fixtures/ext_typedecl.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,7 @@
},
},
expression: { '@type': "StringLiteral",
'@token': "babel",
'@token': "\"babel\"",
'@role': [Expression, Literal, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -905,7 +905,7 @@
'@role': [Binary, Expression, Identical, Operator, Relational],
},
right: { '@type': "StringLiteral",
'@token': "undefined",
'@token': "\"undefined\"",
'@role': [Binary, Expression, Literal, Right, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -1185,7 +1185,7 @@
'@role': [Binary, Expression, Identical, Not, Operator, Relational],
},
right: { '@type': "StringLiteral",
'@token': "object",
'@token': "\"object\"",
'@role': [Binary, Expression, Literal, Right, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
2 changes: 1 addition & 1 deletion fixtures/flow-annotations.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -1634,7 +1634,7 @@
},
},
value: { '@type': "StringLiteral",
'@token': "foo",
'@token': "\"foo\"",
'@role': [Expression, Initialization, Literal, String, Value],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
2 changes: 1 addition & 1 deletion fixtures/hello.js.sem.uast
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
col: 26,
},
},
Format: "",
Format: "single",
Value: "Hello World",
},
],
Expand Down
2 changes: 1 addition & 1 deletion fixtures/hello.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
},
arguments: [
{ '@type': "StringLiteral",
'@token': "Hello World",
'@token': "'Hello World'",
'@role': [Argument, Call, Expression, Literal, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
18 changes: 9 additions & 9 deletions fixtures/import-declaration.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -109,7 +109,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -174,7 +174,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -256,7 +256,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -338,7 +338,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -468,7 +468,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -598,7 +598,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -711,7 +711,7 @@
},
importKind: "value",
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down Expand Up @@ -806,7 +806,7 @@
},
},
source: { '@type': "StringLiteral",
'@token': "module-name",
'@token': "\"module-name\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
2 changes: 1 addition & 1 deletion fixtures/issue24.js.uast
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@
},
],
source: { '@type': "StringLiteral",
'@token': "./file/file",
'@token': "\"./file/file\"",
'@role': [Expression, Import, Literal, Pathname, String],
'@pos': { '@type': "uast:Positions",
start: { '@type': "uast:Position",
Expand Down
Loading

0 comments on commit 676f7ea

Please sign in to comment.