Skip to content

Commit

Permalink
cue/literal: implement CUE-specific quoting
Browse files Browse the repository at this point in the history
Using strconv.Quote is incorrect, as CUE string syntax is not
entirely the same as Go's, resulting in bugs.

Code and test cases have been copied from strconv.Quote
and are not based on the deleted (similar) implementation
of internal/core/export/quote.go.
Tests have been augmented with CUE-specific cases.

Note that because of the more complex string syntax of CUE,
the API is somewhat tricky. This API is hopefully straightforward
given the number of options that need to be covered (not all
implemented).

Also fixes:
- variable indentation
- using tabs instead of spaces for indentation
- fixes multiline interpolation export bug

Fixes #122
Fixes #514
Fixes #540
Fixes #541

Change-Id: If79954678acbd6c9ded2da564856ac28018ba8e1
Reviewed-on: https://cue-review.googlesource.com/c/cue/+/7282
Reviewed-by: CUE cueckoo <cueckoo@gmail.com>
Reviewed-by: Marcel van Lohuizen <mpvl@golang.org>
  • Loading branch information
mpvl committed Oct 3, 2020
1 parent 03fa5a8 commit c886094
Show file tree
Hide file tree
Showing 21 changed files with 2,038 additions and 1,612 deletions.
3 changes: 2 additions & 1 deletion cmd/cue/cmd/get_go.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
cueast "cuelang.org/go/cue/ast"
"cuelang.org/go/cue/ast/astutil"
"cuelang.org/go/cue/format"
"cuelang.org/go/cue/literal"
"cuelang.org/go/cue/load"
"cuelang.org/go/cue/parser"
cuetoken "cuelang.org/go/cue/token"
Expand Down Expand Up @@ -1128,7 +1129,7 @@ func (e *extractor) addFields(x *types.Struct, st *cueast.StructLit) {

if typeName != cueStr {
if strings.ContainsAny(typeName, `#"',()=`) {
typeName = strconv.Quote(typeName)
typeName = literal.String.Quote(typeName)
}
fmt.Fprint(buf, ",", typeName)
}
Expand Down
11 changes: 7 additions & 4 deletions cue/ast/ast.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ package ast // import "cuelang.org/go/cue/ast"

import (
"fmt"
"strconv"
"strings"

"cuelang.org/go/cue/literal"
"cuelang.org/go/cue/token"
)

Expand Down Expand Up @@ -426,12 +426,15 @@ type BasicLit struct {
label
}

// TODO: introduce and use NewLabel and NewBytes and perhaps NewText (in the
// latter case NewString would return a string or bytes type) to distinguish from
// NewString. Consider how to pass indentation information.

// NewString creates a new BasicLit with a string value without position.
// It quotes the given string.
// Useful for ASTs generated by code other than the CUE parser.
func NewString(str string) *BasicLit {
// TODO: use CUE quoting.
str = strconv.Quote(str)
str = literal.String.Quote(str)
return &BasicLit{Kind: token.STRING, ValuePos: token.NoPos, Value: str}
}

Expand Down Expand Up @@ -863,7 +866,7 @@ type ImportSpec struct {
func (*ImportSpec) specNode() {}

func NewImport(name *Ident, importPath string) *ImportSpec {
importPath = strconv.Quote(importPath)
importPath = literal.String.Quote(importPath)
path := &BasicLit{Kind: token.STRING, Value: importPath}
return &ImportSpec{Name: name, Path: path}
}
Expand Down
5 changes: 2 additions & 3 deletions cue/format/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ package format

import (
"fmt"
"strconv"
"strings"

"cuelang.org/go/cue/ast"
Expand Down Expand Up @@ -472,7 +471,7 @@ func (f *formatter) label(l ast.Label, optional bool) {
// if the AST is not generated by the parser.
name := n.Name
if !ast.IsValidIdent(name) {
name = strconv.Quote(n.Name)
name = literal.String.Quote(n.Name)
}
f.print(n.NamePos, name)

Expand All @@ -482,7 +481,7 @@ func (f *formatter) label(l ast.Label, optional bool) {
// according to spec.
if strings.HasPrefix(str, `"""`) || strings.HasPrefix(str, "#") {
if u, err := literal.Unquote(str); err == nil {
str = strconv.Quote(u)
str = literal.String.Quote(u)
}
}
f.print(n.ValuePos, str)
Expand Down
305 changes: 305 additions & 0 deletions cue/literal/quote.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,305 @@
// Copyright 2020 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package literal

import (
"strconv"
"strings"
"unicode/utf8"
)

// Form defines how to quote a string or bytes literal.
//
// The With* methods have value receivers and return a modified copy, so the
// predefined forms (String, Label, Bytes) can be used as starting points.
type Form struct {
// quote is the delimiter byte written around the literal.
quote byte
// multiline selects the triple-quoted layout with one source line per row.
multiline bool
// auto switches to multiline when the quoted value contains a newline.
auto bool
// exact writes invalid UTF-8 bytes, and control characters that have no
// short escape, as \xNN.
exact bool
// asciiOnly escapes every rune that is not printable ASCII.
asciiOnly bool
// graphicOnly leaves the runes listed in isGraphic unescaped. It is only
// consulted when asciiOnly is not set.
graphicOnly bool
// indent is the prefix written before each non-empty line of a multiline
// literal and before its closing quotes.
indent string
}

// TODO:
// - Fixed or max level of escape modifiers (#""#).
// - Option to fall back to bytes if value cannot be represented as string.
// E.g. ExactString.
// - QuoteExact that fails with an error if a string cannot be represented
// without loss.
// - Handle auto-breaking for long lines (Swift-style, \-terminated lines).
// This is not supported yet in CUE, but may be, and should be considered as
// a possibility in API design.
// - Other possible convenience forms: Blob (auto-break bytes), String (bytes
// or string), Label.

// WithTabIndent returns a copy of f that always produces a multiline literal
// whose lines are indented with the given number of tabs.
func (f Form) WithTabIndent(tabs int) Form {
	var prefix string
	switch {
	case tabs < len(tabIndent):
		// Common case: slice the shared run of tabs instead of allocating.
		prefix = tabIndent[:tabs]
	default:
		prefix = strings.Repeat("\t", tabs)
	}
	f.indent, f.multiline = prefix, true
	return f
}

// tabIndent is a run of tab characters from which WithTabIndent and
// WithOptionalTabIndent slice short indentation strings.
const tabIndent = "\t\t\t\t\t\t\t\t\t\t\t\t"

// WithOptionalTabIndent is like WithTabIndent, but the result is rendered as
// a multiline literal only when the quoted value contains a newline
// character. Values without a newline keep the form's existing layout.
func (f Form) WithOptionalTabIndent(tabs int) Form {
	if tabs >= len(tabIndent) {
		// Deeper than the shared run of tabs: build the prefix explicitly.
		f.indent = strings.Repeat("\t", tabs)
	} else {
		f.indent = tabIndent[:tabs]
	}
	// The multiline decision is deferred until the value is known.
	f.auto = true
	return f
}

// WithASCIIOnly returns a copy of f whose output consists solely of ASCII
// characters: any rune that is not printable ASCII is written as an escape
// sequence.
func (f Form) WithASCIIOnly() Form {
	ascii := f
	ascii.asciiOnly = true
	return ascii
}

// WithGraphicOnly returns a copy of f that, in addition to printable
// characters, leaves the runes listed in isGraphic unescaped. The setting is
// not consulted when the form is also ASCII-only.
func (f Form) WithGraphicOnly() Form {
	graphic := f
	graphic.graphicOnly = true
	return graphic
}

var (
// String defines the format of a CUE string. Conversions may be lossy.
String Form = stringForm

// TODO: ExactString: quotes to bytes type if the string cannot be
// represented without loss of accuracy.

// Label is the form intended for quoting labels. It is currently identical
// to String.
Label Form = stringForm

// Bytes defines the format of bytes literal.
Bytes Form = bytesForm

// stringForm is the base form for strings: delimited by double quotes.
stringForm = Form{quote: '"'}
// bytesForm is the base form for bytes: delimited by single quotes, with
// exact set so that invalid UTF-8 bytes are written as \xNN.
bytesForm = Form{quote: '\'', exact: true}
)

// Quote returns a CUE literal representing s, formatted according to f. The
// returned string uses CUE escape sequences (\t, \n, \u00FF, \u0100) for
// control characters and non-printable characters as defined by
// strconv.IsPrint.
//
// Quote does not report errors; it always returns a literal.
func (f Form) Quote(s string) string {
	// Reserve one and a half times the input length for quotes and escapes.
	buf := make([]byte, 0, 3*len(s)/2)
	buf = f.Append(buf, s)
	return string(buf)
}

// lowerhex maps a 4-bit value to its lowercase hexadecimal digit.
const lowerhex = "0123456789abcdef"

// Append appends a CUE literal representing s, as generated by Quote, to buf
// and returns the extended buffer.
//
// A single-line literal is s, escaped, between one pair of quotes. A
// multiline literal opens with three quotes and a newline, carries the
// indented and escaped value, and closes with a newline, the indentation and
// three quotes. An empty value in multiline form is just the opening line
// followed by the indented closing quotes.
func (f Form) Append(buf []byte, s string) []byte {
	f.multiline = f.multiline || (f.auto && strings.ContainsRune(s, '\n'))

	// Often called with big strings, so make room up front. When escapes are
	// needed this is an underestimate, but it still avoids most regrowth.
	if free := cap(buf) - len(buf); free < len(s) {
		grown := make([]byte, len(buf), len(buf)+len(s)+2)
		copy(grown, buf)
		buf = grown
	}

	if !f.multiline {
		buf = append(buf, f.quote)
		buf = f.appendEscaped(buf, s)
		return append(buf, f.quote)
	}

	buf = append(buf, f.quote, f.quote, f.quote, '\n')
	if s != "" {
		// An empty first line carries no indentation.
		if s[0] != '\n' {
			buf = append(buf, f.indent...)
		}
		buf = f.appendEscaped(buf, s)
		buf = append(buf, '\n')
	}
	buf = append(buf, f.indent...)
	return append(buf, f.quote, f.quote, f.quote)
}

// AppendEscaped appends the escaped content of a CUE literal representing s,
// as generated by Quote but without the surrounding quotes, to buf and
// returns the extended buffer.
//
// In multiline form the lines following a newline are indented, but neither
// the opening line nor the trailing newline and indentation before the
// closing quotes are written.
func (f Form) AppendEscaped(buf []byte, s string) []byte {
	if !f.multiline && f.auto {
		f.multiline = strings.ContainsRune(s, '\n')
	}

	// Often called with big strings, so make room up front. When escapes are
	// needed this is an underestimate, but it still avoids most regrowth.
	if free := cap(buf) - len(buf); free < len(s) {
		grown := make([]byte, len(buf), len(buf)+len(s)+2)
		copy(grown, buf)
		buf = grown
	}

	return f.appendEscaped(buf, s)
}

// appendEscaped appends the escaped form of s, without surrounding quotes, to
// buf and returns the extended buffer.
//
// The input is processed rune by rune:
//   - with f.exact, a byte that is not valid UTF-8 is written as \xNN;
//   - with f.multiline, a newline is written literally and followed by
//     f.indent, unless the next line is empty or the newline ends s;
//   - with f.multiline, a quote that starts a run of three quote characters
//     is backslash-escaped, because an unescaped run of three would terminate
//     the multiline literal early;
//   - everything else is delegated to appendEscapedRune.
func (f Form) appendEscaped(buf []byte, s string) []byte {
	for width := 0; len(s) > 0; s = s[width:] {
		r := rune(s[0])
		width = 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRuneInString(s)
		}
		switch {
		case f.exact && width == 1 && r == utf8.RuneError:
			// Invalid UTF-8: preserve the raw byte.
			buf = append(buf, '\\', 'x', lowerhex[s[0]>>4], lowerhex[s[0]&0xF])

		case f.multiline && r == '\n':
			buf = append(buf, '\n')
			if len(s) > 1 && s[1] != '\n' {
				buf = append(buf, f.indent...)
			}

		case f.multiline && len(s) >= 3 &&
			s[0] == f.quote && s[1] == f.quote && s[2] == f.quote:
			// Escaping the first quote of every run of three leaves at most
			// two consecutive unescaped quotes in the output.
			buf = append(buf, '\\', f.quote)

		default:
			buf = f.appendEscapedRune(buf, r)
		}
	}
	return buf
}

// appendEscapedRune appends the representation of a single rune to buf and
// returns the extended buffer.
//
// A backslash is always backslash-escaped, and so is the quote character
// unless the form is multiline. Runes that may appear literally are copied
// through: printable ASCII when f.asciiOnly is set; otherwise anything
// strconv.IsPrint accepts plus, with f.graphicOnly, the runes in isGraphic.
// Every other rune becomes a short escape (\a \b \f \n \r \t \v), a \xNN
// escape (control characters, only with f.exact), or a \uNNNN / \UNNNNNNNN
// escape.
func (f *Form) appendEscapedRune(buf []byte, r rune) []byte {
	switch {
	case r == '\\', !f.multiline && r == rune(f.quote): // always backslashed
		return append(buf, '\\', byte(r))
	case f.asciiOnly:
		if r < utf8.RuneSelf && strconv.IsPrint(r) {
			return append(buf, byte(r))
		}
	case strconv.IsPrint(r) || (f.graphicOnly && isInGraphicList(r)):
		var enc [utf8.UTFMax]byte
		n := utf8.EncodeRune(enc[:], r)
		return append(buf, enc[:n]...)
	}

	// The rune has to be escaped; prefer the short forms.
	switch r {
	case '\a':
		return append(buf, `\a`...)
	case '\b':
		return append(buf, `\b`...)
	case '\f':
		return append(buf, `\f`...)
	case '\n':
		return append(buf, `\n`...)
	case '\r':
		return append(buf, `\r`...)
	case '\t':
		return append(buf, `\t`...)
	case '\v':
		return append(buf, `\v`...)
	}

	if r < ' ' && f.exact {
		return append(buf, '\\', 'x', lowerhex[byte(r)>>4], lowerhex[byte(r)&0xF])
	}

	// Values beyond the Unicode range are written as the replacement
	// character.
	if r > utf8.MaxRune {
		r = 0xFFFD
	}
	marker, shift := `\U`, 28
	if r < 0x10000 {
		marker, shift = `\u`, 12
	}
	buf = append(buf, marker...)
	for ; shift >= 0; shift -= 4 {
		buf = append(buf, lowerhex[r>>uint(shift)&0xF])
	}
	return buf
}

// isInGraphicList reports whether r appears in the isGraphic table. It is
// kept separate from a full graphic check so that callers that have already
// tried strconv.IsPrint do not pay for it twice; call it only after IsPrint
// has failed.
func isInGraphicList(r rune) bool {
	// Every table entry fits in 16 bits, so larger runes cannot match and
	// must be rejected before the narrowing conversion below.
	if r > 0xFFFF {
		return false
	}
	target := uint16(r)
	if pos := bsearch16(isGraphic, target); pos < len(isGraphic) {
		return isGraphic[pos] == target
	}
	return false
}

// bsearch16 performs a binary search over the ascending slice a and returns
// the smallest index i for which a[i] >= x, or len(a) when every element is
// smaller than x.
func bsearch16(a []uint16, x uint16) int {
	lo, hi := 0, len(a)
	for lo < hi {
		mid := lo + (hi-lo)/2
		if a[mid] >= x {
			// The answer is at mid or to its left.
			hi = mid
			continue
		}
		lo = mid + 1
	}
	return lo
}

// isGraphic lists, in ascending order, the graphic runes that are not matched
// by IsPrint. The order matters: isInGraphicList searches it with bsearch16.
var isGraphic = []uint16{
	0x00a0, 0x1680,
	0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005,
	0x2006, 0x2007, 0x2008, 0x2009, 0x200a,
	0x202f, 0x205f, 0x3000,
}

0 comments on commit c886094

Please sign in to comment.