Skip to content

Commit

Permalink
encoding/toml: check parser errors and reject duplicate keys
Browse files Browse the repository at this point in the history
And teach the tests about inputs which we expect to fail.
`want` is now `wantCUE` for clarity; `wantErr` uses `qt.ErrorMatches`.

Updates #68.

Signed-off-by: Daniel Martí <mvdan@mvdan.cc>
Change-Id: I2796a76faed01ed62c78145e257c8022efeb5ec3
Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1194707
Reviewed-by: Roger Peppe <rogpeppe@gmail.com>
TryBot-Result: CUEcueckoo <cueckoo@cuelang.org>
  • Loading branch information
mvdan committed May 15, 2024
1 parent abd5778 commit eacde77
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 41 deletions.
40 changes: 33 additions & 7 deletions encoding/toml/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
toml "github.com/pelletier/go-toml/v2/unstable"

"cuelang.org/go/cue/ast"
"cuelang.org/go/cue/literal"
"cuelang.org/go/cue/token"
)

Expand All @@ -34,7 +35,7 @@ import (
func NewDecoder(r io.Reader) *Decoder {
// Note that we don't consume the reader here,
// as there's no need, and we can't return an error either.
return &Decoder{r: r}
return &Decoder{r: r, seenKeys: make(map[string]bool)}
}

// Decoder implements the decoding state.
Expand All @@ -47,6 +48,11 @@ type Decoder struct {
decoded bool // whether [Decoder.Decoded] has been called already
parser toml.Parser

// seenKeys tracks which dot-separated rooted keys we have already decoded,
// as duplicate keys in TOML are not allowed.
// The string elements in between the dots may be quoted to avoid ambiguity.
seenKeys map[string]bool

currentFields []*ast.Field
}

Expand Down Expand Up @@ -77,6 +83,9 @@ func (d *Decoder) Decode() (ast.Node, error) {
return nil, err
}
}
if err := d.parser.Error(); err != nil {
return nil, err
}
for _, field := range d.currentFields {
file.Decls = append(file.Decls, field)
}
Expand Down Expand Up @@ -110,26 +119,36 @@ func (d *Decoder) nextRootNode(tnode *toml.Node) error {
// }
case toml.KeyValue:
keys := tnode.Key()
topField := &ast.Field{
curName := string(keys.Node().Data)
curField := &ast.Field{
Label: &ast.Ident{
NamePos: token.NoPos.WithRel(token.Newline),
Name: string(keys.Node().Data),
Name: curName,
},
}
ast.SetRelPos(topField.Label, token.Newline)

topField := curField
rootKey := quoteLabelIfNeeded(curName)

keys.Next() // TODO(mvdan): for some reason the first Next call doesn't count?
curField := topField
for keys.Next() {
nextName := string(keys.Node().Data)
nextField := &ast.Field{
Label: &ast.Ident{
NamePos: token.NoPos.WithRel(token.Blank),
Name: string(keys.Node().Data),
Name: nextName,
},
}
ast.SetRelPos(nextField.Label, token.Blank)

curField.Value = &ast.StructLit{Elts: []ast.Decl{nextField}}
curField = nextField
// TODO(mvdan): use an append-like API once we have benchmarks
rootKey += "." + quoteLabelIfNeeded(nextName)
}
if d.seenKeys[rootKey] {
return fmt.Errorf("duplicate key: %s", rootKey)
}
d.seenKeys[rootKey] = true
value, err := d.decodeExpr(tnode.Value())
if err != nil {
return err
Expand All @@ -144,6 +163,13 @@ func (d *Decoder) nextRootNode(tnode *toml.Node) error {
return nil
}

func quoteLabelIfNeeded(name string) string {
if ast.IsValidIdent(name) {
return name
}
return literal.Label.Quote(name)
}

// nextRootNode is called for every top-level expression from the TOML parser.
func (d *Decoder) decodeExpr(tnode *toml.Node) (ast.Expr, error) {
// TODO(mvdan): we currently assume that TOML basic literals (string, int, float)
Expand Down
71 changes: 37 additions & 34 deletions encoding/toml/decode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,25 +35,32 @@ func TestDecoder(t *testing.T) {
// The whitespace doesn't affect the input TOML, and we cue/format on the "want" CUE source,
// so the added newlines and tabs don't change the test behavior.
tests := []struct {
name string
input string
want string
name string
input string
wantCUE string
wantErr string
}{{
name: "Empty",
input: "",
want: "",
name: "Empty",
input: "",
wantCUE: "",
}, {
name: "LoneComment",
input: `
# Just a comment
`,
want: "",
wantCUE: "",
}, {
name: "RootKeyMissing",
input: `
= "no key name"
`,
wantErr: "invalid character at start of key: =",
}, {
name: "RootKeysOne",
input: `
key = "value"
`,
want: `
wantCUE: `
key: "value"
`,
}, {
Expand All @@ -63,7 +70,7 @@ func TestDecoder(t *testing.T) {
key2 = "value2"
key3 = "value3"
`,
want: `
wantCUE: `
key1: "value1"
key2: "value2"
key3: "value3"
Expand All @@ -75,7 +82,7 @@ func TestDecoder(t *testing.T) {
b1.b2 = "B"
c1.c2.c3 = "C"
`,
want: `
wantCUE: `
a1: "A"
b1: b2: "B"
c1: c2: c3: "C"
Expand All @@ -87,7 +94,7 @@ func TestDecoder(t *testing.T) {
a_b = "underscores"
123 = "numbers"
`,
want: `
wantCUE: `
"a-b": "dashes"
a_b: "underscores"
"123": "numbers"
Expand All @@ -99,7 +106,7 @@ func TestDecoder(t *testing.T) {
"foo bar" = "quoted space"
'foo "bar"' = "nested quotes"
`,
want: `
wantCUE: `
"1.2.3": "quoted dots"
"foo bar": "quoted space"
"foo \"bar\"": "nested quotes"
Expand All @@ -109,28 +116,23 @@ func TestDecoder(t *testing.T) {
input: `
site."foo.com".title = "foo bar"
`,
want: `
wantCUE: `
site: "foo.com": title: "foo bar"
`,
}, {
// TODO(mvdan): the TOML spec says that defining a key multiple times is invalid,
// we should error even though this can be OK in CUE as long as the values unify.
name: "RootKeysDuplicate",
input: `
foo = "same value"
foo = "same value"
`,
want: `
foo: "same value"
foo: "same value"
`,
wantErr: `duplicate key: foo`,
}, {
name: "BasicStrings",
input: `
escapes = "foo \"bar\" \n\t\\ baz"
unicode = "foo \u00E9"
`,
want: `
wantCUE: `
escapes: "foo \"bar\" \n\t\\ baz"
unicode: "foo é"
`,
Expand All @@ -153,7 +155,7 @@ line one \
line two.\
"""
`,
want: `
wantCUE: `
nested: " can contain \"\" quotes "
four: "\"four\""
double: "line one\nline two"
Expand All @@ -169,7 +171,7 @@ line two.\
quoted = 'Tom "Dubs" Preston-Werner'
regex = '<\i\c*\s*>'
`,
want: `
wantCUE: `
winpath: "C:\\Users\\nodejs\\templates"
winpath2: "\\\\ServerX\\admin$\\system32\\"
quoted: "Tom \"Dubs\" Preston-Werner"
Expand All @@ -194,7 +196,7 @@ line one \
line two.\
'''
`,
want: `
wantCUE: `
nested: " can contain '' quotes "
four: "'four'"
double: "line one\nline two"
Expand All @@ -213,7 +215,7 @@ line two.\
octal = 0o755
binary = 0b11010110
`,
want: `
wantCUE: `
zero: 0
positive: 123
plus: +40
Expand All @@ -234,7 +236,7 @@ line two.\
exponent_minus = -2E-4
exponent_dot = 6.789e-30
`,
want: `
wantCUE: `
pi: 3.1415
plus: +1.23
minus: -4.56
Expand All @@ -249,7 +251,7 @@ line two.\
positive = true
negative = false
`,
want: `
wantCUE: `
positive: true
negative: false
`,
Expand All @@ -263,7 +265,7 @@ line two.\
strings = [ "all", 'strings', """are the same""", '''type''' ]
mixed_numbers = [ 0.1, 0.2, 0.5, 1, 2, 5 ]
`,
want: `
wantCUE: `
integers: [1, 2, 3]
colors: ["red", "yellow", "green"]
nested_ints: [[1, 2], [3, 4, 5]]
Expand All @@ -280,13 +282,20 @@ line two.\
dec := toml.NewDecoder(strings.NewReader(test.input))

node, err := dec.Decode()
if test.wantErr != "" {
qt.Assert(t, qt.ErrorMatches(err, test.wantErr))
qt.Assert(t, qt.IsNil(node))
// We don't continue, so we can't expect any decoded CUE.
qt.Assert(t, qt.Equals(test.wantCUE, ""))
return
}
qt.Assert(t, qt.IsNil(err))

node2, err := dec.Decode()
qt.Assert(t, qt.IsNil(node2))
qt.Assert(t, qt.Equals(err, io.EOF))

wantFormatted, err := format.Source([]byte(test.want))
wantFormatted, err := format.Source([]byte(test.wantCUE))
qt.Assert(t, qt.IsNil(err))

formatted, err := format.Node(node)
Expand All @@ -301,12 +310,6 @@ line two.\
qt.Assert(t, qt.IsNil(val.Err()))
qt.Assert(t, qt.IsNil(val.Validate()))

// See the TODO above; go-toml rejects duplicate keys per the spec,
// but our decoder does not yet.
if test.name == "RootKeysDuplicate" {
return
}

// Validate that the decoded CUE value is equivalent
// to the Go value that a direct TOML unmarshal produces.
// We use JSON equality as some details such as which integer types are used
Expand Down

0 comments on commit eacde77

Please sign in to comment.