Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New YAML loader to support configuration location #828

Merged
merged 6 commits into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions libs/config/location.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package config

import "fmt"

type Location struct {
File string
Line int
Column int
}

func (l Location) String() string {
return fmt.Sprintf("%s:%d:%d", l.File, l.Line, l.Column)
}
106 changes: 106 additions & 0 deletions libs/config/node.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package config

import "time"

type Value struct {
v any
l Location

// Whether or not this value is an anchor.
// If this node doesn't map to a type, we don't need to warn about it.
anchor bool
}

var NilValue = Value{
v: nil,
l: Location{},
}

func New(v any, loc Location) Value {
pietern marked this conversation as resolved.
Show resolved Hide resolved
return Value{
v: v,
l: loc,
}
}

func (v Value) AsMap() (map[string]Value, bool) {
m, ok := v.v.(map[string]Value)
return m, ok
}

func (v Value) Location() Location {
return v.l
}

func (v Value) AsAny() any {
switch vv := v.v.(type) {
case map[string]Value:
m := make(map[string]any)
for k, v := range vv {
m[k] = v.AsAny()
}
return m
case []Value:
a := make([]any, len(vv))
for i, v := range vv {
a[i] = v.AsAny()
}
return a
case string:
return vv
pietern marked this conversation as resolved.
Show resolved Hide resolved
case bool:
return vv
case int:
return vv
case int32:
return vv
case int64:
return vv
case float32:
return vv
case float64:
return vv
case time.Time:
return vv
case nil:
return nil
default:
panic("not handled")
}
}

func (v Value) Get(key string) Value {
m, ok := v.AsMap()
if !ok {
return NilValue
}

vv, ok := m[key]
if !ok {
return NilValue
}

return vv
}

func (v Value) Index(i int) Value {
s, ok := v.v.([]Value)
if !ok {
return NilValue
}

if i < 0 || i >= len(s) {
return NilValue
}

return s[i]
}

func (v Value) MarkAnchor() Value {
return Value{
v: v.v,
l: v.l,

anchor: true,
}
}
227 changes: 227 additions & 0 deletions libs/config/yamlloader/loader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,227 @@
package yamlloader

import (
"fmt"
"math"
"strconv"
"strings"
"time"

"github.com/databricks/cli/libs/config"
"gopkg.in/yaml.v3"
)

type loader struct {
path string
}

func errorf(loc config.Location, format string, args ...interface{}) error {
return fmt.Errorf("yaml (%s): %s", loc, fmt.Sprintf(format, args...))
}

func newLoader(path string) *loader {
return &loader{
path: path,
}
}

func (d *loader) location(node *yaml.Node) config.Location {
return config.Location{
File: d.path,
Line: node.Line,
Column: node.Column,
}
}

func (d *loader) load(node *yaml.Node) (config.Value, error) {
loc := config.Location{
File: d.path,
Line: node.Line,
Column: node.Column,
}

var value config.Value
var err error

switch node.Kind {
case yaml.DocumentNode:
value, err = d.loadDocument(node, loc)
case yaml.SequenceNode:
value, err = d.loadSequence(node, loc)
case yaml.MappingNode:
value, err = d.loadMapping(node, loc)
case yaml.ScalarNode:
value, err = d.loadScalar(node, loc)
case yaml.AliasNode:
value, err = d.loadAlias(node, loc)
default:
return config.NilValue, errorf(loc, "unknown node kind: %v", node.Kind)
}

if err != nil {
return value, err
}

// Mark value as anchor if needed.
// If this node doesn't map to a type, we don't need to warn about it.
if node.Anchor != "" {
value = value.MarkAnchor()
}

return value, nil
}

func (d *loader) loadDocument(node *yaml.Node, loc config.Location) (config.Value, error) {
return d.load(node.Content[0])
}

func (d *loader) loadSequence(node *yaml.Node, loc config.Location) (config.Value, error) {
acc := make([]config.Value, len(node.Content))
for i, n := range node.Content {
v, err := d.load(n)
if err != nil {
return config.NilValue, err
}

acc[i] = v
}

return config.New(acc, loc), nil
}

func (d *loader) loadMapping(node *yaml.Node, loc config.Location) (config.Value, error) {
var merge *yaml.Node

acc := make(map[string]config.Value)
for i := 0; i < len(node.Content); i += 2 {
key := node.Content[i]
val := node.Content[i+1]

// Assert that keys are strings
if key.Kind != yaml.ScalarNode {
return config.NilValue, errorf(loc, "key is not a scalar")
}

st := key.ShortTag()
switch st {
case "!!str":
// OK
case "!!merge":
merge = val
continue
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess there is only one merge node in each mapping? Any sense in panicking if merge is non-nil and we hit this codepath?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is correct. It is not allowed to use the same key multiple times, which means that specifying << twice is not valid. To merge multiple anchors, the value for << can be an array. I checked and the parser returns an error if you specify the same key twice.

I'll still add the panic, though, that is cheap.

default:
return config.NilValue, errorf(loc, "invalid key tag: %v", st)
}

v, err := d.load(val)
if err != nil {
return config.NilValue, err
}

acc[key.Value] = v
}

if merge == nil {
return config.New(acc, loc), nil
}

// Build location for the merge node.
var mloc = d.location(merge)
var merr = errorf(mloc, "map merge requires map or sequence of maps as the value")

// Build a sequence of values to merge.
// The entries that we already accumulated have precedence.
var seq []map[string]config.Value
switch merge.Kind {
case yaml.SequenceNode:
for _, n := range merge.Content {
v, err := d.load(n)
if err != nil {
return config.NilValue, err
}
m, ok := v.AsMap()
if !ok {
return config.NilValue, merr
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we indicate to the user which anchor is incorrect and what its true type is? Would help with debugging.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error comes from merr and includes the location information where it is defined.

For example:

yaml (testdata/error_02.yml:5:7): map merge requires map or sequence of maps as the value

Where 5:7 maps to the opening bracket in the merge key:

# Use string anchor inside sequence to extend a mapping.
str: &str "Hello world!"

map:
  <<: [*str]
  key: value

}
seq = append(seq, m)
}
case yaml.AliasNode:
v, err := d.load(merge)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we ever intend this pathway and that inside the loop in the SequenceNode branch to diverge? If not, it may make sense to abstract this logic into a standalone function to ensure consistency over time.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do not. I rewrote this to first get a slice of *yaml.Node and then do the processing to abstract it.

if err != nil {
return config.NilValue, err
}
m, ok := v.AsMap()
if !ok {
return config.NilValue, merr
pietern marked this conversation as resolved.
Show resolved Hide resolved
}
seq = append(seq, m)
pietern marked this conversation as resolved.
Show resolved Hide resolved
default:
return config.NilValue, merr
}

// Append the accumulated entries to the sequence.
seq = append(seq, acc)
out := make(map[string]config.Value)
for _, m := range seq {
for k, v := range m {
out[k] = v
}
}

return config.New(out, loc), nil
}

func (d *loader) loadScalar(node *yaml.Node, loc config.Location) (config.Value, error) {
st := node.ShortTag()
switch st {
case "!!str":
return config.New(node.Value, loc), nil
case "!!bool":
switch strings.ToLower(node.Value) {
case "true":
return config.New(true, loc), nil
case "false":
return config.New(false, loc), nil
default:
return config.NilValue, errorf(loc, "invalid bool value: %v", node.Value)
}
case "!!int":
i64, err := strconv.ParseInt(node.Value, 10, 64)
if err != nil {
return config.NilValue, errorf(loc, "invalid int value: %v", node.Value)
}
// Use regular int type instead of int64 if possible.
if i64 >= math.MinInt32 && i64 <= math.MaxInt32 {
return config.New(int(i64), loc), nil
}
return config.New(i64, loc), nil
case "!!float":
f64, err := strconv.ParseFloat(node.Value, 64)
if err != nil {
return config.NilValue, errorf(loc, "invalid float value: %v", node.Value)
}
return config.New(f64, loc), nil
case "!!null":
return config.New(nil, loc), nil
case "!!timestamp":
// Try a couple of layouts
for _, layout := range []string{
"2006-1-2T15:4:5.999999999Z07:00", // RCF3339Nano with short date fields.
"2006-1-2t15:4:5.999999999Z07:00", // RFC3339Nano with short date fields and lower-case "t".
"2006-1-2 15:4:5.999999999", // space separated with no time zone
"2006-1-2", // date only
} {
t, terr := time.Parse(layout, node.Value)
if terr == nil {
return config.New(t, loc), nil
}
}
return config.NilValue, errorf(loc, "invalid timestamp value: %v", node.Value)
default:
return config.NilValue, errorf(loc, "unknown tag: %v", st)
}
}

func (d *loader) loadAlias(node *yaml.Node, loc config.Location) (config.Value, error) {
return d.load(node.Alias)
}
12 changes: 12 additions & 0 deletions libs/config/yamlloader/testdata/anchor_01.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# 1. Basic Anchor and Alias
defaults: &DEFAULTS
color: red
size: large

shirt1:
<<: *DEFAULTS
pattern: striped

shirt2:
<<: *DEFAULTS
pattern: plain
13 changes: 13 additions & 0 deletions libs/config/yamlloader/testdata/anchor_02.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# 2. Merging Anchors
# Here, multiple anchors can be merged into a single item.
defaults: &DEFAULTS
color: red
size: large

materials: &MATERIALS
primary: cotton
secondary: polyester

shirt:
<<: [*DEFAULTS, *MATERIALS]
pattern: striped
10 changes: 10 additions & 0 deletions libs/config/yamlloader/testdata/anchor_03.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# 3. Overriding Merged Anchor Values
# You can override values when merging.
defaults: &DEFAULTS
color: red
size: large
pattern: plain

shirt:
<<: *DEFAULTS
color: blue
Loading
Loading