From 022c38cba765a4840b83bb4acbe19ad5050c9b2b Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 16 Mar 2026 00:23:56 +0100 Subject: [PATCH 1/3] Replace regex-based interpolation with character scanner - Move interpolation parser to libs/interpolation/ (independent of dyn) - Use \$ -> $ and \\ -> \ escape sequences (Bash convention) - Reject nested ${} constructs as errors - Replace strings.Replace interpolation with token-based concatenation - Add WarnMalformedReferences mutator for early parse error warnings - Add NewRefWithDiagnostics for surfacing parse errors as diagnostics Co-authored-by: Isaac --- .../bad_syntax/out.deploy.direct.txt | 3 + .../bad_syntax/out.plan.direct.txt | 3 + .../bad_syntax/out.plan.terraform.txt | 3 + .../resource_deps/bad_syntax/output.txt | 3 + .../malformed_reference/databricks.yml | 6 + .../malformed_reference/out.test.toml | 5 + .../variables/malformed_reference/output.txt | 12 + .../variables/malformed_reference/script | 1 + .../mutator/warn_malformed_references.go | 47 +++ bundle/phases/initialize.go | 4 + design/interpolation-parser.md | 72 +++++ libs/dyn/dynvar/ref.go | 125 +++++--- libs/dyn/dynvar/ref_test.go | 18 ++ libs/dyn/dynvar/resolve.go | 50 ++-- libs/dyn/dynvar/resolve_test.go | 13 + libs/interpolation/parse.go | 213 ++++++++++++++ libs/interpolation/parse_test.go | 267 ++++++++++++++++++ 17 files changed, 790 insertions(+), 55 deletions(-) create mode 100644 acceptance/bundle/variables/malformed_reference/databricks.yml create mode 100644 acceptance/bundle/variables/malformed_reference/out.test.toml create mode 100644 acceptance/bundle/variables/malformed_reference/output.txt create mode 100644 acceptance/bundle/variables/malformed_reference/script create mode 100644 bundle/config/mutator/warn_malformed_references.go create mode 100644 design/interpolation-parser.md create mode 100644 libs/interpolation/parse.go create mode 100644 libs/interpolation/parse_test.go diff --git a/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt b/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt index 6bed0296b3..a985c8415e 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/out.deploy.direct.txt @@ -1,3 +1,6 @@ +Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + in databricks.yml:11:21 + Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/test-bundle/default/files... Deploying resources... Updating deployment state... diff --git a/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt b/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt index 1a40fdbaa3..a53d32c081 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/out.plan.direct.txt @@ -1,3 +1,6 @@ +Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + in databricks.yml:11:21 + create volumes.bar create volumes.foo diff --git a/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt b/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt index b8f8078aba..ed957b8518 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/out.plan.terraform.txt @@ -1,3 +1,6 @@ +Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + in databricks.yml:11:21 + Error: exit status 1 Error: Invalid attribute name diff --git a/acceptance/bundle/resource_deps/bad_syntax/output.txt b/acceptance/bundle/resource_deps/bad_syntax/output.txt index 0e9d83b643..968ebf652a 100644 --- a/acceptance/bundle/resource_deps/bad_syntax/output.txt +++ b/acceptance/bundle/resource_deps/bad_syntax/output.txt @@ -1,5 +1,8 @@ >>> [CLI] bundle validate -o json +Warning: invalid variable reference ${resources.volumes.bar.bad..syntax}: invalid path + in databricks.yml:11:21 + { "volumes": { "bar": { diff --git a/acceptance/bundle/variables/malformed_reference/databricks.yml b/acceptance/bundle/variables/malformed_reference/databricks.yml new file mode 100644 index 0000000000..63a0282395 --- /dev/null +++ b/acceptance/bundle/variables/malformed_reference/databricks.yml @@ -0,0 +1,6 @@ +bundle: + name: "${foo.bar-}" + +variables: + a: + default: hello diff --git a/acceptance/bundle/variables/malformed_reference/out.test.toml b/acceptance/bundle/variables/malformed_reference/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/variables/malformed_reference/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/variables/malformed_reference/output.txt b/acceptance/bundle/variables/malformed_reference/output.txt new file mode 100644 index 0000000000..fba4d19be6 --- /dev/null +++ b/acceptance/bundle/variables/malformed_reference/output.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle validate +Warning: invalid variable reference ${foo.bar-}: invalid key "bar-" + in databricks.yml:2:9 + +Name: ${foo.bar-} +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/${foo.bar-}/default + +Found 1 warning diff --git a/acceptance/bundle/variables/malformed_reference/script b/acceptance/bundle/variables/malformed_reference/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/variables/malformed_reference/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/bundle/config/mutator/warn_malformed_references.go b/bundle/config/mutator/warn_malformed_references.go new file mode 100644 index 0000000000..cf6934c7b7 --- /dev/null +++ b/bundle/config/mutator/warn_malformed_references.go @@ -0,0 +1,47 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/dyn/dynvar" +) + +type warnMalformedReferences struct{} + +// WarnMalformedReferences returns a mutator that emits warnings for strings +// containing malformed variable references (e.g. "${foo.bar-}"). +func WarnMalformedReferences() bundle.Mutator { + return &warnMalformedReferences{} +} + +func (*warnMalformedReferences) Name() string { + return "WarnMalformedReferences" +} + +func (*warnMalformedReferences) Validate(ctx context.Context, b *bundle.Bundle) error { + return nil +} + +func (*warnMalformedReferences) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + var diags diag.Diagnostics + err := b.Config.Mutate(func(root dyn.Value) (dyn.Value, error) { + _, err := dyn.Walk(root, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + // Only check values with source locations to avoid false positives + // from synthesized/computed values. + if len(v.Locations()) == 0 { + return v, nil + } + _, _, refDiags := dynvar.NewRefWithDiagnostics(v) + diags = diags.Extend(refDiags) + return v, nil + }) + return root, err + }) + if err != nil { + diags = diags.Extend(diag.FromErr(err)) + } + return diags +} diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 761714b48e..02da7a8f15 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -94,6 +94,10 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // searches for strings with variable references in them. mutator.RewriteWorkspacePrefix(), + // Walks the config tree and emits warnings for malformed variable references + // (e.g. "${foo.bar-}") before variable resolution occurs. + mutator.WarnMalformedReferences(), + // Reads (dynamic): variables.* (checks if there's a value assigned to variable already or if it has lookup reference) // Updates (dynamic): variables.*.value (sets values from environment variables, variable files, or defaults) // Resolves and sets values for bundle variables in the following order: from environment variables, from variable files and then defaults diff --git a/design/interpolation-parser.md b/design/interpolation-parser.md new file mode 100644 index 0000000000..991a006278 --- /dev/null +++ b/design/interpolation-parser.md @@ -0,0 +1,72 @@ +# Variable Interpolation: Character Scanner Parser + +Author: Shreyas Goenka +Date: 12 March 2026 + +## Motivation + +DABs variable interpolation (`${...}`) was regex-based. This caused: + +1. **Silent failures** — `${foo.bar-}` silently treated as literal text with no warning. +2. **No suggestions** — `${bundle.nme}` produces "reference does not exist" with no hint. +3. **No escape mechanism** — no way to produce a literal `${` in output. +4. **No extensibility** — cannot support structured path features like key-value references `tasks[task_key="x"]` that exist in `libs/structs/structpath`. + +## Background: How Other Systems Parse `${...}` + +| System | Strategy | Escape | Error Quality | +|--------|----------|--------|---------------| +| Go `text/template` | State-function lexer | None | Line + template name | +| HCL2 (Terraform) | Ragel FSM + recursive descent | `$${` → literal `${` | Source range + suggestions | +| Python f-strings | Mode-stack tokenizer | `{{` → `{` | Line/column | +| Rust `format!` | Iterator-based descent | `{{`/`}}` | Spans + suggestions | +| Bash | Char-by-char + depth tracking | `\$` | Line number | + +For a syntax as simple as `${path.to.var[0]}` (no nesting, no functions, no +operators), a full recursive descent parser is overkill. A **two-mode character +scanner** — the same core pattern used by Go's `text/template` and HCL — gives +proper error reporting and escape support without the complexity. + +## Design Decisions + +### Two-mode character scanner + +A two-mode scanner (TEXT / REFERENCE) that produces a flat list of tokens. +No AST, no recursive descent. Easy to port to the Python implementation. + +See `libs/interpolation/parse.go`. + +### Nested `${` rejection + +Nested `${...}` inside a reference (e.g., `${var.foo_${var.tail}}`) is +rejected as an error. This construct is ambiguous and was never intentionally +supported — the old regex happened to match only the innermost pair by +coincidence. + +### `\$` escape sequence + +`\$` produces a literal `$`, and `\\` produces a literal `\`. This follows +the same convention used by Bash for escaping `$` and is the least +surprising option for users working in shell environments. + +A standalone `\` before any character other than `$` or `\` is passed +through as a literal backslash, so existing configurations that happen to +contain backslashes are not affected. + +### Malformed reference warnings + +A standalone `WarnMalformedReferences` mutator walks the config tree once +before variable resolution. It only checks values with source locations +(`len(v.Locations()) > 0`) to avoid false positives from synthesized values +(e.g., normalized/computed paths). + +### Token-based resolution + +The resolver's string interpolation changed from `strings.Replace` (with +count=1 to avoid double-replacing duplicate refs) to a token concatenation +loop. Each `TokenRef` maps 1:1 to a resolved value, eliminating the ambiguity. + +## Python sync + +The Python regex in `python/databricks/bundles/core/_transform.py` needs a +corresponding update in a follow-up PR. diff --git a/libs/dyn/dynvar/ref.go b/libs/dyn/dynvar/ref.go index a048c80cb8..4025521096 100644 --- a/libs/dyn/dynvar/ref.go +++ b/libs/dyn/dynvar/ref.go @@ -1,23 +1,13 @@ package dynvar import ( - "fmt" - "regexp" - + "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" -) - -var ( - // !!! Should be in sync with _variable_regex in Python code. - // !!! - // !!! See python/databricks/bundles/core/_transform.py - baseVarDef = `[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*` - re = regexp.MustCompile(fmt.Sprintf(`\$\{(%s(\.%s(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`, baseVarDef, baseVarDef)) + "github.com/databricks/cli/libs/interpolation" ) // Ref represents a variable reference. // It is a string [dyn.Value] contained in a larger [dyn.Value]. -// Its path within the containing [dyn.Value] is also stored. type Ref struct { // Original value. Value dyn.Value @@ -25,13 +15,13 @@ type Ref struct { // String value in the original [dyn.Value]. Str string - // Matches of the variable reference in the string. - Matches [][]string + // Parsed tokens from the interpolation parser. + Tokens []interpolation.Token } // NewRef returns a new Ref if the given [dyn.Value] contains a string // with one or more variable references. It returns false if the given -// [dyn.Value] does not contain variable references. +// [dyn.Value] does not contain variable references or if parsing fails. // // Examples of a valid variable references: // - "${a.b}" @@ -39,53 +29,118 @@ type Ref struct { // - "${a.b[0].c}" // - "${a} ${b} ${c}" func NewRef(v dyn.Value) (Ref, bool) { + ref, ok, _ := newRef(v) + return ref, ok +} + +// NewRefWithDiagnostics returns a new Ref along with any diagnostics. +// Parse errors for malformed references (e.g. "${foo.bar-}") are returned +// as warnings. The second return value is false when no valid references +// are found (either no references at all, or a parse error occurred). +func NewRefWithDiagnostics(v dyn.Value) (Ref, bool, diag.Diagnostics) { + return newRef(v) +} + +func newRef(v dyn.Value) (Ref, bool, diag.Diagnostics) { s, ok := v.AsString() if !ok { - return Ref{}, false + return Ref{}, false, nil + } + + tokens, err := interpolation.Parse(s) + if err != nil { + // Return parse error as a warning diagnostic. + return Ref{}, false, diag.Diagnostics{{ + Severity: diag.Warning, + Summary: err.Error(), + Locations: v.Locations(), + }} + } + + // Check if any token is a variable reference or if escape sequences + // were processed (the reconstructed string differs from the original). + hasRef := false + for _, t := range tokens { + if t.Kind == interpolation.TokenRef { + hasRef = true + break + } } - // Check if the string contains any variable references. - m := re.FindAllStringSubmatch(s, -1) - if len(m) == 0 { - return Ref{}, false + if !hasRef { + // Even without references, if escape sequences were processed we need + // to return a Ref so the resolver can reconstruct the literal string. + if !hasEscapes(s, tokens) { + return Ref{}, false, nil + } } return Ref{ - Value: v, - Str: s, - Matches: m, - }, true + Value: v, + Str: s, + Tokens: tokens, + }, true, nil } // IsPure returns true if the variable reference contains a single // variable reference and nothing more. We need this so we can // interpolate values of non-string types (i.e. it can be substituted). func (v Ref) IsPure() bool { - // Need single match, equal to the incoming string. - if len(v.Matches) == 0 || len(v.Matches[0]) == 0 { - panic("invalid variable reference; expect at least one match") - } - return v.Matches[0][0] == v.Str + return len(v.Tokens) == 1 && v.Tokens[0].Kind == interpolation.TokenRef } +// References returns the path strings of all variable references. func (v Ref) References() []string { var out []string - for _, m := range v.Matches { - out = append(out, m[1]) + for _, t := range v.Tokens { + if t.Kind == interpolation.TokenRef { + out = append(out, t.Value) + } } return out } +// hasEscapes checks whether escape sequences were processed during parsing. +// Escape processing shortens the output (e.g., `\$` becomes `$`), so if the +// sum of token value lengths is less than the original string length, escapes +// were present. +func hasEscapes(original string, tokens []interpolation.Token) bool { + n := 0 + for _, t := range tokens { + n += len(t.Value) + } + return n < len(original) +} + +// IsPureVariableReference returns true if s is a single variable reference +// with no surrounding text. func IsPureVariableReference(s string) bool { - return len(s) > 0 && re.FindString(s) == s + if len(s) == 0 { + return false + } + tokens, err := interpolation.Parse(s) + if err != nil { + return false + } + return len(tokens) == 1 && tokens[0].Kind == interpolation.TokenRef } +// ContainsVariableReference returns true if s contains at least one variable reference. func ContainsVariableReference(s string) bool { - return re.MatchString(s) + tokens, err := interpolation.Parse(s) + if err != nil { + return false + } + for _, t := range tokens { + if t.Kind == interpolation.TokenRef { + return true + } + } + return false } -// If s is a pure variable reference, this function returns the corresponding -// dyn.Path. Otherwise, it returns false. +// PureReferenceToPath returns the corresponding [dyn.Path] if s is a pure +// variable reference. Otherwise, it returns false. func PureReferenceToPath(s string) (dyn.Path, bool) { ref, ok := NewRef(dyn.V(s)) if !ok { diff --git a/libs/dyn/dynvar/ref_test.go b/libs/dyn/dynvar/ref_test.go index d59d80cba1..3c98e1e474 100644 --- a/libs/dyn/dynvar/ref_test.go +++ b/libs/dyn/dynvar/ref_test.go @@ -3,6 +3,7 @@ package dynvar import ( "testing" + "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -47,6 +48,23 @@ func TestNewRefInvalidPattern(t *testing.T) { } } +func TestNewRefWithDiagnosticsMalformed(t *testing.T) { + v := dyn.NewValue("${foo.bar-}", []dyn.Location{{File: "test.yml", Line: 1, Column: 1}}) + _, ok, diags := NewRefWithDiagnostics(v) + assert.False(t, ok) + require.Len(t, diags, 1) + assert.Equal(t, diag.Warning, diags[0].Severity) + assert.Contains(t, diags[0].Summary, "invalid") +} + +func TestNewRefWithDiagnosticsValid(t *testing.T) { + v := dyn.V("${foo.bar}") + ref, ok, diags := NewRefWithDiagnostics(v) + assert.True(t, ok) + assert.Empty(t, diags) + assert.Equal(t, []string{"foo.bar"}, ref.References()) +} + func TestIsPureVariableReference(t *testing.T) { assert.False(t, IsPureVariableReference("")) assert.False(t, IsPureVariableReference("${foo.bar} suffix")) diff --git a/libs/dyn/dynvar/resolve.go b/libs/dyn/dynvar/resolve.go index b1366d93bb..f811f25394 100644 --- a/libs/dyn/dynvar/resolve.go +++ b/libs/dyn/dynvar/resolve.go @@ -7,6 +7,7 @@ import ( "strings" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/interpolation" "github.com/databricks/cli/libs/utils" ) @@ -156,30 +157,39 @@ func (r *resolver) resolveRef(ref Ref, seen []string) (dyn.Value, error) { return dyn.NewValue(resolved[0].Value(), ref.Value.Locations()), nil } - // Not pure; perform string interpolation. - for j := range ref.Matches { - // The value is invalid if resolution returned [ErrSkipResolution]. - // We must skip those and leave the original variable reference in place. - if !resolved[j].IsValid() { - continue - } - - // Try to turn the resolved value into a string. - s, ok := resolved[j].AsString() - if !ok { - // Only allow primitive types to be converted to string. - switch resolved[j].Kind() { - case dyn.KindString, dyn.KindBool, dyn.KindInt, dyn.KindFloat, dyn.KindTime, dyn.KindNil: - s = fmt.Sprint(resolved[j].AsAny()) - default: - return dyn.InvalidValue, fmt.Errorf("cannot interpolate non-primitive value of type %s into string", resolved[j].Kind()) + // Not pure; perform token-based string interpolation. + var buf strings.Builder + refIdx := 0 + for _, tok := range ref.Tokens { + switch tok.Kind { + case interpolation.TokenLiteral: + buf.WriteString(tok.Value) + case interpolation.TokenRef: + // The value is invalid if resolution returned [ErrSkipResolution]. + // We must skip those and leave the original variable reference in place. + if !resolved[refIdx].IsValid() { + buf.WriteString("${") + buf.WriteString(tok.Value) + buf.WriteByte('}') + } else { + // Try to turn the resolved value into a string. + s, ok := resolved[refIdx].AsString() + if !ok { + // Only allow primitive types to be converted to string. + switch resolved[refIdx].Kind() { + case dyn.KindString, dyn.KindBool, dyn.KindInt, dyn.KindFloat, dyn.KindTime, dyn.KindNil: + s = fmt.Sprint(resolved[refIdx].AsAny()) + default: + return dyn.InvalidValue, fmt.Errorf("cannot interpolate non-primitive value of type %s into string", resolved[refIdx].Kind()) + } + } + buf.WriteString(s) } + refIdx++ } - - ref.Str = strings.Replace(ref.Str, ref.Matches[j][0], s, 1) } - return dyn.NewValue(ref.Str, ref.Value.Locations()), nil + return dyn.NewValue(buf.String(), ref.Value.Locations()), nil } func (r *resolver) resolveKey(key string, seen []string) (dyn.Value, error) { diff --git a/libs/dyn/dynvar/resolve_test.go b/libs/dyn/dynvar/resolve_test.go index 5b64029bae..598c78852b 100644 --- a/libs/dyn/dynvar/resolve_test.go +++ b/libs/dyn/dynvar/resolve_test.go @@ -393,3 +393,16 @@ func TestResolveSequenceVariable(t *testing.T) { assert.Equal(t, "value1", seq[0].MustString()) assert.Equal(t, "value2", seq[1].MustString()) } + +func TestResolveEscapedRef(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "a": dyn.V("a"), + "b": dyn.V(`\${a}`), + }) + + out, err := dynvar.Resolve(in, dynvar.DefaultLookup(in)) + require.NoError(t, err) + + // Escaped reference should produce literal ${a}. + assert.Equal(t, "${a}", getByPath(t, out, "b").MustString()) +} diff --git a/libs/interpolation/parse.go b/libs/interpolation/parse.go new file mode 100644 index 0000000000..48518c8c32 --- /dev/null +++ b/libs/interpolation/parse.go @@ -0,0 +1,213 @@ +package interpolation + +import ( + "errors" + "fmt" + "regexp" + "strings" +) + +// TokenKind represents the type of a parsed token. +type TokenKind int + +const ( + TokenLiteral TokenKind = iota // Literal text (no interpolation) + TokenRef // Variable reference: content between ${ and } +) + +// Token represents a parsed segment of an interpolation string. +type Token struct { + Kind TokenKind + Value string // For TokenLiteral: the literal text; For TokenRef: the path string (without ${}) + Start int // Start position in original string + End int // End position in original string (exclusive) +} + +const ( + dollarChar = '$' + openBrace = '{' + closeBrace = '}' + backslashChar = '\\' +) + +// keyPattern validates a single key segment in a variable path. +// Matches: [a-zA-Z]+([-_]*[a-zA-Z0-9]+)* +// Examples: "foo", "my-job", "a_b_c", "abc123" +var keyPattern = regexp.MustCompile(`^[a-zA-Z]+([-_]*[a-zA-Z0-9]+)*$`) + +// indexPattern matches one or more [N] index suffixes. +var indexPattern = regexp.MustCompile(`^(\[[0-9]+\])+$`) + +// Parse parses a string that may contain ${...} variable references. +// It returns a slice of tokens representing literal text and variable references. +// +// Escape sequences: +// - "\$" produces a literal "$" +// - "\\" produces a literal "\" +// +// Examples: +// - "hello" -> [Literal("hello")] +// - "${a.b}" -> [Ref("a.b")] +// - "pre ${a.b} post" -> [Literal("pre "), Ref("a.b"), Literal(" post")] +// - "\${a.b}" -> [Literal("${a.b}")] +func Parse(s string) ([]Token, error) { + if len(s) == 0 { + return nil, nil + } + + var tokens []Token + i := 0 + var buf strings.Builder + litStart := 0 // tracks the start position in the original string for the current literal + + flushLiteral := func(end int) { + if buf.Len() > 0 { + tokens = append(tokens, Token{ + Kind: TokenLiteral, + Value: buf.String(), + Start: litStart, + End: end, + }) + buf.Reset() + } + } + + for i < len(s) { + switch s[i] { + case backslashChar: + // Handle escape sequences: \$ -> $, \\ -> \ + if buf.Len() == 0 { + litStart = i + } + if i+1 < len(s) { + switch s[i+1] { + case dollarChar: + buf.WriteByte(dollarChar) + i += 2 + case backslashChar: + buf.WriteByte(backslashChar) + i += 2 + default: + // Not a recognized escape; treat backslash as literal. + buf.WriteByte(backslashChar) + i++ + } + } else { + // Trailing backslash at end of string: treat as literal. + buf.WriteByte(backslashChar) + i++ + } + + case dollarChar: + // We see '$'. Look ahead. + if i+1 >= len(s) { + // Trailing '$' at end of string: treat as literal. + if buf.Len() == 0 { + litStart = i + } + buf.WriteByte(dollarChar) + i++ + continue + } + + if s[i+1] != openBrace { + // '$' not followed by '{': treat as literal. + if buf.Len() == 0 { + litStart = i + } + buf.WriteByte(dollarChar) + i++ + continue + } + + // Start of variable reference "${". + refStart := i + j := i + 2 // skip "${" + + // Scan until closing '}', rejecting nested '${'. + pathStart := j + for j < len(s) && s[j] != closeBrace { + if s[j] == dollarChar && j+1 < len(s) && s[j+1] == openBrace { + return nil, fmt.Errorf( + "nested variable references are not supported (at position %d)", refStart, + ) + } + j++ + } + + if j >= len(s) { + return nil, fmt.Errorf( + "unterminated variable reference at position %d", refStart, + ) + } + + path := s[pathStart:j] + j++ // skip '}' + + if path == "" { + return nil, fmt.Errorf( + "empty variable reference at position %d", refStart, + ) + } + + if err := validatePath(path); err != nil { + return nil, fmt.Errorf( + "invalid variable reference ${%s}: %w", path, err, + ) + } + + flushLiteral(i) + tokens = append(tokens, Token{ + Kind: TokenRef, + Value: path, + Start: refStart, + End: j, + }) + i = j + + default: + if buf.Len() == 0 { + litStart = i + } + buf.WriteByte(s[i]) + i++ + } + } + + flushLiteral(i) + return tokens, nil +} + +// validatePath validates the path inside a ${...} reference by splitting on +// '.' and validating each segment individually. +func validatePath(path string) error { + segments := strings.Split(path, ".") + for _, seg := range segments { + if seg == "" { + return errors.New("invalid path") + } + + // Strip trailing [N] index suffixes to get the key part. + key, idx := splitKeyAndIndex(seg) + + if key == "" { + return fmt.Errorf("invalid key %q", seg) + } + if !keyPattern.MatchString(key) { + return fmt.Errorf("invalid key %q", key) + } + if idx != "" && !indexPattern.MatchString(idx) { + return fmt.Errorf("invalid index in %q", seg) + } + } + return nil +} + +// splitKeyAndIndex splits "foo[0][1]" into ("foo", "[0][1]"). +func splitKeyAndIndex(seg string) (string, string) { + i := strings.IndexByte(seg, '[') + if i < 0 { + return seg, "" + } + return seg[:i], seg[i:] +} diff --git a/libs/interpolation/parse_test.go b/libs/interpolation/parse_test.go new file mode 100644 index 0000000000..565f13b2e1 --- /dev/null +++ b/libs/interpolation/parse_test.go @@ -0,0 +1,267 @@ +package interpolation + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParseEmpty(t *testing.T) { + tokens, err := Parse("") + require.NoError(t, err) + assert.Nil(t, tokens) +} + +func TestParseLiteralOnly(t *testing.T) { + tokens, err := Parse("hello world") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenLiteral, Value: "hello world", Start: 0, End: 11}, + }, tokens) +} + +func TestParseSingleRef(t *testing.T) { + tokens, err := Parse("${a.b}") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenRef, Value: "a.b", Start: 0, End: 6}, + }, tokens) +} + +func TestParseMultipleRefs(t *testing.T) { + tokens, err := Parse("${a} ${b}") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenRef, Value: "a", Start: 0, End: 4}, + {Kind: TokenLiteral, Value: " ", Start: 4, End: 5}, + {Kind: TokenRef, Value: "b", Start: 5, End: 9}, + }, tokens) +} + +func TestParseMixedLiteralAndRef(t *testing.T) { + tokens, err := Parse("pre ${a.b} post") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenLiteral, Value: "pre ", Start: 0, End: 4}, + {Kind: TokenRef, Value: "a.b", Start: 4, End: 10}, + {Kind: TokenLiteral, Value: " post", Start: 10, End: 15}, + }, tokens) +} + +func TestParseValidPaths(t *testing.T) { + tests := []struct { + input string + path string + }{ + {"${a}", "a"}, + {"${abc}", "abc"}, + {"${a.b.c}", "a.b.c"}, + {"${a.b[0]}", "a.b[0]"}, + {"${a[0]}", "a[0]"}, + {"${a.b[0][1]}", "a.b[0][1]"}, + {"${a.b-c}", "a.b-c"}, + {"${a.b_c}", "a.b_c"}, + {"${a.b-c-d}", "a.b-c-d"}, + {"${a.b_c_d}", "a.b_c_d"}, + {"${abc.def.ghi}", "abc.def.ghi"}, + {"${a.b123}", "a.b123"}, + {"${resources.jobs.my-job.id}", "resources.jobs.my-job.id"}, + {"${var.my_var}", "var.my_var"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + tokens, err := Parse(tt.input) + require.NoError(t, err) + require.Len(t, tokens, 1) + assert.Equal(t, TokenRef, tokens[0].Kind) + assert.Equal(t, tt.path, tokens[0].Value) + }) + } +} + +func TestParseEscapeSequences(t *testing.T) { + tests := []struct { + name string + input string + tokens []Token + }{ + { + "escaped_dollar", + `\$`, + []Token{{Kind: TokenLiteral, Value: "$", Start: 0, End: 2}}, + }, + { + "escaped_ref", + `\${a}`, + []Token{{Kind: TokenLiteral, Value: "${a}", Start: 0, End: 5}}, + }, + { + "escaped_backslash", + `\\`, + []Token{{Kind: TokenLiteral, Value: `\`, Start: 0, End: 2}}, + }, + { + "double_escaped_backslash", + `\\\\`, + []Token{{Kind: TokenLiteral, Value: `\\`, Start: 0, End: 4}}, + }, + { + "escaped_backslash_then_ref", + `\\${a.b}`, + []Token{ + {Kind: TokenLiteral, Value: `\`, Start: 0, End: 2}, + {Kind: TokenRef, Value: "a.b", Start: 2, End: 8}, + }, + }, + { + "backslash_before_non_special", + `\n`, + []Token{{Kind: TokenLiteral, Value: `\n`, Start: 0, End: 2}}, + }, + { + "escaped_dollar_then_ref", + `\$\$${a.b}`, + []Token{ + {Kind: TokenLiteral, Value: "$$", Start: 0, End: 4}, + {Kind: TokenRef, Value: "a.b", Start: 4, End: 10}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tokens, err := Parse(tt.input) + require.NoError(t, err) + assert.Equal(t, tt.tokens, tokens) + }) + } +} + +func TestParseDollarAtEnd(t *testing.T) { + tokens, err := Parse("abc$") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenLiteral, Value: "abc$", Start: 0, End: 4}, + }, tokens) +} + +func TestParseDollarBeforeNonBrace(t *testing.T) { + tokens, err := Parse("$x") + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenLiteral, Value: "$x", Start: 0, End: 2}, + }, tokens) +} + +func TestParseBackslashAtEnd(t *testing.T) { + tokens, err := Parse(`abc\`) + require.NoError(t, err) + assert.Equal(t, []Token{ + {Kind: TokenLiteral, Value: `abc\`, Start: 0, End: 4}, + }, tokens) +} + +func TestParseNestedReferenceReturnsError(t *testing.T) { + _, err := Parse("${var.foo_${var.tail}}") + require.Error(t, err) + assert.Contains(t, err.Error(), "nested variable references are not supported") +} + +func TestParseUnterminatedRef(t *testing.T) { + _, err := Parse("${a.b") + require.Error(t, err) + assert.Contains(t, err.Error(), "unterminated") +} + +func TestParseEmptyRef(t *testing.T) { + _, err := Parse("${}") + require.Error(t, err) + assert.Contains(t, err.Error(), "empty") +} + +func TestParseInvalidPaths(t *testing.T) { + tests := []struct { + name string + input string + }{ + {"trailing_hyphen", "${foo.bar-}"}, + {"double_dot", "${foo..bar}"}, + {"leading_digit", "${0foo}"}, + {"hyphen_start_segment", "${foo.-bar}"}, + {"trailing_dot", "${foo.}"}, + {"leading_dot", "${.foo}"}, + {"space_in_path", "${foo. bar}"}, + {"special_char", "${foo.bar!}"}, + {"just_digits", "${123}"}, + {"trailing_underscore", "${foo.bar_}"}, + {"underscore_start_segment", "${foo._bar}"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := Parse(tt.input) + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid") + }) + } +} + +func TestParsePositions(t *testing.T) { + tests := []struct { + name string + input string + tokens []Token + }{ + { + "single_ref", + "${a.b}", + []Token{{Kind: TokenRef, Value: "a.b", Start: 0, End: 6}}, + }, + { + "literal_ref_literal", + "pre ${a.b} post", + []Token{ + {Kind: TokenLiteral, Value: "pre ", Start: 0, End: 4}, + {Kind: TokenRef, Value: "a.b", Start: 4, End: 10}, + {Kind: TokenLiteral, Value: " post", Start: 10, End: 15}, + }, + }, + { + "escaped_ref", + `\${a}`, + []Token{{Kind: TokenLiteral, Value: "${a}", Start: 0, End: 5}}, + }, + { + "adjacent_refs", + "${a}${b}", + []Token{ + {Kind: TokenRef, Value: "a", Start: 0, End: 4}, + {Kind: TokenRef, Value: "b", Start: 4, End: 8}, + }, + }, + { + "dollar_sign_mid_literal", + "a$b", + []Token{{Kind: TokenLiteral, Value: "a$b", Start: 0, End: 3}}, + }, + { + "escape_between_refs", + `${a}\$${b}`, + []Token{ + {Kind: TokenRef, Value: "a", Start: 0, End: 4}, + {Kind: TokenLiteral, Value: "$", Start: 4, End: 6}, + {Kind: TokenRef, Value: "b", Start: 6, End: 10}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tokens, err := Parse(tt.input) + require.NoError(t, err) + assert.Equal(t, tt.tokens, tokens) + }) + } +} From 386481b7548c8d5b87220cdbe220fc3bd54637a9 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 16 Mar 2026 00:32:22 +0100 Subject: [PATCH 2/3] Add "did you mean" suggestions for unresolved variable references - Add Levenshtein distance and ClosestMatch in libs/textutil/ (dyn-independent) - Add SuggestPath in libs/dyn/dynvar/ for fuzzy path walking - Wire SuggestFn into Resolve via WithSuggestFn option - Add var <-> variables.*.value bidirectional rewriting for suggestions - Add acceptance test for did-you-mean suggestions Co-authored-by: Isaac --- .../variables/did_you_mean/databricks.yml | 11 +++ .../variables/did_you_mean/out.test.toml | 5 + .../bundle/variables/did_you_mean/output.txt | 13 +++ .../bundle/variables/did_you_mean/script | 1 + .../mutator/resolve_variable_references.go | 34 ++++++- design/variable-lookup-suggestions.md | 52 +++++++++++ libs/dyn/dynvar/resolve.go | 33 ++++++- libs/dyn/dynvar/resolve_test.go | 17 ++++ libs/dyn/dynvar/suggest.go | 55 +++++++++++ libs/dyn/dynvar/suggest_test.go | 91 +++++++++++++++++++ libs/textutil/levenshtein.go | 55 +++++++++++ libs/textutil/levenshtein_test.go | 59 ++++++++++++ 12 files changed, 422 insertions(+), 4 deletions(-) create mode 100644 acceptance/bundle/variables/did_you_mean/databricks.yml create mode 100644 acceptance/bundle/variables/did_you_mean/out.test.toml create mode 100644 acceptance/bundle/variables/did_you_mean/output.txt create mode 100644 acceptance/bundle/variables/did_you_mean/script create mode 100644 design/variable-lookup-suggestions.md create mode 100644 libs/dyn/dynvar/suggest.go create mode 100644 libs/dyn/dynvar/suggest_test.go create mode 100644 libs/textutil/levenshtein.go create mode 100644 libs/textutil/levenshtein_test.go diff --git a/acceptance/bundle/variables/did_you_mean/databricks.yml b/acceptance/bundle/variables/did_you_mean/databricks.yml new file mode 100644 index 0000000000..ff4da90ba8 --- /dev/null +++ b/acceptance/bundle/variables/did_you_mean/databricks.yml @@ -0,0 +1,11 @@ +bundle: + name: did-you-mean + +variables: + my_cluster_id: + default: abc123 + +resources: + jobs: + my_job: + name: "${var.my_clster_id}" diff --git a/acceptance/bundle/variables/did_you_mean/out.test.toml b/acceptance/bundle/variables/did_you_mean/out.test.toml new file mode 100644 index 0000000000..d560f1de04 --- /dev/null +++ b/acceptance/bundle/variables/did_you_mean/out.test.toml @@ -0,0 +1,5 @@ +Local = true +Cloud = false + +[EnvMatrix] + DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/variables/did_you_mean/output.txt b/acceptance/bundle/variables/did_you_mean/output.txt new file mode 100644 index 0000000000..b504098ca5 --- /dev/null +++ b/acceptance/bundle/variables/did_you_mean/output.txt @@ -0,0 +1,13 @@ + +>>> [CLI] bundle validate +Error: reference does not exist: ${var.my_clster_id}. did you mean ${var.my_cluster_id}? + +Name: did-you-mean +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/did-you-mean/default + +Found 1 error + +Exit code: 1 diff --git a/acceptance/bundle/variables/did_you_mean/script b/acceptance/bundle/variables/did_you_mean/script new file mode 100644 index 0000000000..5350876150 --- /dev/null +++ b/acceptance/bundle/variables/did_you_mean/script @@ -0,0 +1 @@ +trace $CLI bundle validate diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 9868b4fb95..43090691f8 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -202,6 +202,38 @@ func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn // normalized, _ := convert.Normalize(b.Config, root, convert.IncludeMissingFields) + suggestFn := dynvar.WithSuggestFn(func(p dyn.Path) string { + // Rewrite var.X -> variables.X.value for suggestion lookup, + // then convert the suggestion back to var.X form. + isVar := p.HasPrefix(varPath) + if isVar { + newPath := dyn.NewPath(dyn.Key("variables"), p[1], dyn.Key("value")) + if len(p) > 2 { + newPath = newPath.Append(p[2:]...) + } + p = newPath + } + suggestion := dynvar.SuggestPath(normalized, p) + if suggestion == "" { + return "" + } + // Convert variables.X.value back to var.X for user-facing messages. + if isVar { + sp, err := dyn.NewPathFromString(suggestion) + if err != nil { + return suggestion + } + variablesPrefix := dyn.NewPath(dyn.Key("variables")) + valueSuffix := dyn.NewPath(dyn.Key("value")) + if rest, ok := sp.CutPrefix(variablesPrefix); ok { + if rest, ok := rest.CutSuffix(valueSuffix); ok { + return dyn.NewPath(dyn.Key("var")).Append(rest...).String() + } + } + } + return suggestion + }) + // If the pattern is nil, we resolve references in the entire configuration. root, err := dyn.MapByPattern(root, m.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { // Resolve variable references in all values. @@ -236,7 +268,7 @@ func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn } return dyn.InvalidValue, dynvar.ErrSkipResolution - }) + }, suggestFn) }) if err != nil { return dyn.InvalidValue, err diff --git a/design/variable-lookup-suggestions.md b/design/variable-lookup-suggestions.md new file mode 100644 index 0000000000..9e0bf46cae --- /dev/null +++ b/design/variable-lookup-suggestions.md @@ -0,0 +1,52 @@ +# "Did You Mean?" Suggestions for Invalid Variable References + +Author: Shreyas Goenka +Date: 12 March 2026 + +## Problem + +`${bundle.git.origin_urlx}` produces `reference does not exist` with no hint. +A single character typo in a long path can take minutes to spot. + +## Design: Fuzzy Path Walk + +When a lookup fails with `NoSuchKeyError`, we do a separate fuzzy walk of the +normalized config tree (which includes all struct-defined fields via +`IncludeMissingFields` + all user-defined map keys). + +The walk processes every component independently: +1. Exact key match -> follow it +2. No exact match -> Levenshtein fuzzy match against siblings -> follow best match +3. Index components (`[0]`) -> pass through verbatim +4. Any component unfixable (all candidates too far) -> give up, no suggestion + +This corrects **multiple** typos simultaneously (e.g., `${resources.jbs.my_jb.id}` +-> `did you mean ${resources.jobs.my_job.id}?`). + +Distance threshold: `min(3, max(1, len(key)/2))`. + +Core fuzzy matching (`Levenshtein`, `ClosestMatch`) lives in `libs/textutil/` +and is independent of the `dyn` package. The `SuggestPath` function in +`libs/dyn/dynvar/` uses these functions with dyn tree walking. + +## Wiring + +The suggestion callback is passed via `dynvar.WithSuggestFn(...)` into +`dynvar.Resolve`. On `NoSuchKeyError` in `resolveKey`, the suggestion is +appended to the error message. + +## `var` Shorthand + +`${var.foo}` is rewritten to `${variables.foo.value}` before lookup. The +`SuggestFn` in `resolve_variable_references.go` handles this bidirectionally: +rewrite `var.X` -> `variables.X.value` for the fuzzy walk, then convert the +suggestion back to `var.X` form for the user-facing message. + +## Scope + +**Covered**: typos in struct fields, user-defined names, resource types/instances, +multi-level typos. + +**Not covered**: malformed references (handled by the parser), cross-section +suggestions (user writes `${bundle.X}` meaning `${var.X}`), array index +out of bounds. diff --git a/libs/dyn/dynvar/resolve.go b/libs/dyn/dynvar/resolve.go index f811f25394..f03ab8796c 100644 --- a/libs/dyn/dynvar/resolve.go +++ b/libs/dyn/dynvar/resolve.go @@ -11,6 +11,21 @@ import ( "github.com/databricks/cli/libs/utils" ) +// SuggestFn is a function that returns a suggested correction for a reference path. +// It returns an empty string if no suggestion is available. +type SuggestFn func(path dyn.Path) string + +// ResolveOption configures optional behavior for [Resolve]. +type ResolveOption func(*resolver) + +// WithSuggestFn configures a suggestion function that is called when a +// reference does not exist. The suggestion is appended to the error message. +func WithSuggestFn(fn SuggestFn) ResolveOption { + return func(r *resolver) { + r.suggestFn = fn + } +} + // Resolve resolves variable references in the given input value using the provided lookup function. // It returns the resolved output value and any error encountered during the resolution process. // @@ -34,8 +49,12 @@ import ( // If a cycle is detected in the variable references, an error is returned. // If for some path the resolution function returns [ErrSkipResolution], the variable reference is left in place. // This is useful when some variable references are not yet ready to be interpolated. -func Resolve(in dyn.Value, fn Lookup) (out dyn.Value, err error) { - return resolver{in: in, fn: fn}.run() +func Resolve(in dyn.Value, fn Lookup, opts ...ResolveOption) (out dyn.Value, err error) { + r := resolver{in: in, fn: fn} + for _, opt := range opts { + opt(&r) + } + return r.run() } type lookupResult struct { @@ -47,6 +66,8 @@ type resolver struct { in dyn.Value fn Lookup + suggestFn SuggestFn + refs map[string]Ref resolved map[string]dyn.Value @@ -208,7 +229,13 @@ func (r *resolver) resolveKey(key string, seen []string) (dyn.Value, error) { v, err := r.fn(p) if err != nil { if dyn.IsNoSuchKeyError(err) { - err = fmt.Errorf("reference does not exist: ${%s}", key) + msg := fmt.Sprintf("reference does not exist: ${%s}", key) + if r.suggestFn != nil { + if suggestion := r.suggestFn(p); suggestion != "" { + msg += fmt.Sprintf(". did you mean ${%s}?", suggestion) + } + } + err = errors.New(msg) } // Cache the return value and return to the caller. diff --git a/libs/dyn/dynvar/resolve_test.go b/libs/dyn/dynvar/resolve_test.go index 598c78852b..e5a743057f 100644 --- a/libs/dyn/dynvar/resolve_test.go +++ b/libs/dyn/dynvar/resolve_test.go @@ -406,3 +406,20 @@ func TestResolveEscapedRef(t *testing.T) { // Escaped reference should produce literal ${a}. assert.Equal(t, "${a}", getByPath(t, out, "b").MustString()) } + +func TestResolveSuggestFn(t *testing.T) { + in := dyn.V(map[string]dyn.Value{ + "cluster": dyn.V(map[string]dyn.Value{ + "name": dyn.V("hello"), + }), + "ref": dyn.V("${cluster.nme}"), + }) + + suggest := dynvar.WithSuggestFn(func(p dyn.Path) string { + return dynvar.SuggestPath(in, p) + }) + + _, err := dynvar.Resolve(in, dynvar.DefaultLookup(in), suggest) + require.Error(t, err) + assert.Contains(t, err.Error(), "did you mean ${cluster.name}?") +} diff --git a/libs/dyn/dynvar/suggest.go b/libs/dyn/dynvar/suggest.go new file mode 100644 index 0000000000..b859e5a09e --- /dev/null +++ b/libs/dyn/dynvar/suggest.go @@ -0,0 +1,55 @@ +package dynvar + +import ( + "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/textutil" +) + +// SuggestPath walks the reference path against root, correcting mistyped key +// components via fuzzy matching. Returns the corrected path string, or "" if +// the path cannot be corrected. +func SuggestPath(root dyn.Value, refPath dyn.Path) string { + cur := root + suggested := dyn.EmptyPath + + for _, c := range refPath { + if c.Key() != "" { + m, ok := cur.AsMap() + if !ok { + return "" + } + + key := c.Key() + if v, ok := m.GetByString(key); ok { + suggested = suggested.Append(dyn.Key(key)) + cur = v + continue + } + + // Collect candidate keys for fuzzy matching. + pairs := m.Pairs() + candidates := make([]string, len(pairs)) + for i, p := range pairs { + candidates[i] = p.Key.MustString() + } + + match, _ := textutil.ClosestMatch(key, candidates) + if match == "" { + return "" + } + + v, _ := m.GetByString(match) + suggested = suggested.Append(dyn.Key(match)) + cur = v + } else { + seq, ok := cur.AsSequence() + if !ok || c.Index() < 0 || c.Index() >= len(seq) { + return "" + } + suggested = suggested.Append(dyn.Index(c.Index())) + cur = seq[c.Index()] + } + } + + return suggested.String() +} diff --git a/libs/dyn/dynvar/suggest_test.go b/libs/dyn/dynvar/suggest_test.go new file mode 100644 index 0000000000..e6186ed581 --- /dev/null +++ b/libs/dyn/dynvar/suggest_test.go @@ -0,0 +1,91 @@ +package dynvar + +import ( + "testing" + + "github.com/databricks/cli/libs/dyn" + "github.com/stretchr/testify/assert" +) + +func TestSuggestPathSingleKeyTypo(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "bundle": dyn.V(map[string]dyn.Value{ + "name": dyn.V("my-bundle"), + }), + }) + + suggestion := SuggestPath(root, dyn.MustPathFromString("bndle.name")) + assert.Equal(t, "bundle.name", suggestion) +} + +func TestSuggestPathMultiLevelTypo(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "resources": dyn.V(map[string]dyn.Value{ + "jobs": dyn.V(map[string]dyn.Value{ + "my_job": dyn.V(map[string]dyn.Value{ + "id": dyn.V("123"), + }), + }), + }), + }) + + suggestion := SuggestPath(root, dyn.MustPathFromString("resources.jbs.my_jb.id")) + assert.Equal(t, "resources.jobs.my_job.id", suggestion) +} + +func TestSuggestPathExactMatch(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "var": dyn.V(map[string]dyn.Value{ + "my_var": dyn.V("hello"), + }), + }) + + suggestion := SuggestPath(root, dyn.MustPathFromString("var.my_var")) + assert.Equal(t, "var.my_var", suggestion) +} + +func TestSuggestPathNoMatch(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "bundle": dyn.V(map[string]dyn.Value{ + "name": dyn.V("my-bundle"), + }), + }) + + suggestion := SuggestPath(root, dyn.MustPathFromString("zzzzzzz.name")) + assert.Equal(t, "", suggestion) +} + +func TestSuggestPathIntoNonMap(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "name": dyn.V("hello"), + }) + + suggestion := SuggestPath(root, dyn.MustPathFromString("name.child")) + assert.Equal(t, "", suggestion) +} + +func TestSuggestPathIndexPassthrough(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "tasks": dyn.V([]dyn.Value{ + dyn.V(map[string]dyn.Value{ + "name": dyn.V("task1"), + }), + }), + }) + + suggestion := SuggestPath(root, dyn.MustPathFromString("tasks[0].nme")) + assert.Equal(t, "tasks[0].name", suggestion) +} + +func TestSuggestPathNested(t *testing.T) { + root := dyn.V(map[string]dyn.Value{ + "variables": dyn.V(map[string]dyn.Value{ + "my_cluster_id": dyn.V(map[string]dyn.Value{ + "value": dyn.V("abc-123"), + }), + }), + }) + + suggestion := SuggestPath(root, dyn.MustPathFromString("variables.my_clster_id.value")) + assert.Equal(t, "variables.my_cluster_id.value", suggestion) +} diff --git a/libs/textutil/levenshtein.go b/libs/textutil/levenshtein.go new file mode 100644 index 0000000000..239bb9a5cf --- /dev/null +++ b/libs/textutil/levenshtein.go @@ -0,0 +1,55 @@ +package textutil + +// Levenshtein computes the edit distance between two strings using single-row DP. +func Levenshtein(a, b string) int { + if len(a) < len(b) { + a, b = b, a + } + + row := make([]int, len(b)+1) + for j := range row { + row[j] = j + } + + for i := range len(a) { + prev := row[0] + row[0] = i + 1 + for j := range len(b) { + cost := 1 + if a[i] == b[j] { + cost = 0 + } + tmp := row[j+1] + row[j+1] = min( + row[j+1]+1, // deletion + row[j]+1, // insertion + prev+cost, // substitution + ) + prev = tmp + } + } + + return row[len(b)] +} + +// ClosestMatch finds the candidate with the smallest edit distance to key, +// within a threshold of min(3, max(1, len(key)/2)). Returns ("", 0) if no +// candidate is within threshold. +func ClosestMatch(key string, candidates []string) (string, int) { + threshold := min(3, max(1, len(key)/2)) + bestDist := threshold + 1 + bestMatch := "" + + for _, c := range candidates { + d := Levenshtein(key, c) + if d < bestDist { + bestDist = d + bestMatch = c + } + } + + if bestMatch == "" { + return "", 0 + } + return bestMatch, bestDist +} diff --git a/libs/textutil/levenshtein_test.go b/libs/textutil/levenshtein_test.go new file mode 100644 index 0000000000..530cc6a90f --- /dev/null +++ b/libs/textutil/levenshtein_test.go @@ -0,0 +1,59 @@ +package textutil + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestLevenshtein(t *testing.T) { + tests := []struct { + a, b string + dist int + }{ + {"", "", 0}, + {"a", "", 1}, + {"", "a", 1}, + {"abc", "abc", 0}, + {"abc", "abd", 1}, + {"abc", "ab", 1}, + {"kitten", "sitting", 3}, + {"saturday", "sunday", 3}, + {"my_cluster_id", "my_clster_id", 1}, + } + + for _, tt := range tests { + t.Run(tt.a+"_"+tt.b, func(t *testing.T) { + assert.Equal(t, tt.dist, Levenshtein(tt.a, tt.b)) + // Symmetric. + assert.Equal(t, tt.dist, Levenshtein(tt.b, tt.a)) + }) + } +} + +func TestClosestMatch(t *testing.T) { + candidates := []string{"cluster_id", "cluster_name", "node_type"} + + t.Run("close_match", func(t *testing.T) { + match, dist := ClosestMatch("cluser_id", candidates) + assert.Equal(t, "cluster_id", match) + assert.Equal(t, 1, dist) + }) + + t.Run("no_match", func(t *testing.T) { + match, _ := ClosestMatch("zzzzzzz", candidates) + assert.Equal(t, "", match) + }) + + t.Run("exact_match", func(t *testing.T) { + match, dist := ClosestMatch("cluster_id", candidates) + assert.Equal(t, "cluster_id", match) + assert.Equal(t, 0, dist) + }) + + t.Run("short_key_threshold", func(t *testing.T) { + // For a 2-char key, threshold = min(3, max(1, 1)) = 1 + match, _ := ClosestMatch("ab", []string{"ac", "zz"}) + assert.Equal(t, "ac", match) + }) +} From faef78c3ce74584eacc82fbaf121db70a80fd2b5 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 16 Mar 2026 00:54:46 +0100 Subject: [PATCH 3/3] Update var_in_var test for nested ${} rejection The var_in_var test used ${var.foo_${var.tail}} which relied on undocumented nested reference behavior (the test itself noted: "not officially supported... might start to reject this in the future"). The new parser now rejects nested ${} constructs as intended. Co-authored-by: Isaac --- acceptance/bundle/variables/var_in_var/output.txt | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/acceptance/bundle/variables/var_in_var/output.txt b/acceptance/bundle/variables/var_in_var/output.txt index 27ec6e6ab1..2d284c76cc 100644 --- a/acceptance/bundle/variables/var_in_var/output.txt +++ b/acceptance/bundle/variables/var_in_var/output.txt @@ -1,9 +1,12 @@ >>> [CLI] bundle validate -o json -t target_x +Warning: nested variable references are not supported (at position 0) + in databricks.yml:14:14 + { "final": { - "default": "hello from foo x", - "value": "hello from foo x" + "default": "${var.foo_${var.tail}}", + "value": "${var.foo_${var.tail}}" }, "foo_x": { "default": "hello from foo x", @@ -20,10 +23,13 @@ } >>> [CLI] bundle validate -o json -t target_y +Warning: nested variable references are not supported (at position 0) + in databricks.yml:14:14 + { "final": { - "default": "hi from foo y", - "value": "hi from foo y" + "default": "${var.foo_${var.tail}}", + "value": "${var.foo_${var.tail}}" }, "foo_x": { "default": "hello from foo x",