diff --git a/docs/adr/43409-post-parse-diagnostic-improvement-for-tool-config.md b/docs/adr/43409-post-parse-diagnostic-improvement-for-tool-config.md new file mode 100644 index 00000000000..51958fb7870 --- /dev/null +++ b/docs/adr/43409-post-parse-diagnostic-improvement-for-tool-config.md @@ -0,0 +1,61 @@ +# ADR-43409: Post-Parse Diagnostic Improvement for Tool Config Shape Violations + +**Date**: 2026-07-05 +**Status**: Draft +**Deciders**: Unknown (generated from PR diff) + +--- + +### Context + +When a user writes a workflow frontmatter `tools.` key as a scalar string (e.g. `github: "invalid-string"`) and also includes nested child keys beneath it (e.g. `toolsets: [default]`), the upstream YAML parser produces two quality problems: + +1. **Wrong source location** — the error caret lands on the child key line rather than on the scalar value that is the actual mistake. +2. **Parser-internal phrasing** — the raw message ("value is not allowed in this context. map key-value is pre-defined") is meaningless to workflow authors and doesn't name the tool key involved. + +The compiler also failed to classify this diagnostic as a syntax error, causing it to surface generic event/filter remediation hints instead of syntax remediation guidance. These combined issues made the error experience confusing and hard to act on. + +### Decision + +We will intercept and rewrite affected YAML diagnostics in a new post-parse adjustment layer (`improveFrontmatterDiagnostic` in `pkg/workflow/frontmatter_error.go`) rather than modifying the upstream YAML parser or catching the pattern pre-parse. This function detects the scalar-with-nested-key diagnostic pattern, re-anchors the reported position to the scalar value line, and replaces the message with a tool-aware phrase: `tools. tool config must be an object, not a string (for example: toolsets: [default])`. The error classifier is also extended to recognise the translated message as a syntax-category error. + +### Alternatives Considered + +#### Alternative 1: Modify the Upstream YAML Parser + +Patch the third-party YAML parser to emit the error at the parent scalar line with a user-friendly message at parse time. + +Considered because it would fix the issue at the root cause. Not chosen because it requires forking or upstreaming a change to an external library, is high-risk for regressions across all YAML error paths, and is disproportionate effort for a single error pattern. + +#### Alternative 2: Pre-Parse Structural Validation + +Before invoking the YAML parser, scan the raw frontmatter text for the `key: "scalar"\n child:` pattern and produce a custom error without involving the YAML parser at all. + +Considered because it avoids dependence on parser error message text (which can change). Not chosen because it duplicates partial YAML understanding in the pre-parse layer, cannot leverage the parser's existing line/column tracking, and adds a fragile regex-based YAML structural check outside the normal parse pipeline. + +#### Alternative 3: Generic "Review Your Tools Config" Fallback Message + +When the parser emits this error pattern, replace the message with a generic "check your tools configuration" hint without attempting to re-anchor the source position. + +Considered as a low-effort improvement. Not chosen because it still leaves the caret on the wrong line, which is the more confusing of the two problems, and because the tool name is available from context and makes the message significantly more actionable. + +### Consequences + +#### Positive +- Workflow authors see the error caret on the line they actually need to fix (the scalar value line) rather than on an unrelated child key line. +- The message names the specific tool key involved (`tools.github`) and provides a concrete correct example, reducing iteration time to fix the mistake. +- The fix is isolated to the error-formatting layer and does not affect the parse, compile, or validation pipelines. +- The syntax-category classification ensures users receive the correct remediation hint ("Fix the YAML/frontmatter syntax first") instead of an unrelated event/filter hint. + +#### Negative +- The detection heuristic relies on matching translated message text and regex-parsing source lines; if the upstream parser changes its raw message or the translation changes, the heuristic silently stops firing (falling back to the original, lower-quality diagnostic). +- The approach is specific to the `tools.` ancestor context; other scalar-with-children patterns elsewhere in frontmatter are not improved. +- `frontmatter_error.go` now carries a third special-case adjustment path, increasing the maintenance surface of the error formatting layer. + +#### Neutral +- A custom `renderSourceContextForPosition` helper is added to re-render the source context snippet when the position is re-anchored, keeping the VSCode-compatible output format consistent. +- The `parsePositiveInt` helper duplicates a simple int-parsing pattern rather than importing a shared utility, keeping the dependency surface minimal. + +--- + +*ADR created by [adr-writer agent]. Review and finalize before changing status from Draft to Accepted.* diff --git a/pkg/parser/yaml_error.go b/pkg/parser/yaml_error.go index 6820fa0a7ae..fb9f4e6c67c 100644 --- a/pkg/parser/yaml_error.go +++ b/pkg/parser/yaml_error.go @@ -41,6 +41,10 @@ var yamlErrorTranslations = []struct { "mapping values are not allowed", "unexpected ':' — check indentation or if this key belongs in a mapping block", }, + { + "value is not allowed in this context. map key-value is pre-defined", + "value cannot have child keys here — this key must be an object/mapping, not a scalar value", + }, { "string was used where mapping is expected", "expected a YAML mapping (key: value pairs) but got a plain string", diff --git a/pkg/parser/yaml_error_test.go b/pkg/parser/yaml_error_test.go index 9f841484ca8..a86b845b10b 100644 --- a/pkg/parser/yaml_error_test.go +++ b/pkg/parser/yaml_error_test.go @@ -267,6 +267,12 @@ func TestTranslateYAMLMessage(t *testing.T) { contains: "missing ':' after key", excludes: "mapping value not found", }, + { + name: "scalar with nested key diagnostic", + input: "value is not allowed in this context. map key-value is pre-defined", + contains: "must be an object/mapping", + excludes: "map key-value is pre-defined", + }, { name: "found character that cannot start any token", input: "found character that cannot start any token", diff --git a/pkg/workflow/compiler_yaml_test.go b/pkg/workflow/compiler_yaml_test.go index f07502f6df2..6bf6612e63b 100644 --- a/pkg/workflow/compiler_yaml_test.go +++ b/pkg/workflow/compiler_yaml_test.go @@ -194,6 +194,25 @@ Invalid YAML with missing comma in array.`, expectedMessagePart: "',' or ']' must be specified", description: "missing comma in array should be detected", }, + { + name: "github_tool_scalar_with_nested_key", + content: `--- +on: push +tools: + github: "invalid-string" + toolsets: [default] +engine: claude +strict: false +--- + +# Test Workflow + +Invalid YAML with scalar github tool config that has nested keys.`, + expectedErrorLine: 4, // highlight the invalid github scalar value line, not the nested key line + expectedErrorColumn: 11, + expectedMessagePart: "tools.github tool config must be a mapping (object), not a scalar value", + description: "invalid github tool scalar should point to the scalar line with actionable message", + }, { name: "mixed_tabs_and_spaces", content: "---\non: push\npermissions:\n contents: read\n\tissues: write\nengine: claude\n---\n\n# Test Workflow\n\nInvalid YAML with mixed tabs and spaces.", diff --git a/pkg/workflow/error_recovery.go b/pkg/workflow/error_recovery.go index 9956dc65dc3..9fff8526f8d 100644 --- a/pkg/workflow/error_recovery.go +++ b/pkg/workflow/error_recovery.go @@ -267,7 +267,10 @@ func classifyErrorMessage(message string) PrioritizedError { strings.Contains(lower, "failed to parse yaml frontmatter"), strings.Contains(lower, "no frontmatter found"), strings.Contains(lower, "mapping values are not allowed"), - strings.Contains(lower, "did not find expected key"): + strings.Contains(lower, "did not find expected key"), + strings.Contains(lower, "missing ':' after key"), + strings.Contains(lower, "unexpected ':'"), + strings.Contains(lower, "tool config must be a mapping"): return PrioritizedError{ Message: message, Severity: SeverityCritical, diff --git a/pkg/workflow/error_recovery_test.go b/pkg/workflow/error_recovery_test.go index 8c52d914e0f..53d48365c4f 100644 --- a/pkg/workflow/error_recovery_test.go +++ b/pkg/workflow/error_recovery_test.go @@ -149,3 +149,36 @@ func TestBuildPrioritizedErrorReportFromMessages_UnknownPermissionScopeHintPrese require.Len(t, report.DisplayedErrors, 1) assert.Equal(t, "Remove or replace the unknown permission scope `Contents` in `permissions:` and re-run `gh aw compile`.", report.DisplayedErrors[0].Suggestion) } + +func TestBuildPrioritizedErrorReportFromMessages_YAMLSyntaxGetsSyntaxHint(t *testing.T) { + message := `/tmp/workflow.md:3:1: error: missing ':' after key +2 | on: push +3 | permissions + ^` + + report := BuildPrioritizedErrorReportFromMessages([]string{message}, true) + + require.Len(t, report.DisplayedErrors, 1) + prioritized := report.DisplayedErrors[0] + assert.Equal(t, SeverityCritical, prioritized.Severity) + assert.Equal(t, "syntax", prioritized.Category) + assert.Equal(t, "Fix the YAML/frontmatter syntax first, then re-run `gh aw compile`.", prioritized.Suggestion) + assert.NotContains(t, prioritized.Suggestion, "event or filter") +} + +func TestBuildPrioritizedErrorReportFromMessages_ToolConfigMappingGetsSyntaxHint(t *testing.T) { + message := `/tmp/workflow.md:4:11: error: tools.github tool config must be a mapping (object), not a scalar value (for example: toolsets: [default]) + 3 | tools: +> 4 | github: "invalid-string" + ^ + 5 | toolsets: [default]` + + report := BuildPrioritizedErrorReportFromMessages([]string{message}, true) + + require.Len(t, report.DisplayedErrors, 1) + prioritized := report.DisplayedErrors[0] + assert.Equal(t, SeverityCritical, prioritized.Severity) + assert.Equal(t, "syntax", prioritized.Category) + assert.Equal(t, "Fix the YAML/frontmatter syntax first, then re-run `gh aw compile`.", prioritized.Suggestion) + assert.NotContains(t, prioritized.Suggestion, "event or filter") +} diff --git a/pkg/workflow/frontmatter_error.go b/pkg/workflow/frontmatter_error.go index ffa12bb0f60..24cf1403a9a 100644 --- a/pkg/workflow/frontmatter_error.go +++ b/pkg/workflow/frontmatter_error.go @@ -3,6 +3,7 @@ package workflow import ( "fmt" "regexp" + "strconv" "strings" "github.com/github/gh-aw/pkg/logger" @@ -21,6 +22,8 @@ const frontmatterParseErrPrefix = "failed to parse frontmatter:\n" var ( lineColPattern = regexp.MustCompile(`\[(\d+):(\d+)\]\s*(.+)`) sourceContextPattern = regexp.MustCompile(`\n(>?\s*\d+\s*\|)`) + yamlKeyLinePattern = regexp.MustCompile(`^(\s*)([A-Za-z0-9._-]+)\s*:\s*(.+?)\s*$`) + yamlAnyKeyLine = regexp.MustCompile(`^(\s*)([A-Za-z0-9._-]+)\s*:\s*(.*)$`) ) // readSourceContextLines extracts source lines around a target line (±3 lines) @@ -86,6 +89,8 @@ func (c *Compiler) createFrontmatterError(filePath, content string, err error, f if matches := lineColPattern.FindStringSubmatch(errorStr); len(matches) >= 4 { line := matches[1] col := matches[2] + originalLine := line + originalCol := col message := matches[3] // Extract just the first line of the message (before newline) if idx := strings.Index(message, "\n"); idx != -1 { @@ -94,6 +99,7 @@ func (c *Compiler) createFrontmatterError(filePath, content string, err error, f // Translate raw YAML parser messages to user-friendly plain English. // Uses the shared translation table from pkg/parser to keep both code paths in sync. message = parser.TranslateYAMLMessage(message) + line, col, message = improveFrontmatterDiagnostic(content, line, col, message) // Format as: filename:line:column: error: message // This is compatible with VSCode's problem matcher @@ -104,6 +110,11 @@ func (c *Compiler) createFrontmatterError(filePath, content string, err error, f if loc := sourceContextPattern.FindStringIndex(errorStr); loc != nil { // Extract from the first source context line to the end context := errorStr[loc[0]+1:] // +1 to skip the leading newline + if line != originalLine || col != originalCol { + if custom := renderSourceContextForPosition(content, parsePositiveInt(line), parsePositiveInt(col)); custom != "" { + context = custom + } + } // Return VSCode-compatible format on first line, followed by source context only frontmatterErrorLog.Print("Formatting error for VSCode compatibility") return parser.NewFormattedParserError(fmt.Sprintf("%s\n%s", vscodeFormat, context)) @@ -136,3 +147,138 @@ func (c *Compiler) createFrontmatterError(filePath, content string, err error, f fallbackFmt := fmt.Sprintf("%s:%d:1: error: %s", filePath, frontmatterLineOffset, err.Error()) return parser.NewFormattedParserError(fallbackFmt) } + +// improveFrontmatterDiagnostic adjusts known low-quality parser diagnostics to +// point at the true source line and provide user-facing wording. +func improveFrontmatterDiagnostic(content, line, col, message string) (string, string, string) { + lower := strings.ToLower(strings.TrimSpace(message)) + // Only the translated phrase is checked here because parser.TranslateYAMLMessage + // runs before improveFrontmatterDiagnostic (see createFrontmatterError); the raw + // parser wording is never seen by the time this function is called. + isScalarWithNestedKey := strings.Contains(lower, "value cannot have child keys here") + if !isScalarWithNestedKey { + return line, col, message + } + + lineNum, colNum := parsePositiveInt(line), parsePositiveInt(col) + if lineNum <= 1 || colNum <= 0 { + return line, col, message + } + + lines := strings.Split(content, "\n") + if lineNum-1 >= len(lines) { + return line, col, message + } + + childLine := lines[lineNum-1] + parentLineNum := lineNum - 1 + if parentLineNum-1 < 0 || parentLineNum-1 >= len(lines) { + return line, col, message + } + parentLine := lines[parentLineNum-1] + + parentMatch := yamlKeyLinePattern.FindStringSubmatch(parentLine) + if len(parentMatch) < 4 { + return line, col, message + } + + parentIndent := len(parentMatch[1]) + parentKey := parentMatch[2] + parentValue := strings.TrimSpace(parentMatch[3]) + if parentValue == "" || isLikelyYAMLContainer(parentValue) { + return line, col, message + } + + childIndent := len(childLine) - len(strings.TrimLeft(childLine, " ")) + if childIndent <= parentIndent { + return line, col, message + } + + ancestorKey := nearestAncestorKey(lines, parentLineNum-1, parentIndent) + if ancestorKey != "tools" { + return line, col, message + } + + // Find the value's column offset by searching after the colon, not the full line, + // to correctly handle keys and values that share a substring (e.g. "foo: foo"). + keyEnd := len(parentMatch[1]) + len(parentMatch[2]) + valueCol := colNum - 1 // fallback: original column + if colonPos := strings.Index(parentLine[keyEnd:], ":"); colonPos >= 0 { + afterColon := parentLine[keyEnd+colonPos+1:] + trimmed := strings.TrimLeft(afterColon, " ") + valueCol = keyEnd + colonPos + 1 + (len(afterColon) - len(trimmed)) + } + + return strconv.Itoa(parentLineNum), strconv.Itoa(valueCol + 1), + fmt.Sprintf("tools.%s tool config must be a mapping (object), not a scalar value (for example: toolsets: [default])", parentKey) +} + +func parsePositiveInt(v string) int { + n := 0 + for _, ch := range v { + if ch < '0' || ch > '9' { + return 0 + } + n = n*10 + int(ch-'0') + } + return n +} + +func isLikelyYAMLContainer(value string) bool { + switch strings.TrimSpace(value) { + case "|", ">", "{}", "[]": + return true + } + return strings.HasPrefix(value, "{") || strings.HasPrefix(value, "[") +} + +func nearestAncestorKey(lines []string, startIdx, childIndent int) string { + for i := startIdx; i >= 0; i-- { + line := lines[i] + trimmed := strings.TrimSpace(line) + if trimmed == "" || strings.HasPrefix(trimmed, "#") { + continue + } + + match := yamlAnyKeyLine.FindStringSubmatch(line) + if len(match) < 3 { + continue + } + if len(match[1]) < childIndent { + return match[2] + } + } + return "" +} + +func renderSourceContextForPosition(content string, targetLine, targetCol int) string { + if targetLine <= 0 { + return "" + } + lines := strings.Split(content, "\n") + if targetLine > len(lines) { + return "" + } + if targetCol <= 0 { + targetCol = 1 + } + + startLine := max(1, targetLine-2) + endLine := min(len(lines), targetLine+2) + + var b strings.Builder + for lineNum := startLine; lineNum <= endLine; lineNum++ { + prefix := " " + if lineNum == targetLine { + prefix = ">" + } + fmt.Fprintf(&b, "%s %3d | %s\n", prefix, lineNum, lines[lineNum-1]) + if lineNum == targetLine { + // The source-context prefix is: one prefix char ("%s") + one space + three-digit + // line number ("%3d") + " | " = 7 fixed chars plus the variable prefix char = 8 + // total. Column N therefore needs 7+N spaces before the caret character. + fmt.Fprintf(&b, "%s^\n", strings.Repeat(" ", 7+targetCol)) + } + } + return strings.TrimRight(b.String(), "\n") +}