-
Notifications
You must be signed in to change notification settings - Fork 305
feat: add fuzzy text matching fallbacks for edit_file tool #2145
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,6 +14,7 @@ import ( | |
| "regexp" | ||
| "strings" | ||
| "sync" | ||
| "unicode/utf8" | ||
|
|
||
| "github.com/docker/docker-agent/pkg/chat" | ||
| "github.com/docker/docker-agent/pkg/fsx" | ||
|
|
@@ -441,10 +442,11 @@ func (t *FilesystemTool) handleEditFile(ctx context.Context, args EditFileArgs) | |
|
|
||
| var changes []string | ||
| for i, edit := range args.Edits { | ||
| if !strings.Contains(modifiedContent, edit.OldText) { | ||
| replaced, ok := FindAndReplace(modifiedContent, edit.OldText, edit.NewText) | ||
| if !ok { | ||
| return tools.ResultError(fmt.Sprintf("Edit %d failed: old text not found", i+1)), nil | ||
| } | ||
| modifiedContent = strings.Replace(modifiedContent, edit.OldText, edit.NewText, 1) | ||
| modifiedContent = replaced | ||
| changes = append(changes, fmt.Sprintf("Edit %d: Replaced %d characters", i+1, len(edit.OldText))) | ||
| } | ||
|
|
||
|
|
@@ -463,6 +465,156 @@ func (t *FilesystemTool) handleEditFile(ctx context.Context, args EditFileArgs) | |
| return tools.ResultSuccess("File edited successfully. Changes:\n" + strings.Join(changes, "\n")), nil | ||
| } | ||
|
|
||
| // FindAndReplace tries to find oldText in content and replace the first | ||
| // occurrence with newText. It tries an exact match first, then falls back | ||
| // through progressively looser normalization strategies to handle common | ||
| // LLM mistakes (extra/missing whitespace, collapsed line continuations, | ||
| // escaped quotes). | ||
| // | ||
| // When a fuzzy strategy matches, the replacement is applied to the | ||
| // original (un-normalized) content so that surrounding text is preserved. | ||
| func FindAndReplace(content, oldText, newText string) (string, bool) { | ||
| // Strategy 1: exact match. | ||
| if strings.Contains(content, oldText) { | ||
| return strings.Replace(content, oldText, newText, 1), true | ||
| } | ||
|
|
||
| // Strategy 2: line-trimmed match (strip trailing whitespace per line). | ||
| if result, ok := normalizedReplace(content, oldText, newText, trimTrailingWhitespacePerLine); ok { | ||
| return result, true | ||
| } | ||
|
|
||
| // Strategy 3: indentation-flexible match (strip leading whitespace per line). | ||
| if result, ok := normalizedReplace(content, oldText, newText, stripLeadingWhitespacePerLine); ok { | ||
| return result, true | ||
| } | ||
|
|
||
| // Strategy 4: line-continuation normalization (collapse "\" + newline + spaces into a single space). | ||
| if result, ok := normalizedReplace(content, oldText, newText, collapseLineContinuations); ok { | ||
| return result, true | ||
| } | ||
|
|
||
| // Strategy 5: escape-normalized match (\" ↔ "). | ||
| if result, ok := normalizedReplace(content, oldText, newText, normalizeEscapedQuotes); ok { | ||
| return result, true | ||
| } | ||
|
|
||
| // Strategy 6: whitespace-collapsed match (collapse all runs of whitespace to single space). | ||
| if result, ok := normalizedReplace(content, oldText, newText, collapseWhitespace); ok { | ||
| return result, true | ||
| } | ||
|
|
||
| return content, false | ||
| } | ||
|
|
||
// normalizedReplace applies normalize to both content and oldText. If the
// normalized search text occurs in the normalized content, the matching
// range is mapped back onto the original content and replaced by newText.
//
// It returns the edited content and true on success. It returns the
// unchanged content and false when the normalized search text is empty,
// is not found, or when the normalized content cannot be mapped back onto
// the original.
func normalizedReplace(content, oldText, newText string, normalize func(string) string) (string, bool) {
	normOld := normalize(oldText)
	if normOld == "" {
		return content, false
	}

	idx := strings.Index(normalize(content), normOld)
	if idx == -1 {
		return content, false
	}

	// Map the normalized indices back to the original content.
	origStart := mapNormToOrig(content, idx, normalize)
	origEnd := mapNormToOrigEnd(content, idx+len(normOld), normalize)
	if origStart < 0 || origEnd < origStart {
		// The normalized text could not be aligned with the original;
		// refusing to edit is safer than guessing a range.
		return content, false
	}

	return content[:origStart] + newText + content[origEnd:], true
}

// mapNormToOrig returns the byte offset in original at which the text
// corresponding to normalize(original)[normIdx:] begins.
//
// The offset is placed directly after the original byte that produced the
// normalized byte at normIdx-1. Bytes that normalization dropped right in
// front of the match (indentation, the backslash of \") therefore belong
// to the match, while whitespace that normalization kept stays outside it.
// The position is derived from the normalization of the whole string, not
// of a prefix: a prefix can normalize differently out of context (its
// trailing blanks would be trimmed), which would shift the result.
//
// The result is snapped back to a rune boundary. It is -1 when the
// normalized text cannot be aligned with the original.
func mapNormToOrig(original string, normIdx int, normalize func(string) string) int {
	if normIdx <= 0 {
		return 0
	}
	normalized := normalize(original)
	if normIdx > len(normalized) {
		return len(original)
	}

	prev, ok := alignedOffset(original, normalized, normIdx-1)
	if !ok {
		return -1
	}

	pos := prev + 1
	// Never split a multi-byte UTF-8 sequence.
	for pos > 0 && pos < len(original) && !utf8.RuneStart(original[pos]) {
		pos--
	}
	return pos
}

// mapNormToOrigEnd returns the byte offset in original at which the text
// corresponding to normalize(original)[:normIdx] ends.
//
// The offset is the position of the original byte that produced the
// normalized byte at normIdx, or len(original) when normIdx is at or past
// the end of the normalized text. Bytes that normalization dropped right
// after the match (trailing blanks before a newline, the remainder of a
// collapsed whitespace run) are thereby included, but bytes that are still
// present in the normalized text are never skipped.
//
// The result is advanced to a rune boundary. It is -1 when the normalized
// text cannot be aligned with the original.
func mapNormToOrigEnd(original string, normIdx int, normalize func(string) string) int {
	normalized := normalize(original)

	target := normIdx
	if target < 0 {
		target = 0
	}
	if target > len(normalized) {
		target = len(normalized)
	}

	pos, ok := alignedOffset(original, normalized, target)
	if !ok {
		return -1
	}

	// Never split a multi-byte UTF-8 sequence.
	for pos < len(original) && !utf8.RuneStart(original[pos]) {
		pos++
	}
	return pos
}

// alignedOffset walks original and normalized in lockstep and returns the
// byte offset in original that produced normalized[target]. A target equal
// to len(normalized) yields len(original).
//
// The walk assumes that normalized was derived from original only by
// dropping bytes and by replacing runs of whitespace/backslash bytes with
// a single space. Every normalized byte is matched greedily to the
// earliest remaining original byte that can have produced it. The whole
// normalized string is always walked, so ok is false whenever any part of
// it cannot be aligned.
func alignedOffset(original, normalized string, target int) (offset int, ok bool) {
	offset = len(original)
	i := 0
	for j := 0; j < len(normalized); j++ {
		for i < len(original) && !normalizesTo(original[i], normalized[j]) {
			i++ // dropped by the normalization
		}
		if i == len(original) {
			return 0, false
		}
		if j == target {
			offset = i
		}
		i++
	}
	return offset, true
}

// normalizesTo reports whether the original byte orig can be the source of
// the normalized byte norm: either the byte was kept as is, or it is a
// whitespace/backslash byte that starts a run collapsed into one space.
func normalizesTo(orig, norm byte) bool {
	if orig == norm {
		return true
	}
	if norm != ' ' {
		return false
	}
	switch orig {
	case '\t', '\n', '\v', '\f', '\r', '\\':
		return true
	}
	return false
}
|
|
||
// trimTrailingWhitespacePerLine removes trailing spaces and tabs from
// every "\n"-separated line of s. Line breaks themselves are kept.
func trimTrailingWhitespacePerLine(s string) string {
	var out strings.Builder
	out.Grow(len(s))
	for {
		nl := strings.IndexByte(s, '\n')
		if nl < 0 {
			// Last (or only) line: no terminating newline to emit.
			out.WriteString(strings.TrimRight(s, " \t"))
			return out.String()
		}
		out.WriteString(strings.TrimRight(s[:nl], " \t"))
		out.WriteByte('\n')
		s = s[nl+1:]
	}
}
|
|
||
// stripLeadingWhitespacePerLine removes leading spaces and tabs from
// every "\n"-separated line of s. Line breaks themselves are kept.
func stripLeadingWhitespacePerLine(s string) string {
	var out strings.Builder
	out.Grow(len(s))
	for {
		nl := strings.IndexByte(s, '\n')
		if nl < 0 {
			// Last (or only) line: no terminating newline to emit.
			out.WriteString(strings.TrimLeft(s, " \t"))
			return out.String()
		}
		out.WriteString(strings.TrimLeft(s[:nl], " \t"))
		out.WriteByte('\n')
		s = s[nl+1:]
	}
}
|
|
||
// lineContinuationRE recognizes a shell-style line continuation together
// with the whitespace around it: optional whitespace, a backslash,
// optional whitespace, a newline, and any whitespace that follows.
//
// The pattern is deliberately context-unaware and treats a continuation
// inside a string literal the same as one in code. That is acceptable
// because fuzzy strategies only run after the exact match has failed,
// which makes a false positive unlikely in practice.
var lineContinuationRE = regexp.MustCompile(`\s*\\\s*\n\s*`)

// collapseLineContinuations replaces every line continuation in s, along
// with its surrounding whitespace, by a single space.
func collapseLineContinuations(s string) string {
	const joined = " "
	return lineContinuationRE.ReplaceAllLiteralString(s, joined)
}
|
|
||
// normalizeEscapedQuotes rewrites every escaped double quote (\") in s as
// a plain double quote, so that an unescaped search text supplied by the
// LLM can match escaped text in the file.
//
// The strategy runs only after the exact match (Strategy 1) has failed,
// i.e. the literal oldText is not present in the content. A false
// positive would need the content to hold both the escaped and the
// unescaped form of the same text, which is rare in practice.
func normalizeEscapedQuotes(s string) string {
	const escaped = `\"`

	var out strings.Builder
	out.Grow(len(s))
	for {
		at := strings.Index(s, escaped)
		if at < 0 {
			out.WriteString(s)
			return out.String()
		}
		out.WriteString(s[:at])
		out.WriteByte('"')
		s = s[at+len(escaped):]
	}
}
|
|
||
// whitespaceRE matches one or more consecutive whitespace characters.
var whitespaceRE = regexp.MustCompile(`\s+`)

// collapseWhitespace replaces every run of whitespace in s, including
// line breaks, with a single space.
func collapseWhitespace(s string) string {
	const single = " "
	return whitespaceRE.ReplaceAllLiteralString(s, single)
}
|
|
||
| func (t *FilesystemTool) handleListDirectory(_ context.Context, args ListDirectoryArgs) (*tools.ToolCallResult, error) { | ||
| resolvedPath := t.resolvePath(args.Path) | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 MEDIUM: Binary search may overshoot with collapsing normalizations
The binary search finds "the smallest position where the normalized prefix has at least normIdx characters", but this can cause off-by-one errors when normalization collapses characters.
Example:
Normalizing `"a  b"` (two spaces) to `"a b"` (one space): `normalize("a ") = "a "` has length 2.

Why LIKELY, not CONFIRMED:
The binary search invariant appears correct in principle, but edge cases with collapsing normalizations could expose boundary issues. The real problem is that this function feeds into `mapNormToOrigEnd`, which has confirmed bugs.

Recommendation:
Add test cases specifically for boundary detection with: