From b117fb815d4a784e5b971d7e08424dc6e7a76296 Mon Sep 17 00:00:00 2001 From: jayhack Date: Wed, 19 Feb 2025 14:24:57 -0800 Subject: [PATCH 1/3] addd replace tool --- src/codegen/extensions/langchain/agent.py | 2 + src/codegen/extensions/langchain/tools.py | 49 +++++- src/codegen/extensions/tools/__init__.py | 2 + .../extensions/tools/replacement_edit.py | 145 ++++++++++++++++++ src/codegen/extensions/tools/semantic_edit.py | 4 +- ...ol_prompts.py => semantic_edit_prompts.py} | 4 +- 6 files changed, 200 insertions(+), 6 deletions(-) create mode 100644 src/codegen/extensions/tools/replacement_edit.py rename src/codegen/extensions/tools/{tool_prompts.py => semantic_edit_prompts.py} (99%) diff --git a/src/codegen/extensions/langchain/agent.py b/src/codegen/extensions/langchain/agent.py index 20396006f..9d778072b 100644 --- a/src/codegen/extensions/langchain/agent.py +++ b/src/codegen/extensions/langchain/agent.py @@ -20,6 +20,7 @@ ListDirectoryTool, MoveSymbolTool, RenameFileTool, + ReplacementEditTool, RevealSymbolTool, SearchTool, SemanticEditTool, @@ -70,6 +71,7 @@ def create_codebase_agent( RevealSymbolTool(codebase), SemanticEditTool(codebase), SemanticSearchTool(codebase), + ReplacementEditTool(codebase), # =====[ Github Integration ]===== # Enable Github integration # GithubCreatePRTool(codebase), diff --git a/src/codegen/extensions/langchain/tools.py b/src/codegen/extensions/langchain/tools.py index b486a43ef..d2783b868 100644 --- a/src/codegen/extensions/langchain/tools.py +++ b/src/codegen/extensions/langchain/tools.py @@ -18,6 +18,7 @@ linear_search_issues_tool, ) from codegen.extensions.tools.link_annotation import add_links_to_message +from codegen.extensions.tools.replacement_edit import replacement_edit from codegen.extensions.tools.reveal_symbol import reveal_symbol from codegen.extensions.tools.search import search from codegen.extensions.tools.semantic_edit import semantic_edit @@ -37,7 +38,7 @@ view_file, view_pr, ) -from ..tools.tool_prompts import _FILE_EDIT_DESCRIPTION +from ..tools.semantic_edit_prompts import FILE_EDIT_PROMPT class ViewFileInput(BaseModel): @@ -257,7 +258,7 @@ class SemanticEditInput(BaseModel): """Input for semantic editing.""" filepath: str = Field(..., description="Path of the file relative to workspace root") - edit_content: str = Field(..., description=_FILE_EDIT_DESCRIPTION) + edit_content: str = Field(..., description=FILE_EDIT_PROMPT) start: int = Field(default=1, description="Starting line number (1-indexed, inclusive). Default is 1.") end: int = Field(default=-1, description="Ending line number (1-indexed, inclusive). Default is -1 (end of file).") @@ -706,6 +707,7 @@ def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]: ListDirectoryTool(codebase), MoveSymbolTool(codebase), RenameFileTool(codebase), + ReplacementEditTool(codebase), RevealSymbolTool(codebase), RunBashCommandTool(), # Note: This tool doesn't need the codebase SearchTool(codebase), @@ -725,3 +727,46 @@ def get_workspace_tools(codebase: Codebase) -> list["BaseTool"]: LinearCreateIssueTool(codebase), LinearGetTeamsTool(codebase), ] + + +class ReplacementEditInput(BaseModel): + """Input for regex-based replacement editing.""" + + filepath: str = Field(..., description="Path to the file to edit") + pattern: str = Field(..., description="Regex pattern to match") + replacement: str = Field(..., description="Replacement text (can include regex groups)") + start: int = Field(default=1, description="Starting line number (1-indexed, inclusive). Default is 1.") + end: int = Field(default=-1, description="Ending line number (1-indexed, inclusive). Default is -1 (end of file).") + count: Optional[int] = Field(default=None, description="Maximum number of replacements. Default is None (replace all).") + + +class ReplacementEditTool(BaseTool): + """Tool for regex-based replacement editing of files.""" + + name: ClassVar[str] = "replace" + description: ClassVar[str] = "Replace text in a file using regex pattern matching. For files over 300 lines, specify a line range." + args_schema: ClassVar[type[BaseModel]] = ReplacementEditInput + codebase: Codebase = Field(exclude=True) + + def __init__(self, codebase: Codebase) -> None: + super().__init__(codebase=codebase) + + def _run( + self, + filepath: str, + pattern: str, + replacement: str, + start: int = 1, + end: int = -1, + count: Optional[int] = None, + ) -> str: + result = replacement_edit( + self.codebase, + filepath=filepath, + pattern=pattern, + replacement=replacement, + start=start, + end=end, + count=count, + ) + return json.dumps(result, indent=2) diff --git a/src/codegen/extensions/tools/__init__.py b/src/codegen/extensions/tools/__init__.py index 70375f9b8..038a5135b 100644 --- a/src/codegen/extensions/tools/__init__.py +++ b/src/codegen/extensions/tools/__init__.py @@ -17,6 +17,7 @@ from .list_directory import list_directory from .move_symbol import move_symbol from .rename_file import rename_file +from .replacement_edit import replacement_edit from .reveal_symbol import reveal_symbol from .search import search from .semantic_edit import semantic_edit @@ -42,6 +43,7 @@ # Symbol operations "move_symbol", "rename_file", + "replacement_edit", "reveal_symbol", # Search operations "search", diff --git a/src/codegen/extensions/tools/replacement_edit.py b/src/codegen/extensions/tools/replacement_edit.py new file mode 100644 index 000000000..3cdb219da --- /dev/null +++ b/src/codegen/extensions/tools/replacement_edit.py @@ -0,0 +1,145 @@ +"""Tool for making regex-based replacements in files.""" + +import difflib +import re +from typing import Optional + +from codegen import Codebase + +from .view_file import add_line_numbers + + +def generate_diff(original: str, modified: str) -> str: + """Generate a unified diff between two strings. + + Args: + original: Original content + modified: Modified content + + Returns: + Unified diff as a string + """ + original_lines = original.splitlines(keepends=True) + modified_lines = modified.splitlines(keepends=True) + + diff = difflib.unified_diff( + original_lines, + modified_lines, + fromfile="original", + tofile="modified", + lineterm="", + ) + + return "".join(diff) + + +def _merge_content(original_content: str, edited_content: str, start: int, end: int) -> str: + """Merge edited content with original content, preserving content outside the edit range. + + Args: + original_content: Original file content + edited_content: New content for the specified range + start: Start line (1-indexed) + end: End line (1-indexed or -1 for end of file) + + Returns: + Merged content + """ + original_lines = original_content.split("\n") + edited_lines = edited_content.split("\n") + + if start == -1 and end == -1: # Append mode + return original_content + "\n" + edited_content + + # Convert to 0-indexed + start_idx = start - 1 + end_idx = end - 1 if end != -1 else len(original_lines) + + # Merge the content + result_lines = original_lines[:start_idx] + edited_lines + original_lines[end_idx + 1 :] + + return "\n".join(result_lines) + + +def replacement_edit( + codebase: Codebase, + filepath: str, + pattern: str, + replacement: str, + start: int = 1, + end: int = -1, + count: Optional[int] = None, + flags: re.RegexFlag = re.MULTILINE, +) -> dict[str, str]: + """Replace text in a file using regex pattern matching. + + Args: + codebase: The codebase to operate on + filepath: Path to the file to edit + pattern: Regex pattern to match + replacement: Replacement text (can include regex groups) + start: Start line (1-indexed, default: 1) + end: End line (1-indexed, -1 for end of file) + count: Maximum number of replacements (None for all) + flags: Regex flags (default: re.MULTILINE) + + Returns: + Dict containing edit results and status + + Raises: + FileNotFoundError: If file not found + ValueError: If invalid line range or regex pattern + """ + try: + file = codebase.get_file(filepath) + except ValueError: + msg = f"File not found: {filepath}" + raise FileNotFoundError(msg) + + # Get the original content + original_content = file.content + original_lines = original_content.split("\n") + + # Get the section to edit + total_lines = len(original_lines) + start_idx = start - 1 + end_idx = end - 1 if end != -1 else total_lines + + # Get the content to edit + section_lines = original_lines[start_idx : end_idx + 1] + section_content = "\n".join(section_lines) + + try: + # Compile pattern for better error messages + regex = re.compile(pattern, flags) + except re.error as e: + msg = f"Invalid regex pattern: {e}" + raise ValueError(msg) + + # Perform the replacement + new_section = regex.sub(replacement, section_content, count=count) + + # If no changes were made, return early + if new_section == section_content: + return { + "filepath": filepath, + "status": "unchanged", + "message": "No matches found for the given pattern", + } + + # Merge the edited content with the original + new_content = _merge_content(original_content, new_section, start, end) + + # Generate diff + diff = generate_diff(original_content, new_content) + + # Apply the edit + file.edit(new_content) + codebase.commit() + + return { + "filepath": filepath, + "diff": diff, + "status": "success", + "new_content": add_line_numbers(new_content), + } diff --git a/src/codegen/extensions/tools/semantic_edit.py b/src/codegen/extensions/tools/semantic_edit.py index 69bc4186b..81cd98188 100644 --- a/src/codegen/extensions/tools/semantic_edit.py +++ b/src/codegen/extensions/tools/semantic_edit.py @@ -8,7 +8,7 @@ from codegen import Codebase -from .tool_prompts import _HUMAN_PROMPT_DRAFT_EDITOR, _SYSTEM_PROMPT_DRAFT_EDITOR +from .semantic_edit_prompts import _HUMAN_PROMPT_DRAFT_EDITOR, COMMANDER_SYSTEM_PROMPT from .view_file import add_line_numbers @@ -152,7 +152,7 @@ def semantic_edit(codebase: Codebase, filepath: str, edit_content: str, start: i original_file_section = "\n".join(context_lines) # =====[ Get the LLM ]===== - system_message = _SYSTEM_PROMPT_DRAFT_EDITOR + system_message = COMMANDER_SYSTEM_PROMPT human_message = _HUMAN_PROMPT_DRAFT_EDITOR prompt = ChatPromptTemplate.from_messages([system_message, human_message]) llm = ChatAnthropic( diff --git a/src/codegen/extensions/tools/tool_prompts.py b/src/codegen/extensions/tools/semantic_edit_prompts.py similarity index 99% rename from src/codegen/extensions/tools/tool_prompts.py rename to src/codegen/extensions/tools/semantic_edit_prompts.py index d382f9316..0b5884309 100644 --- a/src/codegen/extensions/tools/tool_prompts.py +++ b/src/codegen/extensions/tools/semantic_edit_prompts.py @@ -1,4 +1,4 @@ -_FILE_EDIT_DESCRIPTION = ( +FILE_EDIT_PROMPT = ( """Edit a file in plain-text format. * The assistant can edit files by specifying the file path and providing a draft of the new file content. * The draft content doesn't need to be exactly the same as the existing file; the assistant may skip unchanged lines using comments like `# unchanged` to indicate unchanged sections. @@ -274,7 +274,7 @@ def helper(): ) -_SYSTEM_PROMPT_DRAFT_EDITOR = """You are an expert code editor. +COMMANDER_SYSTEM_PROMPT = """You are an expert code editor. Another agent has determined an edit needs to be made to this file. From a7322b7470a8c9e1a473539dcf25ac3123f53774 Mon Sep 17 00:00:00 2001 From: jayhack Date: Wed, 19 Feb 2025 14:25:25 -0800 Subject: [PATCH 2/3] . --- tests/unit/codegen/extensions/test_tools.py | 61 +++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/unit/codegen/extensions/test_tools.py b/tests/unit/codegen/extensions/test_tools.py index 67122ca17..458159268 100644 --- a/tests/unit/codegen/extensions/test_tools.py +++ b/tests/unit/codegen/extensions/test_tools.py @@ -12,6 +12,7 @@ list_directory, move_symbol, rename_file, + replacement_edit, reveal_symbol, search, semantic_edit, @@ -179,3 +180,63 @@ def test_create_pr_review_comment(codebase): assert "error" not in result assert result["status"] == "success" assert result["message"] == "Review comment created successfully" + + +def test_replacement_edit(codebase): + """Test regex-based replacement editing.""" + # Test basic replacement + result = replacement_edit( + codebase, + filepath="src/main.py", + pattern=r'print\("Hello, world!"\)', + replacement='print("Goodbye, world!")', + ) + assert "error" not in result + assert result["status"] == "success" + assert 'print("Goodbye, world!")' in result["new_content"] + + # Test with line range + result = replacement_edit( + codebase, + filepath="src/main.py", + pattern=r"Greeter", + replacement="Welcomer", + start=5, # Class definition line + end=7, + ) + assert "error" not in result + assert result["status"] == "success" + assert "class Welcomer" in result["new_content"] + + # Test with regex groups + result = replacement_edit( + codebase, + filepath="src/main.py", + pattern=r"def (\w+)\(\):", + replacement=r"def \1_function():", + ) + assert "error" not in result + assert result["status"] == "success" + assert "def hello_function():" in result["new_content"] + + # Test with count limit + result = replacement_edit( + codebase, + filepath="src/main.py", + pattern=r"def", + replacement="async def", + count=1, # Only replace first occurrence + ) + assert "error" not in result + assert result["status"] == "success" + assert result["new_content"].count("async def") == 1 + + # Test no matches + result = replacement_edit( + codebase, + filepath="src/main.py", + pattern=r"nonexistent_pattern", + replacement="replacement", + ) + assert result["status"] == "unchanged" + assert "No matches found" in result["message"] From c8c055197fbd8c8c69301433b9ea41086ee9decd Mon Sep 17 00:00:00 2001 From: jayhack Date: Wed, 19 Feb 2025 14:27:43 -0800 Subject: [PATCH 3/3] . --- src/codegen/extensions/tools/replacement_edit.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/codegen/extensions/tools/replacement_edit.py b/src/codegen/extensions/tools/replacement_edit.py index 3cdb219da..de3868ac7 100644 --- a/src/codegen/extensions/tools/replacement_edit.py +++ b/src/codegen/extensions/tools/replacement_edit.py @@ -117,7 +117,10 @@ def replacement_edit( raise ValueError(msg) # Perform the replacement - new_section = regex.sub(replacement, section_content, count=count) + if count is None: + new_section = regex.sub(replacement, section_content) + else: + new_section = regex.sub(replacement, section_content, count=count) # If no changes were made, return early if new_section == section_content: