From 14fa1e28f0d0f81b7a48279b9661170bb4926220 Mon Sep 17 00:00:00 2001 From: jayhack Date: Wed, 19 Feb 2025 18:24:24 -0800 Subject: [PATCH 1/4] . --- src/codegen/extensions/tools/semantic_edit.py | 197 +++++++++++------- 1 file changed, 118 insertions(+), 79 deletions(-) diff --git a/src/codegen/extensions/tools/semantic_edit.py b/src/codegen/extensions/tools/semantic_edit.py index fd30efa93..026f5d6af 100644 --- a/src/codegen/extensions/tools/semantic_edit.py +++ b/src/codegen/extensions/tools/semantic_edit.py @@ -85,32 +85,30 @@ def _extract_code_block(llm_response: str) -> str: return matches[-1] -def _merge_content(original_content: str, edited_content: str, start: int, end: int) -> str: - """Merge edited content with original content, preserving content outside the edit range. +def get_llm_edit(original_file_section: str, edit_content: str) -> str: + """Get edited content from LLM. Args: - original_content: Original file content - edited_content: New content for the specified range - start: Start line (1-indexed) - end: End line (1-indexed or -1 for end of file) + original_file_section: Original content to edit + edit_content: Edit specification/instructions Returns: - Merged content + LLM response with edited content """ - original_lines = original_content.split("\n") - edited_lines = edited_content.split("\n") - - if start == -1 and end == -1: # Append mode - return original_content + "\n" + edited_content + system_message = COMMANDER_SYSTEM_PROMPT + human_message = _HUMAN_PROMPT_DRAFT_EDITOR + prompt = ChatPromptTemplate.from_messages([system_message, human_message]) - # Convert to 0-indexed - start_idx = start - 1 - end_idx = end - 1 if end != -1 else len(original_lines) + llm = ChatAnthropic( + model="claude-3-5-sonnet-latest", + temperature=0, + max_tokens=5000, + ) - # Merge the content - result_lines = original_lines[:start_idx] + edited_lines + original_lines[end_idx + 1 :] + chain = prompt | llm + response = chain.invoke({"original_file_section": original_file_section, "edit_content": edit_content}) - return "\n".join(result_lines) + return response.content def _validate_edit_boundaries(original_lines: list[str], modified_lines: list[str], start_idx: int, end_idx: int) -> None: @@ -126,14 +124,107 @@ def _validate_edit_boundaries(original_lines: list[str], modified_lines: list[st ValueError: If changes were made outside the specified range """ # Check lines before start_idx - for i in range(start_idx): - if i >= len(original_lines) or i >= len(modified_lines) or original_lines[i] != modified_lines[i]: + for i in range(min(start_idx, len(original_lines), len(modified_lines))): + if original_lines[i] != modified_lines[i]: msg = f"Edit modified line {i + 1} which is before the specified start line {start_idx + 1}" raise ValueError(msg) + # Check lines after end_idx + remaining_lines = len(original_lines) - (end_idx + 1) + if remaining_lines > 0: + orig_suffix = original_lines[-remaining_lines:] + if len(modified_lines) >= remaining_lines: + mod_suffix = modified_lines[-remaining_lines:] + if orig_suffix != mod_suffix: + msg = f"Edit modified content after the specified end line {end_idx + 1}" + raise ValueError(msg) + + +def extract_file_window(file_content: str, start: int = 1, end: int = -1) -> tuple[str, int, int]: + """Extract a window of content from a file. + + Args: + file_content: Content of the file + start: Start line (1-indexed, default: 1) + end: End line (1-indexed or -1 for end of file, default: -1) + + Returns: + Tuple of (extracted_content, start_idx, end_idx) + """ + # Split into lines and handle line numbers + lines = file_content.split("\n") + total_lines = len(lines) + + # Convert to 0-indexed + start_idx = start - 1 + end_idx = end - 1 if end != -1 else total_lines - 1 + + # Get the content window + window_lines = lines[start_idx : end_idx + 1] + window_content = "\n".join(window_lines) + + return window_content, start_idx, end_idx + + +def apply_semantic_edit(codebase: Codebase, filepath: str, edited_content: str, start: int = 1, end: int = -1) -> tuple[str, str]: + """Apply a semantic edit to a section of content. + + Args: + codebase: Codebase object + filepath: Path to the file to edit + edited_content: New content for the specified range + start: Start line (1-indexed, default: 1) + end: End line (1-indexed or -1 for end of file, default: -1) + + Returns: + Tuple of (new_content, diff) + """ + # Get the original content + file = codebase.get_file(filepath) + original_content = file.content + + # Handle append mode + if start == -1 and end == -1: + new_content = original_content + "\n" + edited_content + diff = generate_diff(original_content, new_content) + file.edit(new_content) + codebase.commit() + return new_content, diff + + # Split content into lines + original_lines = original_content.splitlines() + edited_lines = edited_content.splitlines() + + # Convert to 0-indexed + start_idx = start - 1 + end_idx = end - 1 if end != -1 else len(original_lines) - 1 + + # Splice together: prefix + edited content + suffix + new_lines = ( + original_lines[:start_idx] # Prefix + + edited_lines # Edited section + + original_lines[end_idx + 1 :] # Suffix + ) + + # Preserve original file's newline if it had one + new_content = "\n".join(new_lines) + ("\n" if original_content.endswith("\n") else "") + + # Validate the edit boundaries + _validate_edit_boundaries(original_lines, new_lines, start_idx, end_idx) + + # Apply the edit + file.edit(new_content) + codebase.commit() + + # Generate diff from the original section to the edited section + original_section, _, _ = extract_file_window(original_content, start, end) + diff = generate_diff(original_section, edited_content) + + return new_content, diff + def semantic_edit(codebase: Codebase, filepath: str, edit_content: str, start: int = 1, end: int = -1) -> SemanticEditObservation: - """Edit a file using semantic editing with line range support. This is an internal api and should not be called by the LLM.""" + """Edit a file using semantic editing with line range support.""" try: file = codebase.get_file(filepath) except ValueError: @@ -158,46 +249,12 @@ def semantic_edit(codebase: Codebase, filepath: str, edit_content: str, start: i line_count=len(original_lines), ) - # Handle append mode - if start == -1 and end == -1: - try: - file.add_symbol_from_source(edit_content) - codebase.commit() - - return SemanticEditObservation( - status="success", - filepath=filepath, - new_content=file.content, - diff=generate_diff(original_content, file.content), - ) - except Exception as e: - msg = f"Failed to append content: {e!s}" - raise ValueError(msg) - - # For range edits, get the context for the draft editor - total_lines = len(original_lines) - start_idx = start - 1 - end_idx = end - 1 if end != -1 else total_lines - - # Get the context for the edit - context_lines = original_lines[start_idx : end_idx + 1] - original_file_section = "\n".join(context_lines) + # Extract the window of content to edit + original_file_section, start_idx, end_idx = extract_file_window(original_content, start, end) - # =====[ Get the LLM ]===== - system_message = COMMANDER_SYSTEM_PROMPT - human_message = _HUMAN_PROMPT_DRAFT_EDITOR - prompt = ChatPromptTemplate.from_messages([system_message, human_message]) - llm = ChatAnthropic( - model="claude-3-5-sonnet-latest", - temperature=0, - max_tokens=5000, - ) - chain = prompt | llm - response = chain.invoke({"original_file_section": original_file_section, "edit_content": edit_content}) - - # Extract code from markdown code block + # Get edited content from LLM try: - modified_segment = _extract_code_block(response.content) + modified_segment = _extract_code_block(get_llm_edit(original_file_section, edit_content)) except ValueError as e: return SemanticEditObservation( status="error", @@ -205,13 +262,9 @@ def semantic_edit(codebase: Codebase, filepath: str, edit_content: str, start: i filepath=filepath, ) - # Merge the edited content with the original - new_content = _merge_content(original_content, modified_segment, start, end) - new_lines = new_content.splitlines() - - # Validate that no changes were made before the start line + # Apply the semantic edit try: - _validate_edit_boundaries(original_lines, new_lines, start_idx, end_idx) + new_content, diff = apply_semantic_edit(codebase, filepath, modified_segment, start, end) except ValueError as e: return SemanticEditObservation( status="error", @@ -219,20 +272,6 @@ def semantic_edit(codebase: Codebase, filepath: str, edit_content: str, start: i filepath=filepath, ) - # Generate diff - diff = generate_diff(original_content, new_content) - - # Apply the edit - try: - file.edit(new_content) - codebase.commit() - except Exception as e: - return SemanticEditObservation( - status="error", - error=f"Failed to apply edit: {e!s}", - filepath=filepath, - ) - return SemanticEditObservation( status="success", filepath=filepath, From 3c79953b2077edc2edc5a5e5035d63c74ef9d50d Mon Sep 17 00:00:00 2001 From: jayhack Date: Wed, 19 Feb 2025 18:38:25 -0800 Subject: [PATCH 2/4] . --- src/codegen/extensions/tools/semantic_edit.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/codegen/extensions/tools/semantic_edit.py b/src/codegen/extensions/tools/semantic_edit.py index 026f5d6af..62e933e2b 100644 --- a/src/codegen/extensions/tools/semantic_edit.py +++ b/src/codegen/extensions/tools/semantic_edit.py @@ -208,6 +208,9 @@ def apply_semantic_edit(codebase: Codebase, filepath: str, edited_content: str, # Preserve original file's newline if it had one new_content = "\n".join(new_lines) + ("\n" if original_content.endswith("\n") else "") + print("-----") + print("\n".join(original_content.split("\n")[-4:])) + print("-------") # Validate the edit boundaries _validate_edit_boundaries(original_lines, new_lines, start_idx, end_idx) From 9a1ae74b48e3b5f9aaf0b70376ff3257606ee569 Mon Sep 17 00:00:00 2001 From: jayhack Date: Wed, 19 Feb 2025 18:47:10 -0800 Subject: [PATCH 3/4] . --- src/codegen/extensions/tools/semantic_edit.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/codegen/extensions/tools/semantic_edit.py b/src/codegen/extensions/tools/semantic_edit.py index 62e933e2b..916590d9f 100644 --- a/src/codegen/extensions/tools/semantic_edit.py +++ b/src/codegen/extensions/tools/semantic_edit.py @@ -218,6 +218,9 @@ def apply_semantic_edit(codebase: Codebase, filepath: str, edited_content: str, # Apply the edit file.edit(new_content) codebase.commit() + print("Manually editing") + with open(file.path, "w") as f: + f.write(new_content) # Generate diff from the original section to the edited section original_section, _, _ = extract_file_window(original_content, start, end) From cd9927b24501a686f0447bb4a2b935899fc829f7 Mon Sep 17 00:00:00 2001 From: jayhack Date: Wed, 19 Feb 2025 18:47:34 -0800 Subject: [PATCH 4/4] . --- src/codegen/extensions/tools/semantic_edit.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/codegen/extensions/tools/semantic_edit.py b/src/codegen/extensions/tools/semantic_edit.py index 916590d9f..6a1d043c4 100644 --- a/src/codegen/extensions/tools/semantic_edit.py +++ b/src/codegen/extensions/tools/semantic_edit.py @@ -208,17 +208,12 @@ def apply_semantic_edit(codebase: Codebase, filepath: str, edited_content: str, # Preserve original file's newline if it had one new_content = "\n".join(new_lines) + ("\n" if original_content.endswith("\n") else "") - print("-----") - print("\n".join(original_content.split("\n")[-4:])) - print("-------") - # Validate the edit boundaries _validate_edit_boundaries(original_lines, new_lines, start_idx, end_idx) # Apply the edit file.edit(new_content) codebase.commit() - print("Manually editing") with open(file.path, "w") as f: f.write(new_content)