From f96f933b03378e054d1dd873a2d566d31164ee77 Mon Sep 17 00:00:00 2001
From: codegen-bot
Date: Wed, 29 Jan 2025 01:46:01 +0100
Subject: [PATCH] raw edit

---
 src/codegen/sdk/core/file.py                  |  67 ++++++++++
 .../codegen/sdk/codebase/file/test_file.py    |  22 ----
 .../sdk/codebase/file/test_file_edit.py       | 119 ++++++++++++++++++
 3 files changed, 186 insertions(+), 22 deletions(-)
 create mode 100644 tests/unit/codegen/sdk/codebase/file/test_file_edit.py

diff --git a/src/codegen/sdk/core/file.py b/src/codegen/sdk/core/file.py
index 259bd9961..00815adbb 100644
--- a/src/codegen/sdk/core/file.py
+++ b/src/codegen/sdk/core/file.py
@@ -344,6 +344,73 @@ def parse(self, G: "CodebaseGraph") -> None:
     def _compute_dependencies(self, *args, **kwargs) -> None:
         pass
 
+    @writer
+    def edit(self, new_src: str, fix_indentation: bool = False, priority: int = 0, dedupe: bool = True) -> None:
+        """Replace the source of this file with new_src.
+
+        For non-source files, replaces the entire content. For source files, delegates to the parent
+        Editable implementation which uses TreeSitter nodes for precise editing.
+
+        Args:
+            new_src (str): The new source text to replace the current text with.
+            fix_indentation (bool): If True, adjusts the indentation of new_src to match the current
+                text's indentation level. Only applies to source files. Defaults to False.
+            priority (int): The priority of the edit transaction. Higher priority edits are
+                applied first. Defaults to 0.
+            dedupe (bool): If True, deduplicates identical transactions. Defaults to True.
+
+        Raises:
+            ValueError: If attempting to edit a binary file.
+
+        Returns:
+            None
+        """
+        if self.is_binary:
+            msg = "Cannot replace content in binary files"
+            raise ValueError(msg)
+
+        if self.ts_node is None or not isinstance(self, SourceFile):
+            self._edit_byte_range(new_src, 0, len(self.content_bytes), priority, dedupe)
+        else:
+            super().edit(new_src, fix_indentation, priority, dedupe)
+
+    @writer
+    def replace(self, old: str, new: str, count: int = -1, is_regex: bool = False, priority: int = 0) -> int:
+        """Replace occurrences of text in the file.
+
+        For non-source files, performs a direct string replacement over the whole content. For source
+        files, delegates to the parent implementation via super().replace().
+
+        Args:
+            old (str): The text to be replaced.
+            new (str): The text to replace with.
+            count (int): Maximum number of replacements to make. A negative value replaces all
+                occurrences. Defaults to -1.
+            is_regex (bool): If True, treat 'old' as a regular expression pattern.
+                Only applies to source files; non-source files always match 'old' literally.
+            priority (int): The priority of the edit transaction. Higher priority edits are
+                applied first. Defaults to 0.
+
+        Raises:
+            ValueError: If attempting to replace content in a binary file.
+
+        Returns:
+            int: For non-source files, the number of occurrences replaced (0 if 'old' is absent).
+                For source files, the value returned by the parent implementation.
+        """
+        if self.is_binary:
+            msg = "Cannot replace content in binary files"
+            raise ValueError(msg)
+
+        if self.ts_node is not None and isinstance(self, SourceFile):
+            return super().replace(old, new, count, is_regex, priority)
+        # No syntax tree to edit: rewrite the whole byte range with the substituted text.
+        content = self.content
+        hits = content.count(old) if count < 0 else min(count, content.count(old))
+        if hits:
+            self._edit_byte_range(content.replace(old, new, count), 0, len(self.content_bytes), priority)
+        return hits
+
 
 TImport = TypeVar("TImport", bound="Import")
 TFunction = TypeVar("TFunction", bound="Function")
diff --git a/tests/unit/codegen/sdk/codebase/file/test_file.py b/tests/unit/codegen/sdk/codebase/file/test_file.py
index a2c572ac2..c3a6fdce2 100644
--- a/tests/unit/codegen/sdk/codebase/file/test_file.py
+++ b/tests/unit/codegen/sdk/codebase/file/test_file.py
@@ -59,28 +59,6 @@ def test_codebase_files(tmpdir) -> None:
         assert {f for f in codebase.files(extensions=[".bin"])} == {file3}
 
 
-@pytest.mark.skip("MDX editing is broken")
-def test_codebase_edit_mdx(tmpdir) -> None:
-    """Editing MDx seems broken currently - it will just prepend to the file"""
-    with get_codebase_session(tmpdir=tmpdir, files={"file1.mdx": "# Header", "file2.tsx": "console.log('hello, world!')"}) as codebase:
-        file = codebase.get_file("file1.mdx")
-        file.edit("NEW TEXT")
-        codebase.commit()
-        file = codebase.get_file("file1.mdx")
-        assert file.content == "NEW TEXT"
-
-
-@pytest.mark.skip("MDX replacing is broken")
-def test_codebase_replace_mdx(tmpdir) -> None:
-    """Editing MDx seems broken currently - it will just prepend to the file"""
-    with get_codebase_session(tmpdir=tmpdir, files={"file1.mdx": "# Header"}) as codebase:
-        file = codebase.get_file("file1.mdx")
-        file.replace("# Header", "NEW TEXT")
-        codebase.commit()
-        file = codebase.get_file("file1.mdx")
-        assert file.content == "NEW TEXT"
-
-
 @pytest.mark.skipif(sys.platform == "darwin", reason="macOS is case-insensitive")
 def test_file_extensions_ignore_case(tmpdir) -> None:
     with get_codebase_session(tmpdir=tmpdir, files={"file1.py": "print(123)", "file2.py":
"print(456)", "file3.bin": b"\x89PNG", "file4": "Hello world!"}) as codebase:
diff --git a/tests/unit/codegen/sdk/codebase/file/test_file_edit.py b/tests/unit/codegen/sdk/codebase/file/test_file_edit.py
new file mode 100644
index 000000000..240fa4cd5
--- /dev/null
+++ b/tests/unit/codegen/sdk/codebase/file/test_file_edit.py
@@ -0,0 +1,119 @@
+import pytest
+
+from codegen.sdk.codebase.factory.get_session import get_codebase_session
+from codegen.sdk.core.file import SourceFile
+
+
+def test_codebase_edit_mdx(tmpdir) -> None:
+    """Editing an MDX file replaces its whole content after commit"""
+    with get_codebase_session(tmpdir=tmpdir, files={"file1.mdx": "# Header", "file2.tsx": "console.log('hello, world!')"}) as codebase:
+        mdx = codebase.get_file("file1.mdx")
+        mdx.edit("NEW TEXT")
+        codebase.commit()
+        refreshed = codebase.get_file("file1.mdx")
+        assert refreshed.content == "NEW TEXT"
+
+
+def test_edit_json_file(tmpdir) -> None:
+    """Editing a JSON file swaps in the new document on every commit"""
+    with get_codebase_session(tmpdir=tmpdir, files={"config.json": '{"key": "value", "nested": {"foo": "bar"}}'}) as codebase:
+        config = codebase.get_file("config.json")
+        payloads = (
+            '{"newKey": "newValue"}',
+            '{"newKey": "newValue", "extra": true}',
+        )
+
+        # Each edit is a whole-file replacement; the second one edits the already-edited file
+        for payload in payloads:
+            config.edit(payload)
+            codebase.commit()
+            assert config.content == payload
+
+
+def test_edit_txt_file(tmpdir) -> None:
+    """Editing a plain text file works for single-line and multi-line content"""
+    with get_codebase_session(tmpdir=tmpdir, files={"data.txt": "Hello\nWorld\nTest"}) as codebase:
+        notes = codebase.get_file("data.txt")
+        replacements = (
+            "New World",
+            "Line 1\nLine 2\nLine 3",
+        )
+
+        # First a single line, then multi-line text, each committed before checking
+        for text in replacements:
+            notes.edit(text)
+            codebase.commit()
+            assert notes.content == text
+
+
+def test_codebase_replace_mdx(tmpdir) -> None:
+    """Replacing the only text in an MDX file leaves just the new text"""
+    with get_codebase_session(tmpdir=tmpdir, files={"file1.mdx": "# Header"}) as codebase:
+        mdx = codebase.get_file("file1.mdx")
+        mdx.replace("# Header", "NEW TEXT")
+        codebase.commit()
+        refreshed = codebase.get_file("file1.mdx")
+        assert refreshed.content == "NEW TEXT"
+
+
+def test_replace_non_source_file(tmpdir) -> None:
+    """Replace works on non-source files with one or several matches"""
+    with get_codebase_session(tmpdir=tmpdir, files={"doc.mdx": "# Header\nThis is a test\nMore content", "config.json": '{"test": "value", "other": "test"}'}) as codebase:
+        # "test" occurs once in the MDX body
+        mdx_doc = codebase.get_file("doc.mdx")
+        mdx_doc.replace("test", "demo")
+        codebase.commit()
+        assert mdx_doc.content == "# Header\nThis is a demo\nMore content"
+
+        # "test" occurs twice in the JSON document; both must be rewritten
+        json_doc = codebase.get_file("config.json")
+        json_doc.replace("test", "demo")
+        codebase.commit()
+        assert json_doc.content == '{"demo": "value", "other": "demo"}'
+
+
+def test_edit_binary_file_fails(tmpdir) -> None:
+    """Both edit and replace reject binary files with a ValueError"""
+    png_magic = b"\x89PNG"  # PNG header
+    with get_codebase_session(tmpdir=tmpdir, files={"image.png": png_magic}) as codebase:
+        image = codebase.get_file("image.png")
+
+        with pytest.raises(ValueError, match="Cannot replace content in binary files"):
+            image.edit("new content")
+
+        with pytest.raises(ValueError, match="Cannot replace content in binary files"):
+            image.replace("old", "new")
+
+
+def test_edit_source_file_preserves_behavior(tmpdir) -> None:
+    """Editing a Python source file still yields a parsed SourceFile"""
+    with get_codebase_session(tmpdir=tmpdir, files={"script.py": "def test():\n print('hello')"}) as codebase:
+        script = codebase.get_file("script.py")
+
+        # Whole-file edit of a source file, then commit
+        script.edit("def test():\n print('world')")
+        codebase.commit()
+        assert script.content == "def test():\n print('world')"
+
+        # The edited file must still be a SourceFile exposing its function
+        assert isinstance(script, SourceFile)
+        assert script.get_function("test") is not None
+
+
+def test_transaction_ordering_non_source_files(tmpdir) -> None:
+    """Sequential edits with differing priorities land in commit order"""
+    with get_codebase_session(tmpdir=tmpdir, files={"doc.md": "# Header\nContent\nFooter"}) as codebase:
+        markdown = codebase.get_file("doc.md")
+        steps = (
+            ("# New Header\nContent\nFooter", 1),
+            ("# New Header\nNew Content\nFooter", 2),
+            ("# New Header\nNew Content\nNew Footer", 0),
+        )
+
+        # One commit per edit, so the transactions never conflict with each other
+        for text, priority in steps:
+            markdown.edit(text, priority=priority)
+            codebase.commit()
+
+        # The last committed edit wins regardless of its priority value
+        assert markdown.content == "# New Header\nNew Content\nNew Footer"