diff --git a/docs/building-with-codegen/files-and-directories.mdx b/docs/building-with-codegen/files-and-directories.mdx index a543bbd1b..d3dfd12b4 100644 --- a/docs/building-with-codegen/files-and-directories.mdx +++ b/docs/building-with-codegen/files-and-directories.mdx @@ -50,6 +50,32 @@ dir = file.directory exists = codebase.has_directory("path/to/dir") ``` +## Working with Non-Code Files (README, JSON, etc.) + +By default, Codegen focuses on source code files (Python, TypeScript, etc.). However, you can access all files in your codebase, including documentation, configuration, and other non-code files like README.md, package.json, or .env: + +```python +# Get all files in the codebase (including README, docs, config files) +files = codebase.files(extensions="*") + +# Print files that are not source code (documentation, config, etc.) +for file in files: + if not file.filepath.endswith(('.py', '.ts', '.js')): + print(f"📄 Non-code file: {file.filepath}") +``` + +You can also filter for specific file types: + +```python +# Get only markdown documentation files +docs = codebase.files(extensions=[".md", ".mdx"]) + +# Get configuration files +config_files = codebase.files(extensions=[".json", ".yaml", ".toml"]) +``` + +[`Directory`](../api-reference/core/Directory) offers similar APIs for accessing files and subdirectories. 
+ ## Raw Content and Metadata ```python diff --git a/src/codegen/sdk/core/codebase.py b/src/codegen/sdk/core/codebase.py index c5a1fda09..ac04058a1 100644 --- a/src/codegen/sdk/core/codebase.py +++ b/src/codegen/sdk/core/codebase.py @@ -224,6 +224,8 @@ def files(self, *, extensions: list[str] | Literal["*"] | None = None) -> list[T if extensions is None: # Return all source files files = self.G.get_nodes(NodeType.FILE) + elif isinstance(extensions, str) and extensions != "*": + raise ValueError("extensions must be a list of extensions or '*'") else: files = [] # Get all files with the specified extensions diff --git a/tests/unit/codebase/file/test_file.py b/tests/unit/codebase/file/test_file.py index c3a6fdce2..a2c572ac2 100644 --- a/tests/unit/codebase/file/test_file.py +++ b/tests/unit/codebase/file/test_file.py @@ -59,6 +59,28 @@ def test_codebase_files(tmpdir) -> None: assert {f for f in codebase.files(extensions=[".bin"])} == {file3} +@pytest.mark.skip("MDX editing is broken") +def test_codebase_edit_mdx(tmpdir) -> None: + """Editing MDX seems broken currently - it will just prepend to the file""" + with get_codebase_session(tmpdir=tmpdir, files={"file1.mdx": "# Header", "file2.tsx": "console.log('hello, world!')"}) as codebase: + file = codebase.get_file("file1.mdx") + file.edit("NEW TEXT") + codebase.commit() + file = codebase.get_file("file1.mdx") + assert file.content == "NEW TEXT" + + +@pytest.mark.skip("MDX replacing is broken") +def test_codebase_replace_mdx(tmpdir) -> None: + """Replacing text in MDX seems broken currently - it will just prepend to the file""" + with get_codebase_session(tmpdir=tmpdir, files={"file1.mdx": "# Header"}) as codebase: + file = codebase.get_file("file1.mdx") + file.replace("# Header", "NEW TEXT") + codebase.commit() + file = codebase.get_file("file1.mdx") + assert file.content == "NEW TEXT" + + @pytest.mark.skipif(sys.platform == "darwin", reason="macOS is case-insensitive") def test_file_extensions_ignore_case(tmpdir) -> None: with 
get_codebase_session(tmpdir=tmpdir, files={"file1.py": "print(123)", "file2.py": "print(456)", "file3.bin": b"\x89PNG", "file4": "Hello world!"}) as codebase: