diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a169cf8ad..7417db81e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -51,6 +51,10 @@ uv run pytest tests/unit -n auto uv run pytest tests/integration/codemod/test_codemods.py -n auto ``` +> [!TIP] +> +> - If you hit `OSError: [Errno 24] Too many open files` on Linux, raise the open-file limit of your shell (e.g. `ulimit -n 4096`) before running the tests. + ## Pull Request Process 1. Fork the repository and create your branch from `develop`. diff --git a/docs/building-with-codegen/imports.mdx b/docs/building-with-codegen/imports.mdx index c66e7736d..95ecff990 100644 --- a/docs/building-with-codegen/imports.mdx +++ b/docs/building-with-codegen/imports.mdx @@ -69,6 +69,11 @@ print(f"From file: {import_stmt.from_file.filepath}") print(f"To file: {import_stmt.to_file.filepath}") ``` + +With Python, the entries of the `PYTHONPATH` environment variable (exposed through `sys.path`) are considered when resolving +packages, but only if the `py_resolve_syspath` codebase config option is enabled (it is off by default). Paths listed in `import_resolution_paths` are searched first. + + ## Working with External Modules You can determine if an import references an [ExternalModule](/api-reference/core/ExternalModule) by checking the type of [Import.imported_symbol](/api-reference/core/Import#imported-symbol), like so: diff --git a/src/codegen/cli/mcp/resources/system_prompt.py b/src/codegen/cli/mcp/resources/system_prompt.py index a44bb2f38..9535570ab 100644 --- a/src/codegen/cli/mcp/resources/system_prompt.py +++ b/src/codegen/cli/mcp/resources/system_prompt.py @@ -2858,6 +2858,11 @@ def validate_data(data: dict) -> bool: print(f"To file: {import_stmt.to_file.filepath}") ``` + +With Python one can specify the `PYTHONPATH` environment variable which is then considered when resolving +packages. 
+ + ## Working with External Modules You can determine if an import references an [ExternalModule](/api-reference/core/ExternalModule) by checking the type of [Import.imported_symbol](/api-reference/core/Import#imported-symbol), like so: diff --git a/src/codegen/configs/models/codebase.py b/src/codegen/configs/models/codebase.py index fcc7d3617..88643cfcd 100644 --- a/src/codegen/configs/models/codebase.py +++ b/src/codegen/configs/models/codebase.py @@ -17,7 +17,9 @@ def __init__(self, prefix: str = "CODEBASE", *args, **kwargs) -> None: disable_graph: bool = False disable_file_parse: bool = False generics: bool = True + import_resolution_paths: list[str] = Field(default_factory=lambda: []) import_resolution_overrides: dict[str, str] = Field(default_factory=lambda: {}) + py_resolve_syspath: bool = False ts_dependency_manager: bool = False ts_language_engine: bool = False v8_ts_engine: bool = False diff --git a/src/codegen/sdk/python/import_resolution.py b/src/codegen/sdk/python/import_resolution.py index 46f9de63c..5cf8142ba 100644 --- a/src/codegen/sdk/python/import_resolution.py +++ b/src/codegen/sdk/python/import_resolution.py @@ -1,6 +1,7 @@ from __future__ import annotations import os +import sys from typing import TYPE_CHECKING from codegen.sdk.core.autocommit import reader @@ -104,6 +105,15 @@ def resolve_import(self, base_path: str | None = None, *, add_module_name: str | base_path, module_source.replace(".", "/") + "/" + symbol_name + ".py", ) + + # =====[ Check if we are importing an entire file with custom resolve path or sys.path enabled ]===== + if len(self.ctx.config.import_resolution_paths) > 0 or self.ctx.config.py_resolve_syspath: + # Handle resolve overrides first if both is set + resolve_paths: list[str] = self.ctx.config.import_resolution_paths + (sys.path if self.ctx.config.py_resolve_syspath else []) + if file := self._file_by_custom_resolve_paths(resolve_paths, filepath): + return ImportResolution(from_file=file, symbol=None, 
imports_file=True) + + # =====[ Default path ]===== if file := self.ctx.get_file(filepath): return ImportResolution(from_file=file, symbol=None, imports_file=True) @@ -113,8 +123,16 @@ def resolve_import(self, base_path: str | None = None, *, add_module_name: str | # You can't do `from a.b.c import foo` => `foo.utils.x` right now since `foo` is just a file... return ImportResolution(from_file=file, symbol=None, imports_file=True) - # =====[ Check if `module.py` file exists in the graph ]===== + # =====[ Check if `module.py` file exists in the graph with custom resolve path or sys.path enabled ]===== filepath = module_source.replace(".", "/") + ".py" + if len(self.ctx.config.import_resolution_paths) > 0 or self.ctx.config.py_resolve_syspath: + # Handle resolve overrides first if both is set + resolve_paths: list[str] = self.ctx.config.import_resolution_paths + (sys.path if self.ctx.config.py_resolve_syspath else []) + if file := self._file_by_custom_resolve_paths(resolve_paths, filepath): + symbol = file.get_node_by_name(symbol_name) + return ImportResolution(from_file=file, symbol=symbol) + + # =====[ Check if `module.py` file exists in the graph ]===== filepath = os.path.join(base_path, filepath) if file := self.ctx.get_file(filepath): symbol = file.get_node_by_name(symbol_name) @@ -163,6 +181,20 @@ def resolve_import(self, base_path: str | None = None, *, add_module_name: str | # ext = ExternalModule.from_import(self) # return ImportResolution(symbol=ext) + @noapidoc + @reader + def _file_by_custom_resolve_paths(self, resolve_paths: list[str], filepath: str) -> SourceFile | None: + """Check if a certain file import can be found within a set sys.path + + Returns either None or the SourceFile. 
+ """ + for resolve_path in resolve_paths: + filepath_new: str = os.path.join(resolve_path, filepath) + if file := self.ctx.get_file(filepath_new): + return file + + return None + @noapidoc @reader def _relative_to_absolute_import(self, relative_import: str) -> str: diff --git a/src/codegen/sdk/system-prompt.txt b/src/codegen/sdk/system-prompt.txt index 1e6d6a904..95905981d 100644 --- a/src/codegen/sdk/system-prompt.txt +++ b/src/codegen/sdk/system-prompt.txt @@ -2879,6 +2879,11 @@ print(f"From file: {import_stmt.from_file.filepath}") print(f"To file: {import_stmt.to_file.filepath}") ``` + +With Python, the entries of the `PYTHONPATH` environment variable (exposed through `sys.path`) are considered when resolving +packages, but only if the `py_resolve_syspath` codebase config option is enabled (it is off by default). Paths listed in `import_resolution_paths` are searched first. + + ## Working with External Modules You can determine if an import references an [ExternalModule](/api-reference/core/ExternalModule) by checking the type of [Import.imported_symbol](/api-reference/core/Import#imported-symbol), like so: diff --git a/tests/unit/codegen/sdk/python/import_resolution/test_import_resolution.py b/tests/unit/codegen/sdk/python/import_resolution/test_import_resolution.py index 63c58b5af..1fec2854a 100644 --- a/tests/unit/codegen/sdk/python/import_resolution/test_import_resolution.py +++ b/tests/unit/codegen/sdk/python/import_resolution/test_import_resolution.py @@ -1,3 +1,4 @@ +import sys from typing import TYPE_CHECKING from codegen.sdk.codebase.factory.get_session import get_codebase_session @@ -191,7 +192,7 @@ def update(): "consumer.py": """ from a.b.c import src as operations -def func_1(): +def func(): operations.update() """, }, @@ -215,6 +216,232 @@ def func_1(): assert call_site.file == consumer_file +def test_import_resolution_file_syspath_inactive(tmpdir: str, monkeypatch) -> None: + """Tests function.usages returns usages from file imports""" + # language=python + with get_codebase_session( + tmpdir, + files={ + "a/b/c/src.py": """ +def update(): + pass +""", + "consumer.py": """ +from b.c import src as operations + +def 
func(): + operations.update() +""", + }, + ) as codebase: + src_file: SourceFile = codebase.get_file("a/b/c/src.py") + consumer_file: SourceFile = codebase.get_file("consumer.py") + + # Disable resolution via sys.path + codebase.ctx.config.py_resolve_syspath = False + + # =====[ Imports cannot be found without sys.path being set and not active ]===== + assert len(consumer_file.imports) == 1 + src_import: Import = consumer_file.imports[0] + src_import_resolution: ImportResolution = src_import.resolve_import() + assert src_import_resolution is None + + # Modify sys.path for this test only + monkeypatch.syspath_prepend("a") + + # =====[ Imports cannot be found with sys.path set but not active ]===== + src_import_resolution = src_import.resolve_import() + assert src_import_resolution is None + + +def test_import_resolution_file_syspath_active(tmpdir: str, monkeypatch) -> None: + """Tests function.usages returns usages from file imports""" + # language=python + with get_codebase_session( + tmpdir, + files={ + "a/b/c/src.py": """ +def update(): + pass +""", + "consumer.py": """ +from b.c import src as operations + +def func(): + operations.update() +""", + }, + ) as codebase: + src_file: SourceFile = codebase.get_file("a/b/c/src.py") + consumer_file: SourceFile = codebase.get_file("consumer.py") + + # Enable resolution via sys.path + codebase.ctx.config.py_resolve_syspath = True + + # =====[ Imports cannot be found without sys.path being set ]===== + assert len(consumer_file.imports) == 1 + src_import: Import = consumer_file.imports[0] + src_import_resolution: ImportResolution = src_import.resolve_import() + assert src_import_resolution is None + + # Modify sys.path for this test only + monkeypatch.syspath_prepend("a") + + # =====[ Imports can be found with sys.path set and active ]===== + codebase.ctx.config.py_resolve_syspath = True + src_import_resolution = src_import.resolve_import() + assert src_import_resolution + assert src_import_resolution.from_file is src_file 
+ assert src_import_resolution.imports_file is True + + +def test_import_resolution_file_custom_resolve_path(tmpdir: str) -> None: + """Tests function.usages returns usages from file imports""" + # language=python + with get_codebase_session( + tmpdir, + files={ + "a/b/c/src.py": """ +def update(): + pass +""", + "consumer.py": """ +from b.c import src as operations +from c import src as operations2 + +def func(): + operations.update() +""", + }, + ) as codebase: + src_file: SourceFile = codebase.get_file("a/b/c/src.py") + consumer_file: SourceFile = codebase.get_file("consumer.py") + + # =====[ Imports cannot be found without custom resolve path being set ]===== + assert len(consumer_file.imports) == 2 + src_import: Import = consumer_file.imports[0] + src_import_resolution: ImportResolution = src_import.resolve_import() + assert src_import_resolution is None + + # =====[ Imports cannot be found with custom resolve path set to invalid path ]===== + codebase.ctx.config.import_resolution_paths = ["x"] + src_import_resolution = src_import.resolve_import() + assert src_import_resolution is None + + # =====[ Imports can be found with custom resolve path set ]===== + codebase.ctx.config.import_resolution_paths = ["a"] + src_import_resolution = src_import.resolve_import() + assert src_import_resolution + assert src_import_resolution.from_file is src_file + assert src_import_resolution.imports_file is True + + # =====[ Imports can be found with custom resolve multi-path set ]===== + src_import = consumer_file.imports[1] + codebase.ctx.config.import_resolution_paths = ["a/b"] + src_import_resolution = src_import.resolve_import() + assert src_import_resolution + assert src_import_resolution.from_file is src_file + assert src_import_resolution.imports_file is True + + +def test_import_resolution_file_custom_resolve_and_syspath_precedence(tmpdir: str, monkeypatch) -> None: + """Tests function.usages returns usages from file imports""" + # language=python + with 
get_codebase_session( + tmpdir, + files={ + "a/c/src.py": """ +def update1(): + pass +""", + "a/b/c/src.py": """ +def update2(): + pass +""", + "consumer.py": """ +from c import src as operations + +def func(): + operations.update2() +""", + }, + ) as codebase: + src_file: SourceFile = codebase.get_file("a/b/c/src.py") + consumer_file: SourceFile = codebase.get_file("consumer.py") + + # Ensure we don't have overrites and enable syspath resolution + codebase.ctx.config.import_resolution_paths = [] + codebase.ctx.config.py_resolve_syspath = True + + # =====[ Import with sys.path set can be found ]===== + assert len(consumer_file.imports) == 1 + # Modify sys.path for this test only + monkeypatch.syspath_prepend("a") + src_import: Import = consumer_file.imports[0] + src_import_resolution = src_import.resolve_import() + assert src_import_resolution + assert src_import_resolution.from_file.file_path == "a/c/src.py" + + # =====[ Imports can be found with custom resolve over sys.path ]===== + codebase.ctx.config.import_resolution_paths = ["a/b"] + src_import_resolution = src_import.resolve_import() + assert src_import_resolution + assert src_import_resolution.from_file is src_file + assert src_import_resolution.imports_file is True + + +def test_import_resolution_default_conflicts_overrite(tmpdir: str, monkeypatch) -> None: + """Tests function.usages returns usages from file imports""" + # language=python + with get_codebase_session( + tmpdir, + files={ + "a/src.py": """ +def update1(): + pass +""", + "b/a/src.py": """ +def update2(): + pass +""", + "consumer.py": """ +from a import src as operations + +def func(): + operations.update2() +""", + }, + ) as codebase: + src_file: SourceFile = codebase.get_file("a/src.py") + src_file_overrite: SourceFile = codebase.get_file("b/a/src.py") + consumer_file: SourceFile = codebase.get_file("consumer.py") + + # Ensure we don't have overrites and enable syspath resolution + codebase.ctx.config.import_resolution_paths = [] + 
codebase.ctx.config.py_resolve_syspath = True + + # =====[ Default import works ]===== + assert len(consumer_file.imports) == 1 + src_import: Import = consumer_file.imports[0] + src_import_resolution = src_import.resolve_import() + assert src_import_resolution + assert src_import_resolution.from_file is src_file + + # =====[ Sys.path overrite has precedence ]===== + monkeypatch.syspath_prepend("b") + src_import_resolution = src_import.resolve_import() + assert src_import_resolution + assert src_import_resolution.from_file is not src_file + assert src_import_resolution.from_file is src_file_overrite + + # =====[ Custom overrite has precedence ]===== + codebase.ctx.config.import_resolution_paths = ["b"] + src_import_resolution = src_import.resolve_import() + assert src_import_resolution + assert src_import_resolution.from_file is not src_file + assert src_import_resolution.from_file is src_file_overrite + + def test_import_resolution_init_wildcard(tmpdir: str) -> None: """Tests that named import from a file with wildcard resolves properly""" # language=python