diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index a169cf8ad..7417db81e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -51,6 +51,10 @@ uv run pytest tests/unit -n auto
uv run pytest tests/integration/codemod/test_codemods.py -n auto
```
+> [!TIP]
+>
+> - On Linux, if you hit `OSError: [Errno 24] Too many open files`, raise the open-file limit of your shell (see `ulimit -n`).
+
## Pull Request Process
1. Fork the repository and create your branch from `develop`.
diff --git a/docs/building-with-codegen/imports.mdx b/docs/building-with-codegen/imports.mdx
index c66e7736d..95ecff990 100644
--- a/docs/building-with-codegen/imports.mdx
+++ b/docs/building-with-codegen/imports.mdx
@@ -69,6 +69,11 @@ print(f"From file: {import_stmt.from_file.filepath}")
print(f"To file: {import_stmt.to_file.filepath}")
```
+
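+For Python, set the `import_resolution_paths` config option to search extra root directories when resolving imports, or enable
+`py_resolve_syspath` (off by default) to also search `sys.path`, which includes `PYTHONPATH` entries; the former is tried first.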
+
+
## Working with External Modules
You can determine if an import references an [ExternalModule](/api-reference/core/ExternalModule) by checking the type of [Import.imported_symbol](/api-reference/core/Import#imported-symbol), like so:
diff --git a/src/codegen/cli/mcp/resources/system_prompt.py b/src/codegen/cli/mcp/resources/system_prompt.py
index a44bb2f38..9535570ab 100644
--- a/src/codegen/cli/mcp/resources/system_prompt.py
+++ b/src/codegen/cli/mcp/resources/system_prompt.py
@@ -2858,6 +2858,11 @@ def validate_data(data: dict) -> bool:
print(f"To file: {import_stmt.to_file.filepath}")
```
+
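+For Python, set the `import_resolution_paths` config option to search extra root directories when resolving imports, or enable
+`py_resolve_syspath` (off by default) to also search `sys.path`, which includes `PYTHONPATH` entries; the former is tried first.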
+
+
## Working with External Modules
You can determine if an import references an [ExternalModule](/api-reference/core/ExternalModule) by checking the type of [Import.imported_symbol](/api-reference/core/Import#imported-symbol), like so:
diff --git a/src/codegen/configs/models/codebase.py b/src/codegen/configs/models/codebase.py
index fcc7d3617..88643cfcd 100644
--- a/src/codegen/configs/models/codebase.py
+++ b/src/codegen/configs/models/codebase.py
@@ -17,7 +17,9 @@ def __init__(self, prefix: str = "CODEBASE", *args, **kwargs) -> None:
disable_graph: bool = False
disable_file_parse: bool = False
generics: bool = True
+ import_resolution_paths: list[str] = Field(default_factory=lambda: [])
import_resolution_overrides: dict[str, str] = Field(default_factory=lambda: {})
+ py_resolve_syspath: bool = False
ts_dependency_manager: bool = False
ts_language_engine: bool = False
v8_ts_engine: bool = False
diff --git a/src/codegen/sdk/python/import_resolution.py b/src/codegen/sdk/python/import_resolution.py
index 46f9de63c..5cf8142ba 100644
--- a/src/codegen/sdk/python/import_resolution.py
+++ b/src/codegen/sdk/python/import_resolution.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import os
+import sys
from typing import TYPE_CHECKING
from codegen.sdk.core.autocommit import reader
@@ -104,6 +105,15 @@ def resolve_import(self, base_path: str | None = None, *, add_module_name: str |
base_path,
module_source.replace(".", "/") + "/" + symbol_name + ".py",
)
+
+ # =====[ Check if we are importing an entire file with custom resolve path or sys.path enabled ]=====
+ if len(self.ctx.config.import_resolution_paths) > 0 or self.ctx.config.py_resolve_syspath:
+ # Custom resolve paths are searched before sys.path entries when both are enabled
+ resolve_paths: list[str] = self.ctx.config.import_resolution_paths + (sys.path if self.ctx.config.py_resolve_syspath else [])
+ if file := self._file_by_custom_resolve_paths(resolve_paths, filepath):
+ return ImportResolution(from_file=file, symbol=None, imports_file=True)
+
+ # =====[ Default path ]=====
if file := self.ctx.get_file(filepath):
return ImportResolution(from_file=file, symbol=None, imports_file=True)
@@ -113,8 +123,16 @@ def resolve_import(self, base_path: str | None = None, *, add_module_name: str |
# You can't do `from a.b.c import foo` => `foo.utils.x` right now since `foo` is just a file...
return ImportResolution(from_file=file, symbol=None, imports_file=True)
- # =====[ Check if `module.py` file exists in the graph ]=====
+ # =====[ Check if `module.py` file exists in the graph with custom resolve path or sys.path enabled ]=====
filepath = module_source.replace(".", "/") + ".py"
+ if len(self.ctx.config.import_resolution_paths) > 0 or self.ctx.config.py_resolve_syspath:
+ # Custom resolve paths are searched before sys.path entries when both are enabled
+ resolve_paths: list[str] = self.ctx.config.import_resolution_paths + (sys.path if self.ctx.config.py_resolve_syspath else [])
+ if file := self._file_by_custom_resolve_paths(resolve_paths, filepath):
+ symbol = file.get_node_by_name(symbol_name)
+ return ImportResolution(from_file=file, symbol=symbol)
+
+ # =====[ Check if `module.py` file exists in the graph ]=====
filepath = os.path.join(base_path, filepath)
if file := self.ctx.get_file(filepath):
symbol = file.get_node_by_name(symbol_name)
@@ -163,6 +181,20 @@ def resolve_import(self, base_path: str | None = None, *, add_module_name: str |
# ext = ExternalModule.from_import(self)
# return ImportResolution(symbol=ext)
+ @noapidoc
+ @reader
+ def _file_by_custom_resolve_paths(self, resolve_paths: list[str], filepath: str) -> SourceFile | None:
+ """Return the first file found by joining `filepath` onto each entry of `resolve_paths`, in order.
+
+ Each candidate path is looked up with `self.ctx.get_file`; returns None when no entry yields a file.
+ """
+ for resolve_path in resolve_paths:  # earlier entries win over later ones
+ filepath_new: str = os.path.join(resolve_path, filepath)
+ if file := self.ctx.get_file(filepath_new):
+ return file
+
+ return None
+
@noapidoc
@reader
def _relative_to_absolute_import(self, relative_import: str) -> str:
diff --git a/src/codegen/sdk/system-prompt.txt b/src/codegen/sdk/system-prompt.txt
index 1e6d6a904..95905981d 100644
--- a/src/codegen/sdk/system-prompt.txt
+++ b/src/codegen/sdk/system-prompt.txt
@@ -2879,6 +2879,11 @@ print(f"From file: {import_stmt.from_file.filepath}")
print(f"To file: {import_stmt.to_file.filepath}")
```
+
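+For Python, set the `import_resolution_paths` config option to search extra root directories when resolving imports, or enable
+`py_resolve_syspath` (off by default) to also search `sys.path`, which includes `PYTHONPATH` entries; the former is tried first.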
+
+
## Working with External Modules
You can determine if an import references an [ExternalModule](/api-reference/core/ExternalModule) by checking the type of [Import.imported_symbol](/api-reference/core/Import#imported-symbol), like so:
diff --git a/tests/unit/codegen/sdk/python/import_resolution/test_import_resolution.py b/tests/unit/codegen/sdk/python/import_resolution/test_import_resolution.py
index 63c58b5af..1fec2854a 100644
--- a/tests/unit/codegen/sdk/python/import_resolution/test_import_resolution.py
+++ b/tests/unit/codegen/sdk/python/import_resolution/test_import_resolution.py
@@ -1,3 +1,4 @@
+import sys
from typing import TYPE_CHECKING
from codegen.sdk.codebase.factory.get_session import get_codebase_session
@@ -191,7 +192,7 @@ def update():
"consumer.py": """
from a.b.c import src as operations
-def func_1():
+def func():
operations.update()
""",
},
@@ -215,6 +216,232 @@ def func_1():
assert call_site.file == consumer_file
+def test_import_resolution_file_syspath_inactive(tmpdir: str, monkeypatch) -> None:
+ """Tests that `from b.c import src` stays unresolved while `py_resolve_syspath` is disabled"""
+ # language=python
+ with get_codebase_session(
+ tmpdir,
+ files={
+ "a/b/c/src.py": """
+def update():
+ pass
+""",
+ "consumer.py": """
+from b.c import src as operations
+
+def func():
+ operations.update()
+""",
+ },
+ ) as codebase:
+ src_file: SourceFile = codebase.get_file("a/b/c/src.py")  # NOTE(review): not referenced again in this test
+ consumer_file: SourceFile = codebase.get_file("consumer.py")
+
+ # Disable resolution via sys.path
+ codebase.ctx.config.py_resolve_syspath = False
+
+ # =====[ Imports cannot be found when sys.path is unmodified and sys.path resolution is disabled ]=====
+ assert len(consumer_file.imports) == 1
+ src_import: Import = consumer_file.imports[0]
+ src_import_resolution: ImportResolution = src_import.resolve_import()
+ assert src_import_resolution is None
+
+ # Modify sys.path for this test only
+ monkeypatch.syspath_prepend("a")
+
+ # =====[ Imports cannot be found when sys.path contains "a" but sys.path resolution is disabled ]=====
+ src_import_resolution = src_import.resolve_import()
+ assert src_import_resolution is None
+
+
+def test_import_resolution_file_syspath_active(tmpdir: str, monkeypatch) -> None:
+ """Tests that `from b.c import src` resolves through sys.path once `py_resolve_syspath` is enabled"""
+ # language=python
+ with get_codebase_session(
+ tmpdir,
+ files={
+ "a/b/c/src.py": """
+def update():
+ pass
+""",
+ "consumer.py": """
+from b.c import src as operations
+
+def func():
+ operations.update()
+""",
+ },
+ ) as codebase:
+ src_file: SourceFile = codebase.get_file("a/b/c/src.py")
+ consumer_file: SourceFile = codebase.get_file("consumer.py")
+
+ # Enable resolution via sys.path
+ codebase.ctx.config.py_resolve_syspath = True
+
+ # =====[ Imports cannot be found while sys.path does not yet contain "a" ]=====
+ assert len(consumer_file.imports) == 1
+ src_import: Import = consumer_file.imports[0]
+ src_import_resolution: ImportResolution = src_import.resolve_import()
+ assert src_import_resolution is None
+
+ # Modify sys.path for this test only
+ monkeypatch.syspath_prepend("a")
+
+ # =====[ Imports can be found with sys.path set and active ]=====
+ codebase.ctx.config.py_resolve_syspath = True  # NOTE(review): redundant, already set to True above
+ src_import_resolution = src_import.resolve_import()
+ assert src_import_resolution
+ assert src_import_resolution.from_file is src_file
+ assert src_import_resolution.imports_file is True
+
+
+def test_import_resolution_file_custom_resolve_path(tmpdir: str) -> None:
+ """Tests that file imports resolve through the `import_resolution_paths` config option"""
+ # language=python
+ with get_codebase_session(
+ tmpdir,
+ files={
+ "a/b/c/src.py": """
+def update():
+ pass
+""",
+ "consumer.py": """
+from b.c import src as operations
+from c import src as operations2
+
+def func():
+ operations.update()
+""",
+ },
+ ) as codebase:
+ src_file: SourceFile = codebase.get_file("a/b/c/src.py")
+ consumer_file: SourceFile = codebase.get_file("consumer.py")
+
+ # =====[ Imports cannot be found without custom resolve path being set ]=====
+ assert len(consumer_file.imports) == 2
+ src_import: Import = consumer_file.imports[0]
+ src_import_resolution: ImportResolution = src_import.resolve_import()
+ assert src_import_resolution is None
+
+ # =====[ Imports cannot be found with custom resolve path set to invalid path ]=====
+ codebase.ctx.config.import_resolution_paths = ["x"]
+ src_import_resolution = src_import.resolve_import()
+ assert src_import_resolution is None
+
+ # =====[ Imports can be found with custom resolve path set ]=====
+ codebase.ctx.config.import_resolution_paths = ["a"]
+ src_import_resolution = src_import.resolve_import()
+ assert src_import_resolution
+ assert src_import_resolution.from_file is src_file
+ assert src_import_resolution.imports_file is True
+
+ # =====[ Imports can be found with a nested custom resolve path ("a/b") set ]=====
+ src_import = consumer_file.imports[1]
+ codebase.ctx.config.import_resolution_paths = ["a/b"]
+ src_import_resolution = src_import.resolve_import()
+ assert src_import_resolution
+ assert src_import_resolution.from_file is src_file
+ assert src_import_resolution.imports_file is True
+
+
+def test_import_resolution_file_custom_resolve_and_syspath_precedence(tmpdir: str, monkeypatch) -> None:
+ """Tests that `import_resolution_paths` entries take precedence over sys.path entries"""
+ # language=python
+ with get_codebase_session(
+ tmpdir,
+ files={
+ "a/c/src.py": """
+def update1():
+ pass
+""",
+ "a/b/c/src.py": """
+def update2():
+ pass
+""",
+ "consumer.py": """
+from c import src as operations
+
+def func():
+ operations.update2()
+""",
+ },
+ ) as codebase:
+ src_file: SourceFile = codebase.get_file("a/b/c/src.py")
+ consumer_file: SourceFile = codebase.get_file("consumer.py")
+
+ # Ensure no custom resolve paths are set and enable sys.path resolution
+ codebase.ctx.config.import_resolution_paths = []
+ codebase.ctx.config.py_resolve_syspath = True
+
+ # =====[ Import with sys.path set can be found ]=====
+ assert len(consumer_file.imports) == 1
+ # Modify sys.path for this test only
+ monkeypatch.syspath_prepend("a")
+ src_import: Import = consumer_file.imports[0]
+ src_import_resolution = src_import.resolve_import()
+ assert src_import_resolution
+ assert src_import_resolution.from_file.file_path == "a/c/src.py"
+
+ # =====[ Custom resolve path is preferred over sys.path ]=====
+ codebase.ctx.config.import_resolution_paths = ["a/b"]
+ src_import_resolution = src_import.resolve_import()
+ assert src_import_resolution
+ assert src_import_resolution.from_file is src_file
+ assert src_import_resolution.imports_file is True
+
+
+def test_import_resolution_default_conflicts_overrite(tmpdir: str, monkeypatch) -> None:
+ """Tests that sys.path and custom resolve paths take precedence over the default resolution"""
+ # language=python
+ with get_codebase_session(
+ tmpdir,
+ files={
+ "a/src.py": """
+def update1():
+ pass
+""",
+ "b/a/src.py": """
+def update2():
+ pass
+""",
+ "consumer.py": """
+from a import src as operations
+
+def func():
+ operations.update2()
+""",
+ },
+ ) as codebase:
+ src_file: SourceFile = codebase.get_file("a/src.py")
+ src_file_overrite: SourceFile = codebase.get_file("b/a/src.py")
+ consumer_file: SourceFile = codebase.get_file("consumer.py")
+
+ # Ensure no custom resolve paths are set and enable sys.path resolution
+ codebase.ctx.config.import_resolution_paths = []
+ codebase.ctx.config.py_resolve_syspath = True
+
+ # =====[ Default import works ]=====
+ assert len(consumer_file.imports) == 1
+ src_import: Import = consumer_file.imports[0]
+ src_import_resolution = src_import.resolve_import()
+ assert src_import_resolution
+ assert src_import_resolution.from_file is src_file
+
+ # =====[ sys.path entry has precedence over the default resolution ]=====
+ monkeypatch.syspath_prepend("b")
+ src_import_resolution = src_import.resolve_import()
+ assert src_import_resolution
+ assert src_import_resolution.from_file is not src_file
+ assert src_import_resolution.from_file is src_file_overrite
+
+ # =====[ Custom resolve path has precedence over the default resolution ]=====
+ codebase.ctx.config.import_resolution_paths = ["b"]  # NOTE(review): sys.path still contains "b" here, so this step does not isolate the custom path
+ src_import_resolution = src_import.resolve_import()
+ assert src_import_resolution
+ assert src_import_resolution.from_file is not src_file
+ assert src_import_resolution.from_file is src_file_overrite
+
+
def test_import_resolution_init_wildcard(tmpdir: str) -> None:
"""Tests that named import from a file with wildcard resolves properly"""
# language=python