diff --git a/src/codegen/sdk/core/codebase.py b/src/codegen/sdk/core/codebase.py index 1979e5f89..a4879bdf1 100644 --- a/src/codegen/sdk/core/codebase.py +++ b/src/codegen/sdk/core/codebase.py @@ -30,7 +30,10 @@ from codegen.sdk._proxy import proxy_property from codegen.sdk.ai.client import get_openai_client from codegen.sdk.codebase.codebase_ai import generate_system_prompt, generate_tools -from codegen.sdk.codebase.codebase_context import GLOBAL_FILE_IGNORE_LIST, CodebaseContext +from codegen.sdk.codebase.codebase_context import ( + GLOBAL_FILE_IGNORE_LIST, + CodebaseContext, +) from codegen.sdk.codebase.config import ProjectConfig, SessionOptions from codegen.sdk.codebase.diff_lite import DiffLite from codegen.sdk.codebase.flagging.code_flag import CodeFlag @@ -109,7 +112,21 @@ @apidoc -class Codebase(Generic[TSourceFile, TDirectory, TSymbol, TClass, TFunction, TImport, TGlobalVar, TInterface, TTypeAlias, TParameter, TCodeBlock]): +class Codebase( + Generic[ + TSourceFile, + TDirectory, + TSymbol, + TClass, + TFunction, + TImport, + TGlobalVar, + TInterface, + TTypeAlias, + TParameter, + TCodeBlock, + ] +): """This class provides the main entrypoint for most programs to analyzing and manipulating codebases. Attributes: @@ -179,7 +196,10 @@ def __init__( # Initialize project with repo_path if projects is None if repo_path is not None: - main_project = ProjectConfig.from_path(repo_path, programming_language=ProgrammingLanguage(language.upper()) if language else None) + main_project = ProjectConfig.from_path( + repo_path, + programming_language=ProgrammingLanguage(language.upper()) if language else None, + ) projects = [main_project] else: main_project = projects[0] @@ -285,7 +305,10 @@ def files(self, *, extensions: list[str] | Literal["*"] | None = None) -> list[T else: files = [] # Get all files with the specified extensions - for filepath, _ in self._op.iter_files(extensions=None if extensions == "*" else extensions, ignore_list=GLOBAL_FILE_IGNORE_LIST): + for filepath, _ in self._op.iter_files( + extensions=None if extensions == "*" else extensions, + ignore_list=GLOBAL_FILE_IGNORE_LIST, + ): files.append(self.get_file(filepath, optional=False)) # Sort files alphabetically return sort_editables(files, alphabetical=True, dedupe=False) @@ -297,9 +320,12 @@ def codeowners(self) -> list["CodeOwner[TSourceFile]"]: Returns: list[CodeOwners]: A list of CodeOwners objects in the codebase. """ - if self.G.codeowners_parser is None: + if self.ctx.codeowners_parser is None: return [] - return CodeOwner.from_parser(self.G.codeowners_parser, lambda *args, **kwargs: self.files(*args, **kwargs)) + return CodeOwner.from_parser( + self.ctx.codeowners_parser, + lambda *args, **kwargs: self.files(*args, **kwargs), + ) @property def directories(self) -> list[TDirectory]: @@ -813,7 +839,14 @@ def reset(self, git_reset: bool = False) -> None: self.reset_logs() self.ctx.undo_applied_diffs() - def checkout(self, *, commit: str | GitCommit | None = None, branch: str | None = None, create_if_missing: bool = False, remote: bool = False) -> CheckoutResult: + def checkout( + self, + *, + commit: str | GitCommit | None = None, + branch: str | None = None, + create_if_missing: bool = False, + remote: bool = False, + ) -> CheckoutResult: """Checks out a git branch or commit and syncs the codebase graph to the new state. This method discards any pending changes, performs a git checkout of the specified branch or commit, @@ -938,7 +971,12 @@ def create_pr(self, title: str, body: str) -> PullRequest: raise ValueError(msg) self._op.stage_and_commit_all_changes(message=title) self._op.push_changes() - return self._op.remote_git_repo.create_pull(head_branch_name=self._op.git_cli.active_branch.name, base_branch_name=self._op.default_branch, title=title, body=body) + return self._op.remote_git_repo.create_pull( + head_branch_name=self._op.git_cli.active_branch.name, + base_branch_name=self._op.default_branch, + title=title, + body=body, + ) #################################################################################################################### # GRAPH VISUALIZATION @@ -1063,15 +1101,27 @@ def get_finalized_logs(self) -> str: @contextmanager @noapidoc - def session(self, sync_graph: bool = True, commit: bool = True, session_options: SessionOptions = SessionOptions()) -> Generator[None, None, None]: + def session( + self, + sync_graph: bool = True, + commit: bool = True, + session_options: SessionOptions = SessionOptions(), + ) -> Generator[None, None, None]: with self.ctx.session(sync_graph=sync_graph, commit=commit, session_options=session_options): yield None @noapidoc - def _enable_experimental_language_engine(self, async_start: bool = False, install_deps: bool = False, use_v8: bool = False) -> None: + def _enable_experimental_language_engine( + self, + async_start: bool = False, + install_deps: bool = False, + use_v8: bool = False, + ) -> None: """Debug option to enable experimental language engine for the current codebase.""" if install_deps and not self.ctx.language_engine: - from codegen.sdk.core.external.dependency_manager import get_dependency_manager + from codegen.sdk.core.external.dependency_manager import ( + get_dependency_manager, + ) logger.info("Cold installing dependencies...") logger.info("This may take a while for large repos...") @@ -1085,7 +1135,12 @@ def _enable_experimental_language_engine(self, async_start: bool = False, instal logger.info("Cold starting language engine...") logger.info("This may take a while for large repos...") - self.ctx.language_engine = get_language_engine(self.ctx.projects[0].programming_language, self.ctx, use_ts=True, use_v8=use_v8) + self.ctx.language_engine = get_language_engine( + self.ctx.projects[0].programming_language, + self.ctx, + use_ts=True, + use_v8=use_v8, + ) self.ctx.language_engine.start(async_start=async_start) # Wait for the language engine to be ready self.ctx.language_engine.wait_until_ready(ignore_error=False) @@ -1111,7 +1166,13 @@ def ai_client(self) -> OpenAI: self._ai_helper = get_openai_client(key=self.ctx.secrets.openai_api_key) return self._ai_helper - def ai(self, prompt: str, target: Editable | None = None, context: Editable | list[Editable] | dict[str, Editable | list[Editable]] | None = None, model: str = "gpt-4o") -> str: + def ai( + self, + prompt: str, + target: Editable | None = None, + context: Editable | list[Editable] | dict[str, Editable | list[Editable]] | None = None, + model: str = "gpt-4o", + ) -> str: """Generates a response from the AI based on the provided prompt, target, and context. A method that sends a prompt to the AI client along with optional target and context information to generate a response. @@ -1138,7 +1199,10 @@ def ai(self, prompt: str, target: Editable | None = None, context: Editable | li raise MaxAIRequestsError(msg, threshold=self.ctx.session_options.max_ai_requests) params = { - "messages": [{"role": "system", "content": generate_system_prompt(target, context)}, {"role": "user", "content": prompt}], + "messages": [ + {"role": "system", "content": generate_system_prompt(target, context)}, + {"role": "user", "content": prompt}, + ], "model": model, "functions": generate_tools(), "temperature": 0, @@ -1290,7 +1354,10 @@ def from_repo( # Initialize and return codebase with proper context logger.info("Initializing Codebase...") - project = ProjectConfig.from_repo_operator(repo_operator=repo_operator, programming_language=ProgrammingLanguage(language.upper()) if language else None) + project = ProjectConfig.from_repo_operator( + repo_operator=repo_operator, + programming_language=ProgrammingLanguage(language.upper()) if language else None, + ) codebase = Codebase(projects=[project], config=config, secrets=secrets) logger.info("Codebase initialization complete") return codebase @@ -1310,7 +1377,16 @@ def create_pr_comment(self, pr_number: int, body: str) -> None: """Create a comment on a pull request""" return self._op.create_pr_comment(pr_number, body) - def create_pr_review_comment(self, pr_number: int, body: str, commit_sha: str, path: str, line: int | None = None, side: str = "RIGHT", start_line: int | None = None) -> None: + def create_pr_review_comment( + self, + pr_number: int, + body: str, + commit_sha: str, + path: str, + line: int | None = None, + side: str = "RIGHT", + start_line: int | None = None, + ) -> None: """Create a review comment on a pull request. Args: @@ -1330,6 +1406,42 @@ def create_pr_review_comment(self, pr_number: int, body: str, commit_sha: str, p # The last 2 lines of code are added to the runner. See codegen-backend/cli/generate/utils.py # Type Aliases -CodebaseType = Codebase[SourceFile, Directory, Symbol, Class, Function, Import, Assignment, Interface, TypeAlias, Parameter, CodeBlock] -PyCodebaseType = Codebase[PyFile, PyDirectory, PySymbol, PyClass, PyFunction, PyImport, PyAssignment, Interface, TypeAlias, PyParameter, PyCodeBlock] -TSCodebaseType = Codebase[TSFile, TSDirectory, TSSymbol, TSClass, TSFunction, TSImport, TSAssignment, TSInterface, TSTypeAlias, TSParameter, TSCodeBlock] +CodebaseType = Codebase[ + SourceFile, + Directory, + Symbol, + Class, + Function, + Import, + Assignment, + Interface, + TypeAlias, + Parameter, + CodeBlock, +] +PyCodebaseType = Codebase[ + PyFile, + PyDirectory, + PySymbol, + PyClass, + PyFunction, + PyImport, + PyAssignment, + Interface, + TypeAlias, + PyParameter, + PyCodeBlock, +] +TSCodebaseType = Codebase[ + TSFile, + TSDirectory, + TSSymbol, + TSClass, + TSFunction, + TSImport, + TSAssignment, + TSInterface, + TSTypeAlias, + TSParameter, + TSCodeBlock, +] diff --git a/src/codegen/sdk/core/codeowner.py b/src/codegen/sdk/core/codeowner.py index bb896d83b..6a9d83932 100644 --- a/src/codegen/sdk/core/codeowner.py +++ b/src/codegen/sdk/core/codeowner.py @@ -37,6 +37,7 @@ class CodeOwner( files_source: A callable that returns an iterable of all files in the codebase. """ + _instance_iterator: Iterator[TFile] owner_type: Literal["USERNAME", "TEAM", "EMAIL"] owner_value: str files_source: Callable[FilesParam, Iterable[TFile]] @@ -91,7 +92,11 @@ def name(self) -> str: return self.owner_value def __iter__(self) -> Iterator[TFile]: - return iter(self.files_generator()) + self._instance_iterator = iter(self.files_generator()) + return self + + def __next__(self) -> str: + return next(self._instance_iterator) def __repr__(self) -> str: return f"CodeOwner(owner_type={self.owner_type}, owner_value={self.owner_value})" diff --git a/tests/integration/codegen/sdk/core/conftest.py b/tests/integration/codegen/sdk/core/conftest.py new file mode 100644 index 000000000..ba16e1ccf --- /dev/null +++ b/tests/integration/codegen/sdk/core/conftest.py @@ -0,0 +1,8 @@ +import pytest + +from codegen.sdk.code_generation.codegen_sdk_codebase import get_codegen_sdk_codebase + + +@pytest.fixture(scope="module") +def codebase(): + return get_codegen_sdk_codebase() diff --git a/tests/integration/codegen/sdk/core/test_codeowners.py b/tests/integration/codegen/sdk/core/test_codeowners.py new file mode 100644 index 000000000..be66895e6 --- /dev/null +++ b/tests/integration/codegen/sdk/core/test_codeowners.py @@ -0,0 +1,27 @@ +import pytest +from codeowners import CodeOwners + + +@pytest.fixture +def example_codeowners_file_contents() -> str: + return """# CODEOWNERS file example + +/src/codemods @user-a +/src/codegen @org/team1 +""" + + +def test_codebase_codeowners(codebase, example_codeowners_file_contents): + codebase.ctx.codeowners_parser = CodeOwners(example_codeowners_file_contents) + + assert isinstance(codebase.codeowners, list) + assert len(codebase.codeowners) == 2 + codeowners_by_name = {codeowner.name: codeowner for codeowner in codebase.codeowners} + assert codeowners_by_name["@user-a"].owner_type == "USERNAME" + assert codeowners_by_name["@org/team1"].owner_type == "TEAM" + + for _file in codeowners_by_name["@org/team1"]: + assert _file.filepath.startswith("src/codegen") + + for _file in codeowners_by_name["@user-a"]: + assert _file.filepath.startswith("src/codemods") diff --git a/tests/unit/codegen/sdk/core/test_codebase.py b/tests/unit/codegen/sdk/core/test_codebase.py new file mode 100644 index 000000000..d8369f4c5 --- /dev/null +++ b/tests/unit/codegen/sdk/core/test_codebase.py @@ -0,0 +1,41 @@ +from unittest.mock import MagicMock, create_autospec, patch + +import pytest + +from codegen.sdk.codebase.codebase_context import CodebaseContext +from codegen.sdk.codebase.factory.get_session import get_codebase_session + + +@pytest.fixture(autouse=True) +def context_mock(): + mock_context = create_autospec(CodebaseContext, instance=True) + for attr in CodebaseContext.__annotations__: + if not hasattr(mock_context, attr): + setattr(mock_context, attr, MagicMock(name=attr)) + with patch("codegen.sdk.core.codebase.CodebaseContext", return_value=mock_context): + yield mock_context + + +@pytest.fixture +def codebase(context_mock, tmpdir): + """Create a simple codebase for testing.""" + # language=python + content = """ +def hello(): + print("Hello, world!") + +class Greeter: + def greet(self): + hello() +""" + with get_codebase_session(tmpdir=tmpdir, files={"src/main.py": content}, verify_output=False) as codebase: + yield codebase + + +def test_codeowners_property(context_mock, codebase): + context_mock.codeowners_parser.paths = [(..., ..., [("test", "test")], ..., ...)] + codebase.files = MagicMock() + assert isinstance(codebase.codeowners, list) + assert len(codebase.codeowners) == 1 + assert callable(codebase.codeowners[0].files_source) + assert codebase.codeowners[0].files_source() == codebase.files.return_value diff --git a/tests/unit/codegen/sdk/core/test_codeowner.py b/tests/unit/codegen/sdk/core/test_codeowner.py index c075c9fe0..091eec78e 100644 --- a/tests/unit/codegen/sdk/core/test_codeowner.py +++ b/tests/unit/codegen/sdk/core/test_codeowner.py @@ -69,7 +69,13 @@ def test_from_parser_method(fake_files): # Create a fake parser with a paths attribute. fake_parser = MagicMock() fake_parser.paths = [ - ("pattern1", "ignored", [("USERNAME", "alice"), ("TEAM", "devs")], "ignored", "ignored"), + ( + "pattern1", + "ignored", + [("USERNAME", "alice"), ("TEAM", "devs")], + "ignored", + "ignored", + ), ("pattern2", "ignored", [("EMAIL", "bob@example.com")], "ignored", "ignored"), ]