From 510bed18f19242e49d5f17cb59887c4bd47665d7 Mon Sep 17 00:00:00 2001 From: codegen-bot Date: Thu, 27 Feb 2025 16:00:20 -0800 Subject: [PATCH 1/5] feat: sync changes to local state before run --- src/codegen/runner/sandbox/runner.py | 4 ---- src/codegen/runner/sandbox/server.py | 2 +- src/codegen/runner/servers/local_daemon.py | 22 +++++++++++++++++----- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/codegen/runner/sandbox/runner.py b/src/codegen/runner/sandbox/runner.py index 42434717d..1d811c62f 100644 --- a/src/codegen/runner/sandbox/runner.py +++ b/src/codegen/runner/sandbox/runner.py @@ -1,7 +1,5 @@ import sys -from git import Commit as GitCommit - from codegen.git.repo_operator.repo_operator import RepoOperator from codegen.git.schemas.enums import SetupOption from codegen.git.schemas.repo_config import RepoConfig @@ -21,7 +19,6 @@ class SandboxRunner: # =====[ __init__ instance attributes ]===== repo: RepoConfig - commit: GitCommit op: RepoOperator | None # =====[ computed instance attributes ]===== @@ -31,7 +28,6 @@ class SandboxRunner: def __init__(self, repo_config: RepoConfig, op: RepoOperator | None = None) -> None: self.repo = repo_config self.op = op or RepoOperator(repo_config=self.repo, setup_option=SetupOption.PULL_OR_CLONE, bot_commit=True) - self.commit = self.op.git_cli.head.commit async def warmup(self) -> None: """Warms up this runner by cloning the repo and parsing the graph.""" diff --git a/src/codegen/runner/sandbox/server.py b/src/codegen/runner/sandbox/server.py index 9b844a2ff..92cdb4735 100644 --- a/src/codegen/runner/sandbox/server.py +++ b/src/codegen/runner/sandbox/server.py @@ -45,7 +45,7 @@ async def lifespan(server: FastAPI): runner = SandboxRunner(repo_config=repo_config) server_info.warmup_state = WarmupState.PENDING await runner.warmup() - server_info.synced_commit = runner.commit.hexsha + server_info.synced_commit = runner.op.git_cli.head.commit.hexsha server_info.warmup_state = WarmupState.COMPLETED except Exception: logger.exception("Failed to build graph during warmup") diff --git a/src/codegen/runner/servers/local_daemon.py b/src/codegen/runner/servers/local_daemon.py index b8065e211..6d1a6ae22 100644 --- a/src/codegen/runner/servers/local_daemon.py +++ b/src/codegen/runner/servers/local_daemon.py @@ -51,7 +51,7 @@ async def lifespan(server: FastAPI): logger.info(f"Starting up fastapi server for repo_name={repo_config.name}") server_info.warmup_state = WarmupState.PENDING await runner.warmup() - server_info.synced_commit = runner.commit.hexsha + server_info.synced_commit = runner.op.head_commit.hexsha server_info.warmup_state = WarmupState.COMPLETED except Exception: @@ -73,9 +73,7 @@ def health() -> ServerInfo: @app.post(RUN_FUNCTION_ENDPOINT) async def run(request: RunFunctionRequest) -> CodemodRunResult: - # TODO: Sync graph to whatever changes are in the repo currently - - # Run the request + _save_uncommitted_changes_and_sync() diff_req = GetDiffRequest(codemod=Codemod(user_code=request.codemod_source)) diff_response = await runner.get_diff(request=diff_req) if request.commit: @@ -86,8 +84,22 @@ async def run(request: RunFunctionRequest) -> CodemodRunResult: return diff_response.result +def _save_uncommitted_changes_and_sync() -> None: + if commit := runner.codebase.git_commit("[Codegen] Save uncommitted changes"): + logger.info(f"Saved uncommitted changes to {commit.hexsha}") + + cur_commit = runner.op.head_commit + if cur_commit != runner.codebase.ctx.synced_commit: + logger.info(f"Syncing codebase to head commit: {cur_commit.hexsha}") + runner.codebase.sync_to_commit(commit=cur_commit) + else: + logger.info("Codebase is already synced to head commit") + + server_info.synced_commit = cur_commit.hexsha + + def _should_skip_commit(function_name: str) -> bool: - changed_files = runner.op.get_modified_files(runner.commit) + changed_files = runner.op.get_modified_files(runner.codebase.ctx.synced_commit) if len(changed_files) != 1: return False From 0699e236ee5c0c70a39a749e7f20990d0fd686a9 Mon Sep 17 00:00:00 2001 From: codegen-bot Date: Thu, 27 Feb 2025 16:11:18 -0800 Subject: [PATCH 2/5] nit fixes --- src/codegen/cli/commands/run/main.py | 2 +- src/codegen/runner/servers/local_daemon.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codegen/cli/commands/run/main.py b/src/codegen/cli/commands/run/main.py index e42e18f7b..0d4ef48c4 100644 --- a/src/codegen/cli/commands/run/main.py +++ b/src/codegen/cli/commands/run/main.py @@ -14,7 +14,7 @@ @requires_init @click.argument("label", required=True) @click.option("--web", is_flag=True, help="Run the function on the web service instead of locally") -@click.option("--daemon", is_flag=True, help="Run the function against a running daemon") +@click.option("--daemon", "-d", is_flag=True, help="Run the function against a running daemon") @click.option("--diff-preview", type=int, help="Show a preview of the first N lines of the diff") @click.option("--arguments", type=str, help="Arguments as a json string to pass as the function's 'arguments' parameter") def run_command( diff --git a/src/codegen/runner/servers/local_daemon.py b/src/codegen/runner/servers/local_daemon.py index 6d1a6ae22..4e67458db 100644 --- a/src/codegen/runner/servers/local_daemon.py +++ b/src/codegen/runner/servers/local_daemon.py @@ -91,7 +91,7 @@ def _save_uncommitted_changes_and_sync() -> None: cur_commit = runner.op.head_commit if cur_commit != runner.codebase.ctx.synced_commit: logger.info(f"Syncing codebase to head commit: {cur_commit.hexsha}") - runner.codebase.sync_to_commit(commit=cur_commit) + runner.codebase.sync_to_commit(target_commit=cur_commit) else: logger.info("Codebase is already synced to head commit") From 4844d14b86495e5ba24cd6a9a1054edee1132d09 Mon Sep 17 00:00:00 2001 From: codegen-bot Date: Thu, 27 Feb 2025 16:33:21 -0800 Subject: [PATCH 3/5] fix --- src/codegen/shared/logging/get_logger.py | 2 +- tests/unit/codegen/runner/sandbox/test_runner.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codegen/shared/logging/get_logger.py b/src/codegen/shared/logging/get_logger.py index 2c6c883d9..823e5161e 100644 --- a/src/codegen/shared/logging/get_logger.py +++ b/src/codegen/shared/logging/get_logger.py @@ -35,7 +35,7 @@ def get_logger(name: str, level: int = logging.INFO) -> logging.Logger: handler.setFormatter(formatter) logger.addHandler(handler) # Ensure the logger propagates to the root logger - logger.propagate = False + logger.propagate = True # Set the level on the logger itself logger.setLevel(level) return logger diff --git a/tests/unit/codegen/runner/sandbox/test_runner.py b/tests/unit/codegen/runner/sandbox/test_runner.py index 45d0058c3..d0f0a9ae9 100644 --- a/tests/unit/codegen/runner/sandbox/test_runner.py +++ b/tests/unit/codegen/runner/sandbox/test_runner.py @@ -40,7 +40,7 @@ async def test_sandbox_runner_warmup_starts_with_default_branch(mock_executor, r # assert len(runner.codebase._op.git_cli.branches) == 1 TODO: fix GHA creating master and main branch assert not runner.codebase._op.git_cli.head.is_detached assert runner.codebase._op.git_cli.active_branch.name == runner.codebase.default_branch - assert runner.codebase._op.git_cli.head.commit == runner.commit + assert runner.codebase._op.git_cli.head.commit == runner.op.head_commit @pytest.mark.asyncio From f847ef74377f460e72957e4785877fe0934a3a8a Mon Sep 17 00:00:00 2001 From: codegen-bot Date: Thu, 27 Feb 2025 17:38:15 -0800 Subject: [PATCH 4/5] exclude .codegen path --- src/codegen/cli/commands/start/main.py | 1 + .../git/repo_operator/repo_operator.py | 8 +++++++- src/codegen/runner/servers/local_daemon.py | 20 ++----------------- src/codegen/sdk/core/codebase.py | 4 ++-- 4 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/codegen/cli/commands/start/main.py b/src/codegen/cli/commands/start/main.py index ce2d1d919..652e400e1 100644 --- a/src/codegen/cli/commands/start/main.py +++ b/src/codegen/cli/commands/start/main.py @@ -132,6 +132,7 @@ def _run_docker_container(repo_config: RepoConfig, port: int, detached: bool) -> "REPOSITORY_PATH": container_repo_path, "GITHUB_TOKEN": SecretsConfig().github_token, "PYTHONUNBUFFERED": "1", # Ensure Python output is unbuffered + "CODEBASE_SYNC_ENABLED": "True", } envvars_args = [arg for k, v in envvars.items() for arg in ("--env", f"{k}={v}")] mount_args = ["-v", f"{repo_config.repo_path}:{container_repo_path}"] diff --git a/src/codegen/git/repo_operator/repo_operator.py b/src/codegen/git/repo_operator/repo_operator.py index db138b800..8eba2407e 100644 --- a/src/codegen/git/repo_operator/repo_operator.py +++ b/src/codegen/git/repo_operator/repo_operator.py @@ -458,12 +458,18 @@ def get_diffs(self, ref: str | GitCommit, reverse: bool = True) -> list[Diff]: return [diff for diff in self.git_cli.index.diff(ref, R=reverse)] @stopwatch - def stage_and_commit_all_changes(self, message: str, verify: bool = False) -> bool: + def stage_and_commit_all_changes(self, message: str, verify: bool = False, exclude_paths: list[str] | None = None) -> bool: """TODO: rename to stage_and_commit_changes Stage all changes and commit them with the given message. Returns True if a commit was made and False otherwise. """ self.git_cli.git.add(A=True) + # Unstage the excluded paths + for path in exclude_paths or []: + try: + self.git_cli.git.reset("HEAD", "--", path) + except GitCommandError as e: + logger.warning(f"Failed to exclude path {path}: {e}") return self.commit_changes(message, verify) def commit_changes(self, message: str, verify: bool = False) -> bool: diff --git a/src/codegen/runner/servers/local_daemon.py b/src/codegen/runner/servers/local_daemon.py index 4e67458db..4de9c669d 100644 --- a/src/codegen/runner/servers/local_daemon.py +++ b/src/codegen/runner/servers/local_daemon.py @@ -1,5 +1,4 @@ import logging -import os from contextlib import asynccontextmanager from fastapi import FastAPI @@ -77,15 +76,13 @@ async def run(request: RunFunctionRequest) -> CodemodRunResult: diff_req = GetDiffRequest(codemod=Codemod(user_code=request.codemod_source)) diff_response = await runner.get_diff(request=diff_req) if request.commit: - if _should_skip_commit(request.function_name): - logger.info(f"Skipping commit because only changes to {request.function_name} were made") - elif commit_sha := runner.codebase.git_commit(f"[Codegen] {request.function_name}"): + if commit_sha := runner.codebase.git_commit(f"[Codegen] {request.function_name}", exclude_paths=[".codegen/*"]): logger.info(f"Committed changes to {commit_sha.hexsha}") return diff_response.result def _save_uncommitted_changes_and_sync() -> None: - if commit := runner.codebase.git_commit("[Codegen] Save uncommitted changes"): + if commit := runner.codebase.git_commit("[Codegen] Save uncommitted changes", exclude_paths=[".codegen/*"]): logger.info(f"Saved uncommitted changes to {commit.hexsha}") cur_commit = runner.op.head_commit @@ -96,16 +93,3 @@ def _save_uncommitted_changes_and_sync() -> None: logger.info("Codebase is already synced to head commit") server_info.synced_commit = cur_commit.hexsha - - -def _should_skip_commit(function_name: str) -> bool: - changed_files = runner.op.get_modified_files(runner.codebase.ctx.synced_commit) - if len(changed_files) != 1: - return False - - file_path = changed_files[0] - if not file_path.startswith(".codegen/codemods/"): - return False - - changed_file_name = os.path.splitext(os.path.basename(file_path))[0] - return changed_file_name == function_name.replace("-", "_") diff --git a/src/codegen/sdk/core/codebase.py b/src/codegen/sdk/core/codebase.py index d1c85cc29..754f1fe01 100644 --- a/src/codegen/sdk/core/codebase.py +++ b/src/codegen/sdk/core/codebase.py @@ -734,7 +734,7 @@ def get_relative_path(self, from_file: str, to_file: str) -> str: # State/Git #################################################################################################################### - def git_commit(self, message: str, *, verify: bool = False) -> GitCommit | None: + def git_commit(self, message: str, *, verify: bool = False, exclude_paths: list[str] | None = None) -> GitCommit | None: """Stages + commits all changes to the codebase and git. Args: @@ -745,7 +745,7 @@ def git_commit(self, message: str, *, verify: bool = False) -> GitCommit | None: GitCommit | None: The commit object if changes were committed, None otherwise. """ self.ctx.commit_transactions(sync_graph=False) - if self._op.stage_and_commit_all_changes(message, verify): + if self._op.stage_and_commit_all_changes(message, verify, exclude_paths): logger.info(f"Commited repository to {self._op.head_commit} on {self._op.get_active_branch_or_commit()}") return self._op.head_commit else: From 2111c7712e45a4768639f71c8fc222ccbf94a44d Mon Sep 17 00:00:00 2001 From: codegen-bot Date: Thu, 27 Feb 2025 17:59:10 -0800 Subject: [PATCH 5/5] enable sync --- src/codegen/cli/commands/config/main.py | 2 +- src/codegen/runner/sandbox/runner.py | 9 +++++---- src/codegen/runner/servers/local_daemon.py | 6 ++++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/codegen/cli/commands/config/main.py b/src/codegen/cli/commands/config/main.py index 00d3143e4..b4ec3f3d7 100644 --- a/src/codegen/cli/commands/config/main.py +++ b/src/codegen/cli/commands/config/main.py @@ -104,7 +104,7 @@ def set_command(key: str, value: str): return cur_value = config.get(key) - if cur_value is None or cur_value.lower() != value.lower(): + if cur_value is None or str(cur_value).lower() != value.lower(): try: config.set(key, value) except Exception as e: diff --git a/src/codegen/runner/sandbox/runner.py b/src/codegen/runner/sandbox/runner.py index 1d811c62f..4a86bc618 100644 --- a/src/codegen/runner/sandbox/runner.py +++ b/src/codegen/runner/sandbox/runner.py @@ -1,5 +1,6 @@ import sys +from codegen.configs.models.codebase import CodebaseConfig from codegen.git.repo_operator.repo_operator import RepoOperator from codegen.git.schemas.enums import SetupOption from codegen.git.schemas.repo_config import RepoConfig @@ -29,18 +30,18 @@ def __init__(self, repo_config: RepoConfig, op: RepoOperator | None = None) -> N self.repo = repo_config self.op = op or RepoOperator(repo_config=self.repo, setup_option=SetupOption.PULL_OR_CLONE, bot_commit=True) - async def warmup(self) -> None: + async def warmup(self, codebase_config: CodebaseConfig | None = None) -> None: """Warms up this runner by cloning the repo and parsing the graph.""" logger.info(f"===== Warming runner for {self.repo.full_name or self.repo.name} =====") sys.setrecursionlimit(10000) # for graph parsing - self.codebase = await self._build_graph() + self.codebase = await self._build_graph(codebase_config) self.executor = SandboxExecutor(self.codebase) - async def _build_graph(self) -> Codebase: + async def _build_graph(self, codebase_config: CodebaseConfig | None = None) -> Codebase: logger.info("> Building graph...") projects = [ProjectConfig(programming_language=self.repo.language, repo_operator=self.op, base_path=self.repo.base_path, subdirectories=self.repo.subdirectories)] - return Codebase(projects=projects) + return Codebase(projects=projects, config=codebase_config) async def get_diff(self, request: GetDiffRequest) -> GetDiffResponse: custom_scope = {"context": request.codemod.codemod_context} if request.codemod.codemod_context else {} diff --git a/src/codegen/runner/servers/local_daemon.py b/src/codegen/runner/servers/local_daemon.py index 4de9c669d..1d24006ae 100644 --- a/src/codegen/runner/servers/local_daemon.py +++ b/src/codegen/runner/servers/local_daemon.py @@ -3,6 +3,7 @@ from fastapi import FastAPI +from codegen.configs.models.codebase import DefaultCodebaseConfig from codegen.git.configs.constants import CODEGEN_BOT_EMAIL, CODEGEN_BOT_NAME from codegen.git.repo_operator.repo_operator import RepoOperator from codegen.git.schemas.enums import SetupOption @@ -46,10 +47,11 @@ async def lifespan(server: FastAPI): runner.op.git_cli.git.config("user.email", CODEGEN_BOT_EMAIL) runner.op.git_cli.git.config("user.name", CODEGEN_BOT_NAME) - # Parse the codebase + # Parse the codebase with sync enabled logger.info(f"Starting up fastapi server for repo_name={repo_config.name}") server_info.warmup_state = WarmupState.PENDING - await runner.warmup() + codebase_config = DefaultCodebaseConfig.model_copy(update={"sync_enabled": True}) + await runner.warmup(codebase_config=codebase_config) server_info.synced_commit = runner.op.head_commit.hexsha server_info.warmup_state = WarmupState.COMPLETED