From 8e34b3a0c2b07b2ac861182d6487b0b232c1f2d1 Mon Sep 17 00:00:00 2001 From: Carol Jung Date: Tue, 18 Feb 2025 16:22:58 -0800 Subject: [PATCH 1/2] feat: Server client vs. codebase client --- src/codegen/runner/clients/codebase_client.py | 36 +++++++++++ .../{sandbox_client.py => server_client.py} | 62 ++++++++++--------- tests/integration/codegen/runner/conftest.py | 6 +- .../codegen/runner/test_create_branch.py | 6 +- .../test_create_branch_with_grouping.py | 6 +- 5 files changed, 77 insertions(+), 39 deletions(-) create mode 100644 src/codegen/runner/clients/codebase_client.py rename src/codegen/runner/clients/{sandbox_client.py => server_client.py} (60%) diff --git a/src/codegen/runner/clients/codebase_client.py b/src/codegen/runner/clients/codebase_client.py new file mode 100644 index 000000000..186ddc316 --- /dev/null +++ b/src/codegen/runner/clients/codebase_client.py @@ -0,0 +1,36 @@ +"""Client used to abstract the weird stdin/stdout communication we have with the sandbox""" + +import logging + +from codegen.git.schemas.repo_config import RepoConfig +from codegen.runner.clients.server_client import LocalServerClient +from codegen.runner.models.apis import SANDBOX_SERVER_PORT + +logger = logging.getLogger(__name__) + +RUNNER_SERVER_PATH = "codegen.runner.sandbox.server:app" + + +class CodebaseClient(LocalServerClient): + """Client for interacting with the locally hosted sandbox server.""" + + repo_config: RepoConfig + git_access_token: str | None + + def __init__(self, repo_config: RepoConfig, git_access_token: str | None, host: str = "127.0.0.1", port: int = SANDBOX_SERVER_PORT): + self.repo_config = repo_config + self.git_access_token = git_access_token + super().__init__(server_path=RUNNER_SERVER_PATH, host=host, port=port) + + def _get_envs(self): + envs = super()._get_envs() + envs.update( + { + "CODEGEN_REPOSITORY__REPO_PATH": self.repo_config.repo_path, + "CODEGEN_REPOSITORY__REPO_NAME": self.repo_config.name, + "CODEGEN_REPOSITORY__FULL_NAME": self.repo_config.full_name, + "CODEGEN_REPOSITORY__LANGUAGE": self.repo_config.language.value, + "CODEGEN_SECRETS__GITHUB_TOKEN": self.git_access_token, + } + ) + return envs diff --git a/src/codegen/runner/clients/sandbox_client.py b/src/codegen/runner/clients/server_client.py similarity index 60% rename from src/codegen/runner/clients/sandbox_client.py rename to src/codegen/runner/clients/server_client.py index 7c0004400..5ae68db0e 100644 --- a/src/codegen/runner/clients/sandbox_client.py +++ b/src/codegen/runner/clients/server_client.py @@ -8,51 +8,52 @@ import requests from fastapi import params -from codegen.git.schemas.repo_config import RepoConfig -from codegen.runner.models.apis import SANDBOX_SERVER_PORT - logger = logging.getLogger(__name__) +DEFAULT_SERVER_PORT = 4002 + +EPHEMERAL_SERVER_PATH = "codegen.runner.sandbox.ephemeral_server:app" -class SandboxClient: - """Client for interacting with the locally hosted sandbox server.""" + +class LocalServerClient: + """Client for interacting with the sandbox server.""" host: str port: int base_url: str _process: subprocess.Popen | None - def __init__(self, repo_config: RepoConfig, git_access_token: str | None, host: str = "127.0.0.1", port: int = SANDBOX_SERVER_PORT): + def __init__(self, server_path: str = EPHEMERAL_SERVER_PATH, host: str = "127.0.0.1", port: int = DEFAULT_SERVER_PORT): self.host = host self.port = port self.base_url = f"http://{host}:{port}" self._process = None - self._start_server(repo_config, git_access_token) + self._start_server(server_path) + + def __del__(self): + """Cleanup the subprocess when the client is destroyed""" + if self._process is not None: + self._process.terminate() + self._process.wait() + + def _get_envs(self): + return os.environ.copy() - def _start_server(self, repo_config: RepoConfig, git_access_token: str | None) -> None: + def _start_server(self, server_path: str) -> None: """Start the FastAPI server in a subprocess""" - env = os.environ.copy() - env.update( - { - "CODEGEN_REPOSITORY__REPO_PATH": repo_config.repo_path, - "CODEGEN_REPOSITORY__REPO_NAME": repo_config.name, - "CODEGEN_REPOSITORY__FULL_NAME": repo_config.full_name, - "CODEGEN_REPOSITORY__LANGUAGE": repo_config.language.value, - "CODEGEN_SECRETS__GITHUB_TOKEN": git_access_token, - } - ) + envs = self._get_envs() + logger.info(f"Starting local server on {self.base_url} with envvars: {envs}") - logger.info(f"Starting local sandbox server on {self.base_url} with repo setup in base_dir {repo_config.base_dir}") self._process = subprocess.Popen( [ "uvicorn", - "codegen.runner.sandbox.server:app", + server_path, "--host", self.host, "--port", str(self.port), ], - env=env, + env=envs, ) self._wait_for_server() @@ -60,19 +61,20 @@ def _wait_for_server(self, timeout: int = 60, interval: float = 0.1) -> None: """Wait for the server to start by polling the health endpoint""" start_time = time.time() while (time.time() - start_time) < timeout: - try: - self.get("/") + if self.healthcheck(raise_on_error=False): return - except requests.ConnectionError: - time.sleep(interval) + time.sleep(interval) msg = "Server failed to start within timeout period" raise TimeoutError(msg) - def __del__(self): - """Cleanup the subprocess when the client is destroyed""" - if self._process is not None: - self._process.terminate() - self._process.wait() + def healthcheck(self, raise_on_error: bool = True) -> bool: + try: + self.get("/") + return True + except requests.exceptions.ConnectionError: + if raise_on_error: + raise + return False def get(self, endpoint: str, data: dict | None = None) -> requests.Response: url = f"{self.base_url}{endpoint}" diff --git a/tests/integration/codegen/runner/conftest.py b/tests/integration/codegen/runner/conftest.py index 1b1d43d6b..c71cf54a0 100644 --- a/tests/integration/codegen/runner/conftest.py +++ b/tests/integration/codegen/runner/conftest.py @@ -8,7 +8,7 @@ from codegen.git.clients.git_repo_client import GitRepoClient from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator from codegen.git.schemas.repo_config import RepoConfig -from codegen.runner.clients.sandbox_client import SandboxClient +from codegen.runner.clients.codebase_client import CodebaseClient from codegen.shared.configs.session_configs import config from codegen.shared.enums.programming_language import ProgrammingLanguage @@ -45,7 +45,7 @@ def git_repo_client(repo_config: RepoConfig) -> Generator[GitRepoClient, None, N @pytest.fixture -def sandbox_client(repo_config: RepoConfig, get_free_port) -> Generator[SandboxClient, None, None]: - sb_client = SandboxClient(repo_config=repo_config, port=get_free_port, git_access_token=config.secrets.github_token) +def codebase_client(repo_config: RepoConfig, get_free_port) -> Generator[CodebaseClient, None, None]: + sb_client = CodebaseClient(repo_config=repo_config, port=get_free_port, git_access_token=config.secrets.github_token) sb_client.runner = Mock() yield sb_client diff --git a/tests/integration/codegen/runner/test_create_branch.py b/tests/integration/codegen/runner/test_create_branch.py index f93a94ae5..8d19b26cf 100644 --- a/tests/integration/codegen/runner/test_create_branch.py +++ b/tests/integration/codegen/runner/test_create_branch.py @@ -5,14 +5,14 @@ from codegen.git.clients.git_repo_client import GitRepoClient from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator -from codegen.runner.clients.sandbox_client import SandboxClient +from codegen.runner.clients.codebase_client import CodebaseClient from codegen.runner.models.apis import BRANCH_ENDPOINT, CreateBranchRequest, CreateBranchResponse from codegen.runner.models.codemod import BranchConfig, Codemod, GroupingConfig @pytest.mark.asyncio @pytest.mark.timeout(60) -async def test_create_branch(sandbox_client: SandboxClient, git_repo_client: GitRepoClient, op: RemoteRepoOperator): +async def test_create_branch(codebase_client: CodebaseClient, git_repo_client: GitRepoClient, op: RemoteRepoOperator): # set-up codemod_source = """ for file in codebase.files: @@ -29,7 +29,7 @@ async def test_create_branch(sandbox_client: SandboxClient, git_repo_client: Git ) # execute - response = sandbox_client.post(endpoint=BRANCH_ENDPOINT, data=request.model_dump()) + response = codebase_client.post(endpoint=BRANCH_ENDPOINT, data=request.model_dump()) assert response.status_code == HTTPStatus.OK # verify diff --git a/tests/integration/codegen/runner/test_create_branch_with_grouping.py b/tests/integration/codegen/runner/test_create_branch_with_grouping.py index a41b4be51..7bbb7069a 100644 --- a/tests/integration/codegen/runner/test_create_branch_with_grouping.py +++ b/tests/integration/codegen/runner/test_create_branch_with_grouping.py @@ -5,7 +5,7 @@ from codegen.git.clients.git_repo_client import GitRepoClient from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator -from codegen.runner.clients.sandbox_client import SandboxClient +from codegen.runner.clients.codebase_client import CodebaseClient from codegen.runner.models.apis import BRANCH_ENDPOINT, CreateBranchRequest, CreateBranchResponse from codegen.runner.models.codemod import BranchConfig, Codemod, GroupingConfig from codegen.sdk.codebase.flagging.groupers.enums import GroupBy @@ -13,7 +13,7 @@ @pytest.mark.timeout(120) @pytest.mark.parametrize("group_by", [GroupBy.INSTANCE, GroupBy.FILE]) -def test_create_branch_with_grouping(sandbox_client: SandboxClient, git_repo_client: GitRepoClient, op: RemoteRepoOperator, group_by: GroupBy): +def test_create_branch_with_grouping(codebase_client: CodebaseClient, git_repo_client: GitRepoClient, op: RemoteRepoOperator, group_by: GroupBy): codemod_source = """ for file in codebase.files[:5]: flag = codebase.flag_instance(file) @@ -31,7 +31,7 @@ def test_create_branch_with_grouping(sandbox_client: SandboxClient, git_repo_cli ) # execute - response = sandbox_client.post(endpoint=BRANCH_ENDPOINT, data=request.model_dump()) + response = codebase_client.post(endpoint=BRANCH_ENDPOINT, data=request.model_dump()) assert response.status_code == HTTPStatus.OK # verify From 5d87ebf3573fd87ab71bb80606bd89a02be04de1 Mon Sep 17 00:00:00 2001 From: Carol Jung Date: Tue, 18 Feb 2025 16:38:10 -0800 Subject: [PATCH 2/2] wip --- src/codegen/runner/clients/codebase_client.py | 29 ++++++++++++------- src/codegen/runner/clients/server_client.py | 4 +-- src/codegen/runner/sandbox/server.py | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/codegen/runner/clients/codebase_client.py b/src/codegen/runner/clients/codebase_client.py index 186ddc316..f57400666 100644 --- a/src/codegen/runner/clients/codebase_client.py +++ b/src/codegen/runner/clients/codebase_client.py @@ -5,6 +5,7 @@ from codegen.git.schemas.repo_config import RepoConfig from codegen.runner.clients.server_client import LocalServerClient from codegen.runner.models.apis import SANDBOX_SERVER_PORT +from codegen.shared.configs.session_configs import config logger = logging.getLogger(__name__) @@ -22,15 +23,23 @@ def __init__(self, repo_config: RepoConfig, git_access_token: str | None, host: self.git_access_token = git_access_token super().__init__(server_path=RUNNER_SERVER_PATH, host=host, port=port) - def _get_envs(self): + def _get_envs(self) -> dict: envs = super()._get_envs() - envs.update( - { - "CODEGEN_REPOSITORY__REPO_PATH": self.repo_config.repo_path, - "CODEGEN_REPOSITORY__REPO_NAME": self.repo_config.name, - "CODEGEN_REPOSITORY__FULL_NAME": self.repo_config.full_name, - "CODEGEN_REPOSITORY__LANGUAGE": self.repo_config.language.value, - "CODEGEN_SECRETS__GITHUB_TOKEN": self.git_access_token, - } - ) + codebase_envs = { + "CODEGEN_REPOSITORY__REPO_PATH": self.repo_config.repo_path, + "CODEGEN_REPOSITORY__REPO_NAME": self.repo_config.name, + "CODEGEN_REPOSITORY__FULL_NAME": self.repo_config.full_name, + "CODEGEN_REPOSITORY__LANGUAGE": self.repo_config.language.value, + } + if self.git_access_token is not None: + codebase_envs["CODEGEN_SECRETS__GITHUB_TOKEN"] = self.git_access_token + + envs.update(codebase_envs) return envs + + +if __name__ == "__main__": + test_config = RepoConfig.from_repo_path("/Users/caroljung/git/codegen/codegen-agi") + test_config.full_name = "codegen-sh/codegen-agi" + client = CodebaseClient(test_config, config.secrets.github_token) + print(client.healthcheck()) diff --git a/src/codegen/runner/clients/server_client.py b/src/codegen/runner/clients/server_client.py index 5ae68db0e..a9f5bd57a 100644 --- a/src/codegen/runner/clients/server_client.py +++ b/src/codegen/runner/clients/server_client.py @@ -36,7 +36,7 @@ def __del__(self): self._process.terminate() self._process.wait() - def _get_envs(self): + def _get_envs(self) -> dict: return os.environ.copy() def _start_server(self, server_path: str) -> None: @@ -57,7 +57,7 @@ def _start_server(self, server_path: str) -> None: ) self._wait_for_server() - def _wait_for_server(self, timeout: int = 60, interval: float = 0.1) -> None: + def _wait_for_server(self, timeout: int = 10, interval: float = 0.1) -> None: """Wait for the server to start by polling the health endpoint""" start_time = time.time() while (time.time() - start_time) < timeout: diff --git a/src/codegen/runner/sandbox/server.py b/src/codegen/runner/sandbox/server.py index d0236d3a5..28971c016 100644 --- a/src/codegen/runner/sandbox/server.py +++ b/src/codegen/runner/sandbox/server.py @@ -39,7 +39,7 @@ async def lifespan(server: FastAPI): global runner try: - server_info = ServerInfo(repo_name=config.repository.full_name) + server_info = ServerInfo(repo_name=config.repository.full_name or config.repository.repo_name) logger.info(f"Starting up sandbox fastapi server for repo_name={server_info.repo_name}") repo_config = RepoConfig( name=config.repository.repo_name,