diff --git a/docs/tutorials/codebase-analytics-dashboard.mdx b/docs/tutorials/codebase-analytics-dashboard.mdx index a1bcbc359..b79f52c0e 100644 --- a/docs/tutorials/codebase-analytics-dashboard.mdx +++ b/docs/tutorials/codebase-analytics-dashboard.mdx @@ -5,7 +5,7 @@ icon: "calculator" iconType: "solid" --- -This tutorial explains how codebase metrics are effiently calculated using the `codegen` library in the Codebase Analytics Dashboard. The metrics include indeces of codebase maintainabilith and complexity. +This tutorial explains how codebase metrics are efficiently calculated using the `codegen` library in the Codebase Analytics Dashboard. The metrics include indices of codebase maintainability and complexity. View the full code and setup instructions in our [codebase-analytics repository](https://github.com/codegen-sh/codebase-analytics). diff --git a/pyproject.toml b/pyproject.toml index bf3d7d71d..8a3c851a0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,6 +72,7 @@ dependencies = [ "langchain-anthropic>=0.3.7", "lox>=0.12.0", "httpx>=0.28.1", + "docker>=6.1.3", ] license = { text = "Apache-2.0" } diff --git a/src/codegen/cli/commands/start/docker_container.py b/src/codegen/cli/commands/start/docker_container.py new file mode 100644 index 000000000..53ae3a339 --- /dev/null +++ b/src/codegen/cli/commands/start/docker_container.py @@ -0,0 +1,32 @@ +import docker + + +class DockerContainer: + _client: docker.DockerClient + host: str | None + port: int | None + name: str + + def __init__(self, client: docker.DockerClient, name: str, port: int | None = None, host: str | None = None): + self._client = client + self.host = host + self.port = port + self.name = name + + def is_running(self) -> bool: + try: + container = self._client.containers.get(self.name) + return container.status == "running" + except docker.errors.NotFound: + return False + + def start(self) -> bool: + try: + container = self._client.containers.get(self.name) + container.start() + 
return True + except (docker.errors.NotFound, docker.errors.APIError): + return False + + def __str__(self) -> str: + return f"DockerSession(name={self.name}, host={self.host or 'unknown'}, port={self.port or 'unknown'})" diff --git a/src/codegen/cli/commands/start/docker_fleet.py b/src/codegen/cli/commands/start/docker_fleet.py new file mode 100644 index 000000000..dd5907119 --- /dev/null +++ b/src/codegen/cli/commands/start/docker_fleet.py @@ -0,0 +1,46 @@ +import docker + +from codegen.cli.commands.start.docker_container import DockerContainer + +CODEGEN_RUNNER_IMAGE = "codegen-runner" + + +class DockerFleet: + containers: list[DockerContainer] + + def __init__(self, containers: list[DockerContainer]): + self.containers = containers + + @classmethod + def load(cls) -> "DockerFleet": + try: + client = docker.from_env() + containers = client.containers.list(all=True, filters={"ancestor": CODEGEN_RUNNER_IMAGE}) + codegen_containers = [] + for container in containers: + if container.attrs["Config"]["Image"] == CODEGEN_RUNNER_IMAGE: + if container.status == "running": + host_config = next(iter(container.ports.values()))[0] + codegen_container = DockerContainer(client=client, host=host_config["HostIp"], port=host_config["HostPort"], name=container.name) + else: + codegen_container = DockerContainer(client=client, name=container.name) + codegen_containers.append(codegen_container) + + return cls(containers=codegen_containers) + except docker.errors.NotFound: + return cls(containers=[]) + + @property + def active_containers(self) -> list[DockerContainer]: + return [container for container in self.containers if container.is_running()] + + def get(self, name: str) -> DockerContainer | None: + return next((container for container in self.containers if container.name == name), None) + + def __str__(self) -> str: + return f"DockerFleet(containers={',\n'.join(str(container) for container in self.containers)})" + + +if __name__ == "__main__": + pool = DockerFleet.load() + 
print(pool) diff --git a/src/codegen/cli/commands/start/main.py b/src/codegen/cli/commands/start/main.py index 86f8c384d..4a4a531d0 100644 --- a/src/codegen/cli/commands/start/main.py +++ b/src/codegen/cli/commands/start/main.py @@ -7,18 +7,27 @@ from rich.box import ROUNDED from rich.panel import Panel +from codegen.cli.commands.start.docker_container import DockerContainer +from codegen.cli.commands.start.docker_fleet import CODEGEN_RUNNER_IMAGE, DockerFleet from codegen.configs.models.secrets import SecretsConfig from codegen.git.repo_operator.local_git_repo import LocalGitRepo from codegen.git.schemas.repo_config import RepoConfig from codegen.shared.network.port import get_free_port +_default_host = "0.0.0.0" + @click.command(name="start") @click.option("--platform", "-t", type=click.Choice(["linux/amd64", "linux/arm64", "linux/amd64,linux/arm64"]), default="linux/amd64,linux/arm64", help="Target platform(s) for the Docker image") @click.option("--port", "-p", type=int, default=None, help="Port to run the server on") -@click.option("--detached", "-d", is_flag=True, default=False, help="Starts up the server as detached background process") -def start_command(port: int | None, platform: str, detached: bool): +def start_command(port: int | None, platform: str): """Starts a local codegen server""" + repo_path = Path.cwd().resolve() + repo_config = RepoConfig.from_repo_path(str(repo_path)) + fleet = DockerFleet.load() + if (container := fleet.get(repo_config.name)) is not None: + return _handle_existing_container(repo_config, container) + codegen_version = version("codegen") rich.print(f"[bold green]Codegen version:[/bold green] {codegen_version}") codegen_root = Path(__file__).parent.parent.parent.parent.parent.parent @@ -29,8 +38,9 @@ def start_command(port: int | None, platform: str, detached: bool): rich.print("[bold blue]Building Docker image...[/bold blue]") _build_docker_image(codegen_root, platform) rich.print("[bold blue]Starting Docker container...[/bold 
blue]") - _run_docker_container(port, detached) - rich.print(Panel(f"[green]Server started successfully![/green]\nAccess the server at: [bold]http://0.0.0.0:{port}[/bold]", box=ROUNDED, title="Codegen Server")) + _run_docker_container(repo_config, port) + rich.print(Panel(f"[green]Server started successfully![/green]\nAccess the server at: [bold]http://{_default_host}:{port}[/bold]", box=ROUNDED, title="Codegen Server")) + # TODO: memory snapshot here except subprocess.CalledProcessError as e: rich.print(f"[bold red]Error:[/bold red] Failed to {e.cmd[0]} Docker container") raise click.Abort() @@ -39,7 +49,26 @@ def start_command(port: int | None, platform: str, detached: bool): raise click.Abort() -def _build_docker_image(codegen_root: Path, platform: str): +def _handle_existing_container(repo_config: RepoConfig, container: DockerContainer) -> None: + if container.is_running(): + rich.print( + Panel( + f"[green]Codegen server for {repo_config.name} is already running at: [bold]http://{container.host}:{container.port}[/bold][/green]", + box=ROUNDED, + title="Codegen Server", + ) + ) + return + + if container.start(): + rich.print(Panel(f"[yellow]Docker container for {repo_config.name} is not running. 
Restarting...[/yellow]", box=ROUNDED, title="Docker Session")) + return + + rich.print(Panel(f"[red]Failed to restart container for {repo_config.name}[/red]", box=ROUNDED, title="Docker Session")) + click.Abort() + + +def _build_docker_image(codegen_root: Path, platform: str) -> None: build_cmd = [ "docker", "buildx", @@ -57,21 +86,19 @@ def _build_docker_image(codegen_root: Path, platform: str): subprocess.run(build_cmd, check=True) -def _run_docker_container(port: int, detached: bool): - repo_path = Path.cwd().resolve() - repo_config = RepoConfig.from_repo_path(repo_path) +def _run_docker_container(repo_config: RepoConfig, port: int) -> None: container_repo_path = f"/app/git/{repo_config.name}" + name_args = ["--name", f"{repo_config.name}"] envvars = { "REPOSITORY_LANGUAGE": repo_config.language.value, - "REPOSITORY_OWNER": LocalGitRepo(repo_path).owner, + "REPOSITORY_OWNER": LocalGitRepo(repo_config.repo_path).owner, "REPOSITORY_PATH": container_repo_path, "GITHUB_TOKEN": SecretsConfig().github_token, } envvars_args = [arg for k, v in envvars.items() for arg in ("--env", f"{k}={v}")] - mount_args = ["-v", f"{repo_path}:{container_repo_path}"] - run_mode = "-d" if detached else "-it" - entry_point = f"uv run --frozen uvicorn codegen.runner.sandbox.server:app --host 0.0.0.0 --port {port}" - run_cmd = ["docker", "run", run_mode, "-p", f"{port}:{port}", *mount_args, *envvars_args, "codegen-runner", entry_point] + mount_args = ["-v", f"{repo_config.repo_path}:{container_repo_path}"] + entry_point = f"uv run --frozen uvicorn codegen.runner.sandbox.server:app --host {_default_host} --port {port}" + run_cmd = ["docker", "run", "-d", "-p", f"{port}:{port}", *name_args, *mount_args, *envvars_args, CODEGEN_RUNNER_IMAGE, entry_point] rich.print(f"run_cmd: {str.join(' ', run_cmd)}") subprocess.run(run_cmd, check=True) diff --git a/src/codegen/extensions/clients/linear.py b/src/codegen/extensions/clients/linear.py index 3a72c9789..a059853e5 100644 --- 
a/src/codegen/extensions/clients/linear.py +++ b/src/codegen/extensions/clients/linear.py @@ -114,7 +114,7 @@ def comment_on_issue(self, issue_id: str, body: str) -> dict: comment_data = data["data"]["commentCreate"]["comment"] return comment_data - except: + except Exception: msg = f"Error creating comment\n{data}" raise Exception(msg) diff --git a/src/codegen/runner/clients/client.py b/src/codegen/runner/clients/client.py new file mode 100644 index 000000000..555819f10 --- /dev/null +++ b/src/codegen/runner/clients/client.py @@ -0,0 +1,47 @@ +"""Client used to abstract the weird stdin/stdout communication we have with the sandbox""" + +import logging + +import requests +from fastapi import params + +logger = logging.getLogger(__name__) + +DEFAULT_SERVER_PORT = 4002 + +EPHEMERAL_SERVER_PATH = "codegen.runner.sandbox.ephemeral_server:app" + + +class Client: + """Client for interacting with the sandbox server.""" + + host: str + port: int + base_url: str + + def __init__(self, host: str, port: int) -> None: + self.host = host + self.port = port + self.base_url = f"http://{host}:{port}" + + def healthcheck(self, raise_on_error: bool = True) -> bool: + try: + self.get("/") + return True + except requests.exceptions.ConnectionError: + if raise_on_error: + raise + return False + + def get(self, endpoint: str, data: dict | None = None) -> requests.Response: + url = f"{self.base_url}{endpoint}" + response = requests.get(url, json=data) + response.raise_for_status() + return response + + def post(self, endpoint: str, data: dict | None = None, authorization: str | params.Header | None = None) -> requests.Response: + url = f"{self.base_url}{endpoint}" + headers = {"Authorization": str(authorization)} if authorization else None + response = requests.post(url, json=data, headers=headers) + response.raise_for_status() + return response diff --git a/src/codegen/runner/clients/codebase_client.py b/src/codegen/runner/clients/codebase_client.py index 5cbcdb992..b363b10ad 100644 --- 
a/src/codegen/runner/clients/codebase_client.py +++ b/src/codegen/runner/clients/codebase_client.py @@ -1,28 +1,70 @@ """Client used to abstract the weird stdin/stdout communication we have with the sandbox""" import logging +import os +import subprocess +import time from codegen.configs.models.secrets import SecretsConfig from codegen.git.schemas.repo_config import RepoConfig -from codegen.runner.clients.server_client import LocalServerClient +from codegen.runner.clients.client import Client from codegen.runner.models.apis import SANDBOX_SERVER_PORT -logger = logging.getLogger(__name__) - +DEFAULT_SERVER_PORT = 4002 +EPHEMERAL_SERVER_PATH = "codegen.runner.sandbox.ephemeral_server:app" RUNNER_SERVER_PATH = "codegen.runner.sandbox.server:app" -class CodebaseClient(LocalServerClient): +logger = logging.getLogger(__name__) + + +class CodebaseClient(Client): """Client for interacting with the locally hosted sandbox server.""" repo_config: RepoConfig - def __init__(self, repo_config: RepoConfig, host: str = "127.0.0.1", port: int = SANDBOX_SERVER_PORT): + def __init__(self, repo_config: RepoConfig, host: str = "127.0.0.1", port: int = SANDBOX_SERVER_PORT, server_path: str = RUNNER_SERVER_PATH): + super().__init__(host=host, port=port) self.repo_config = repo_config - super().__init__(server_path=RUNNER_SERVER_PATH, host=host, port=port) + self._process = None + self._start_server(server_path) + + def __del__(self): + """Cleanup the subprocess when the client is destroyed""" + if self._process is not None: + self._process.terminate() + self._process.wait() + + def _start_server(self, server_path: str) -> None: + """Start the FastAPI server in a subprocess""" + envs = self._get_envs() + logger.info(f"Starting local server on {self.base_url} with envvars: {envs}") + + self._process = subprocess.Popen( + [ + "uvicorn", + server_path, + "--host", + self.host, + "--port", + str(self.port), + ], + env=envs, + ) + self._wait_for_server() + + def _wait_for_server(self, timeout: 
int = 30, interval: float = 0.3) -> None: + """Wait for the server to start by polling the health endpoint""" + start_time = time.time() + while (time.time() - start_time) < timeout: + if self.healthcheck(raise_on_error=False): + return + time.sleep(interval) + msg = "Server failed to start within timeout period" + raise TimeoutError(msg) def _get_envs(self) -> dict: - envs = super()._get_envs() + envs = os.environ.copy() codebase_envs = { "REPOSITORY_LANGUAGE": self.repo_config.language.value, "REPOSITORY_OWNER": self.repo_config.organization_name, diff --git a/src/codegen/runner/clients/docker_client.py b/src/codegen/runner/clients/docker_client.py new file mode 100644 index 000000000..eaf19fa36 --- /dev/null +++ b/src/codegen/runner/clients/docker_client.py @@ -0,0 +1,31 @@ +"""Client for interacting with the locally hosted sandbox server hosted on a docker container.""" + +from codegen.cli.commands.start.docker_container import DockerContainer +from codegen.cli.commands.start.docker_fleet import DockerFleet +from codegen.runner.clients.client import Client +from codegen.runner.models.apis import DIFF_ENDPOINT, GetDiffRequest +from codegen.runner.models.codemod import Codemod + + +class DockerClient(Client): + """Client for interacting with the locally hosted sandbox server hosted on a docker container.""" + + def __init__(self, container: DockerContainer): + if not container.is_running() or container.host is None or container.port is None: + msg = f"Container {container.name} is not running." + raise Exception(msg) + super().__init__(container.host, container.port) + + +if __name__ == "__main__": + fleet = DockerFleet.load() + cur = next((container for container in fleet.containers if container.is_running()), None) + if cur is None: + msg = "No running container found. Run `codegen start` from a git repo first." 
+ raise Exception(msg) + client = DockerClient(cur) + print(f"healthcheck: {client.healthcheck()}") + codemod = Codemod(user_code="print(codebase)") + diff_req = GetDiffRequest(codemod=codemod) + res = client.post(DIFF_ENDPOINT, diff_req.model_dump()) + print(res.json()) diff --git a/src/codegen/runner/clients/server_client.py b/src/codegen/runner/clients/server_client.py deleted file mode 100644 index e904582cd..000000000 --- a/src/codegen/runner/clients/server_client.py +++ /dev/null @@ -1,90 +0,0 @@ -"""Client used to abstract the weird stdin/stdout communication we have with the sandbox""" - -import logging -import os -import subprocess -import time - -import requests -from fastapi import params - -logger = logging.getLogger(__name__) - -DEFAULT_SERVER_PORT = 4002 - -EPHEMERAL_SERVER_PATH = "codegen.runner.sandbox.ephemeral_server:app" - - -class LocalServerClient: - """Client for interacting with the sandbox server.""" - - host: str - port: int - base_url: str - _process: subprocess.Popen | None - - def __init__(self, server_path: str = EPHEMERAL_SERVER_PATH, host: str = "127.0.0.1", port: int = DEFAULT_SERVER_PORT): - self.host = host - self.port = port - self.base_url = f"http://{host}:{port}" - self._process = None - self._start_server(server_path) - - def __del__(self): - """Cleanup the subprocess when the client is destroyed""" - if self._process is not None: - self._process.terminate() - self._process.wait() - - def _get_envs(self) -> dict: - return os.environ.copy() - - def _start_server(self, server_path: str) -> None: - """Start the FastAPI server in a subprocess""" - envs = self._get_envs() - logger.info(f"Starting local server on {self.base_url} with envvars: {envs}") - - self._process = subprocess.Popen( - [ - "uvicorn", - server_path, - "--host", - self.host, - "--port", - str(self.port), - ], - env=envs, - ) - self._wait_for_server() - - def _wait_for_server(self, timeout: int = 30, interval: float = 0.3) -> None: - """Wait for the server to 
start by polling the health endpoint""" - start_time = time.time() - while (time.time() - start_time) < timeout: - if self.healthcheck(raise_on_error=False): - return - time.sleep(interval) - msg = "Server failed to start within timeout period" - raise TimeoutError(msg) - - def healthcheck(self, raise_on_error: bool = True) -> bool: - try: - self.get("/") - return True - except requests.exceptions.ConnectionError: - if raise_on_error: - raise - return False - - def get(self, endpoint: str, data: dict | None = None) -> requests.Response: - url = f"{self.base_url}{endpoint}" - response = requests.get(url, json=data) - response.raise_for_status() - return response - - def post(self, endpoint: str, data: dict | None = None, authorization: str | params.Header | None = None) -> requests.Response: - url = f"{self.base_url}{endpoint}" - headers = {"Authorization": str(authorization)} if authorization else None - response = requests.post(url, json=data, headers=headers) - response.raise_for_status() - return response diff --git a/uv.lock b/uv.lock index c98eec55f..6e6175e1f 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = ">=3.12, <3.14" resolution-markers = [ "python_full_version >= '3.12.4'", @@ -546,12 +545,14 @@ dependencies = [ { name = "dataclasses-json" }, { name = "datamodel-code-generator" }, { name = "dicttoxml" }, + { name = "docker" }, { name = "docstring-parser" }, { name = "fastapi", extra = ["standard"] }, { name = "gitpython" }, { name = "giturlparse" }, { name = "hatch-vcs" }, { name = "hatchling" }, + { name = "httpx" }, { name = "humanize" }, { name = "langchain", extra = ["openai"] }, { name = "langchain-anthropic" }, @@ -667,12 +668,14 @@ requires-dist = [ { name = "dataclasses-json", specifier = ">=0.6.4,<1.0.0" }, { name = "datamodel-code-generator", specifier = ">=0.26.5" }, { name = "dicttoxml", specifier = ">=1.7.16,<2.0.0" }, + { name = "docker", specifier = ">=6.1.3" }, { name = "docstring-parser", 
specifier = ">=0.16,<1.0" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.115.2,<1.0.0" }, { name = "gitpython", specifier = "==3.1.44" }, { name = "giturlparse" }, { name = "hatch-vcs", specifier = ">=0.4.0" }, { name = "hatchling", specifier = ">=1.25.0" }, + { name = "httpx", specifier = ">=0.28.1" }, { name = "humanize", specifier = ">=4.10.0,<5.0.0" }, { name = "langchain", extras = ["openai"] }, { name = "langchain-anthropic", specifier = ">=0.3.7" }, @@ -733,7 +736,6 @@ requires-dist = [ { name = "wrapt", specifier = ">=1.16.0,<2.0.0" }, { name = "xmltodict", specifier = ">=0.13.0,<1.0.0" }, ] -provides-extras = ["lsp", "types"] [package.metadata.requires-dev] dev = [ @@ -1064,6 +1066,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632 }, ] +[[package]] +name = "docker" +version = "7.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, + { name = "requests" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774 }, +] + [[package]] name = "docstring-parser" version = "0.16"