diff --git a/src/codegen/git/repo_operator/local_git_repo.py b/src/codegen/git/repo_operator/local_git_repo.py index 73d4b9041..8674e5b33 100644 --- a/src/codegen/git/repo_operator/local_git_repo.py +++ b/src/codegen/git/repo_operator/local_git_repo.py @@ -60,13 +60,14 @@ def user_email(self) -> str | None: def get_language(self, access_token: str | None = None) -> str: """Returns the majority language of the repository""" - if access_token is None: - return str(determine_project_language(self.repo_path)) - - repo_config = RepoConfig.from_repo_path(repo_path=self.repo_path) - repo_config.full_name = self.full_name - remote_git = GitRepoClient(repo_config=repo_config, access_token=access_token) - return remote_git.repo.language.upper() + if access_token is not None: + repo_config = RepoConfig.from_repo_path(repo_path=self.repo_path) + repo_config.full_name = self.full_name + remote_git = GitRepoClient(repo_config=repo_config, access_token=access_token) + if (language := remote_git.repo.language) is not None: + return language.upper() + + return str(determine_project_language(self.repo_path)) def has_remote(self) -> bool: return bool(self.git_cli.remotes) diff --git a/src/codegen/git/schemas/repo_config.py b/src/codegen/git/schemas/repo_config.py index f7d152cd7..3205c770e 100644 --- a/src/codegen/git/schemas/repo_config.py +++ b/src/codegen/git/schemas/repo_config.py @@ -1,4 +1,3 @@ -import base64 import logging import os.path @@ -15,19 +14,14 @@ class RepoConfig(BaseModel): name: str full_name: str | None = None - organization_name: str | None = None visibility: RepoVisibility | None = None # Codebase fields base_dir: str = "/tmp" # parent directory of the git repo + language: ProgrammingLanguage = ProgrammingLanguage.PYTHON + respect_gitignore: bool = True base_path: str | None = None # root directory of the codebase within the repo - language: ProgrammingLanguage | None = ProgrammingLanguage.PYTHON subdirectories: list[str] | None = None - respect_gitignore: bool = True - - @property - def repo_path(self) -> str: - return f"{self.base_dir}/{self.name}" @classmethod def from_repo_path(cls, repo_path: str) -> "RepoConfig": @@ -35,12 +29,12 @@ def from_repo_path(cls, repo_path: str) -> "RepoConfig": base_dir = os.path.dirname(repo_path) return cls(name=name, base_dir=base_dir) - # TODO: remove - def encoded_json(self): - return base64.b64encode(self.model_dump_json().encode("utf-8")).decode("utf-8") + @property + def repo_path(self) -> str: + return f"{self.base_dir}/{self.name}" - # TODO: remove, read from shared config instead - @staticmethod - def from_encoded_json(encoded_json: str) -> "RepoConfig": - decoded = base64.b64decode(encoded_json).decode("utf-8") - return RepoConfig.model_validate_json(decoded) + @property + def organization_name(self) -> str | None: + if self.full_name is not None: + return self.full_name.split("/")[0] + return None diff --git a/src/codegen/runner/clients/sandbox_client.py b/src/codegen/runner/clients/sandbox_client.py index 7860f1566..7c0004400 100644 --- a/src/codegen/runner/clients/sandbox_client.py +++ b/src/codegen/runner/clients/sandbox_client.py @@ -9,9 +9,7 @@ from fastapi import params from codegen.git.schemas.repo_config import RepoConfig -from codegen.runner.constants.envvars import FEATURE_FLAGS_BASE64, GITHUB_TOKEN, REPO_CONFIG_BASE64 from codegen.runner.models.apis import SANDBOX_SERVER_PORT -from codegen.runner.models.configs import RunnerFeatureFlags logger = logging.getLogger(__name__) @@ -33,15 +31,14 @@ def __init__(self, repo_config: RepoConfig, git_access_token: str | None, host: def _start_server(self, repo_config: RepoConfig, git_access_token: str | None) -> None: """Start the FastAPI server in a subprocess""" - # encoded_flags = runner_flags_from_posthog(repo_config.name).encoded_json() # TODO: once migrated to dockerized image, uncomment this line - encoded_flags = RunnerFeatureFlags().encoded_json() env = os.environ.copy() env.update( { - REPO_CONFIG_BASE64: repo_config.encoded_json(), - FEATURE_FLAGS_BASE64: encoded_flags, - "OPENAI_PASS": "open-ai-password", - GITHUB_TOKEN: git_access_token, + "CODEGEN_REPOSITORY__REPO_PATH": repo_config.repo_path, + "CODEGEN_REPOSITORY__REPO_NAME": repo_config.name, + "CODEGEN_REPOSITORY__FULL_NAME": repo_config.full_name, + "CODEGEN_REPOSITORY__LANGUAGE": repo_config.language.value, + "CODEGEN_SECRETS__GITHUB_TOKEN": git_access_token, } ) diff --git a/src/codegen/runner/models/configs.py b/src/codegen/runner/models/configs.py deleted file mode 100644 index a2e4f92fa..000000000 --- a/src/codegen/runner/models/configs.py +++ /dev/null @@ -1,56 +0,0 @@ -import base64 -import os - -from pydantic import BaseModel -from pydantic.config import ConfigDict - -from codegen.git.schemas.repo_config import RepoConfig -from codegen.runner.constants.envvars import FEATURE_FLAGS_BASE64, REPO_CONFIG_BASE64 -from codegen.sdk.codebase.config import CodebaseConfig, GSFeatureFlags -from codegen.sdk.secrets import Secrets - - -class RunnerFeatureFlags(BaseModel): - """Feature flags for a runner""" - - model_config = ConfigDict(frozen=True) - - sync_enabled: bool = True - track_graph: bool = False - verify_graph: bool = False - - ts_language_engine: bool = False - v8_ts_engine: bool = False - ts_dependency_manager: bool = False - - import_resolution_overrides: dict[str, str] = {} - - def encoded_json(self): - return base64.b64encode(self.model_dump_json().encode("utf-8")).decode("utf-8") - - @staticmethod - def from_encoded_json(encoded_json: str) -> "RunnerFeatureFlags": - decoded = base64.b64decode(encoded_json).decode("utf-8") - return RunnerFeatureFlags.model_validate_json(decoded) - - -def get_codebase_config() -> CodebaseConfig: - gs_ffs = GSFeatureFlags(**get_runner_feature_flags().model_dump()) - secrets = Secrets(openai_key=os.environ["OPENAI_PASS"]) - return CodebaseConfig(secrets=secrets, feature_flags=gs_ffs) - - -def get_runner_feature_flags() -> RunnerFeatureFlags: - encoded_ffs = os.environ.get(FEATURE_FLAGS_BASE64) - if not encoded_ffs: - msg = "FEATURE_FLAGS_BASE64 environment variable not found" - raise ValueError(msg) - return RunnerFeatureFlags.from_encoded_json(encoded_ffs) - - -def get_repo_config() -> RepoConfig: - encoded_repo_config = os.environ.get(REPO_CONFIG_BASE64) - if not encoded_repo_config: - msg = "REPO_CONFIG_BASE64 environment variable not found" - raise ValueError(msg) - return RepoConfig.from_encoded_json(encoded_repo_config) diff --git a/src/codegen/runner/sandbox/runner.py b/src/codegen/runner/sandbox/runner.py index 40340d47c..94242379c 100644 --- a/src/codegen/runner/sandbox/runner.py +++ b/src/codegen/runner/sandbox/runner.py @@ -6,11 +6,11 @@ from codegen.git.repo_operator.remote_repo_operator import RemoteRepoOperator from codegen.git.schemas.repo_config import RepoConfig from codegen.runner.models.apis import CreateBranchRequest, CreateBranchResponse, GetDiffRequest, GetDiffResponse -from codegen.runner.models.configs import get_codebase_config from codegen.runner.sandbox.executor import SandboxExecutor -from codegen.sdk.codebase.config import ProjectConfig, SessionOptions +from codegen.sdk.codebase.config import CodebaseConfig, GSFeatureFlags, ProjectConfig, SessionOptions from codegen.sdk.codebase.factory.codebase_factory import CodebaseType from codegen.sdk.core.codebase import Codebase +from codegen.sdk.secrets import Secrets from codegen.shared.compilation.string_to_code import create_execute_function_from_codeblock from codegen.shared.configs.config import config from codegen.shared.performance.stopwatch_utils import stopwatch @@ -30,12 +30,9 @@ class SandboxRunner: codebase: CodebaseType executor: SandboxExecutor - def __init__( - self, - repo_config: RepoConfig, - ) -> None: + def __init__(self, repo_config: RepoConfig, access_token: str) -> None: self.repo = repo_config - self.op = RemoteRepoOperator(repo_config=repo_config, access_token=config.secrets.github_token) + self.op = RemoteRepoOperator(repo_config=self.repo, access_token=access_token) self.commit = self.op.git_cli.head.commit async def warmup(self) -> None: @@ -48,9 +45,11 @@ async def warmup(self) -> None: async def _build_graph(self) -> Codebase: logger.info("> Building graph...") - programming_language = self.op.repo_config.language - projects = [ProjectConfig(programming_language=programming_language, repo_operator=self.op, base_path=self.op.repo_config.base_path, subdirectories=self.op.repo_config.subdirectories)] - return Codebase(projects=projects, config=get_codebase_config()) + projects = [ProjectConfig(programming_language=self.repo.language, repo_operator=self.op, base_path=self.repo.base_path, subdirectories=self.repo.subdirectories)] + gs_ffs = GSFeatureFlags(**config.feature_flags.model_dump()) + secrets = Secrets(openai_key=config.secrets.openai_api_key) + codebase_config = CodebaseConfig(secrets=secrets, feature_flags=gs_ffs) + return Codebase(projects=projects, config=codebase_config) @stopwatch def reset_runner(self) -> None: diff --git a/src/codegen/runner/sandbox/server.py b/src/codegen/runner/sandbox/server.py index ebc1c618e..ff3f42242 100644 --- a/src/codegen/runner/sandbox/server.py +++ b/src/codegen/runner/sandbox/server.py @@ -7,6 +7,7 @@ import psutil from fastapi import FastAPI +from codegen.git.schemas.repo_config import RepoConfig from codegen.runner.enums.warmup_state import WarmupState from codegen.runner.models.apis import ( BRANCH_ENDPOINT, @@ -20,9 +21,10 @@ SignalShutdownResponse, UtilizationMetrics, ) -from codegen.runner.models.configs import get_repo_config from codegen.runner.sandbox.middlewares import CodemodRunMiddleware from codegen.runner.sandbox.runner import SandboxRunner +from codegen.shared.configs.config import config +from codegen.shared.enums.programming_language import ProgrammingLanguage from codegen.shared.performance.memory_utils import get_memory_stats logger = logging.getLogger(__name__) @@ -37,11 +39,15 @@ async def lifespan(server: FastAPI): global runner try: - repo_config = get_repo_config() - server_info = ServerInfo(repo_name=repo_config.full_name) + server_info = ServerInfo(repo_name=config.repository.full_name) logger.info(f"Starting up sandbox fastapi server for repo_name={server_info.repo_name}") - - runner = SandboxRunner(repo_config=repo_config) + repo_config = RepoConfig( + name=config.repository.repo_name, + full_name=config.repository.full_name, + base_dir=os.path.dirname(config.repository.repo_path), + language=ProgrammingLanguage(config.repository.language.upper()), + ) + runner = SandboxRunner(repo_config=repo_config, access_token=config.secrets.github_token) server_info.warmup_state = WarmupState.PENDING await runner.warmup() server_info.warmup_state = WarmupState.COMPLETED diff --git a/src/codegen/shared/configs/models.py b/src/codegen/shared/configs/models.py index 0d0ea3961..e379c7058 100644 --- a/src/codegen/shared/configs/models.py +++ b/src/codegen/shared/configs/models.py @@ -8,13 +8,25 @@ from codegen.shared.configs.constants import CONFIG_PATH, ENV_PATH -class TypescriptConfig(BaseModel): +def _get_setting_config(group_name: str) -> SettingsConfigDict: + return SettingsConfigDict( + env_prefix=f"CODEGEN_{group_name}__", + env_file=ENV_PATH, + case_sensitive=False, + extra="ignore", + exclude_defaults=False, + ) + + +class TypescriptConfig(BaseSettings): + model_config = _get_setting_config("FEATURE_FLAGS_TYPESCRIPT") ts_dependency_manager: bool | None = None ts_language_engine: bool | None = None v8_ts_engine: bool | None = None -class CodebaseFeatureFlags(BaseModel): +class CodebaseFeatureFlags(BaseSettings): + model_config = _get_setting_config("FEATURE_FLAGS") debug: bool | None = None verify_graph: bool | None = None track_graph: bool | None = None @@ -28,25 +40,23 @@ class CodebaseFeatureFlags(BaseModel): typescript: TypescriptConfig = Field(default_factory=TypescriptConfig) -class RepositoryConfig(BaseModel): +class RepositoryConfig(BaseSettings): """Configuration for the repository context to run codegen. To populate this config, call `codegen init` from within a git repository. """ + model_config = _get_setting_config("REPOSITORY") + repo_path: str | None = None repo_name: str | None = None full_name: str | None = None + language: str | None = None user_name: str | None = None user_email: str | None = None - language: str | None = None class SecretsConfig(BaseSettings): - model_config = SettingsConfigDict( - env_prefix="CODEGEN_SECRETS__", - env_file=ENV_PATH, - case_sensitive=False, - ) + model_config = _get_setting_config("SECRETS") github_token: str | None = None openai_api_key: str | None = None diff --git a/tests/unit/codegen/runner/sandbox/conftest.py b/tests/unit/codegen/runner/sandbox/conftest.py index f8a7eceeb..8ae452495 100644 --- a/tests/unit/codegen/runner/sandbox/conftest.py +++ b/tests/unit/codegen/runner/sandbox/conftest.py @@ -4,12 +4,10 @@ import pytest from codegen.git.repo_operator.local_repo_operator import LocalRepoOperator -from codegen.runner.models.configs import RunnerFeatureFlags from codegen.runner.sandbox.executor import SandboxExecutor from codegen.runner.sandbox.runner import SandboxRunner -from codegen.sdk.codebase.config import CodebaseConfig, GSFeatureFlags, ProjectConfig +from codegen.sdk.codebase.config import ProjectConfig from codegen.sdk.core.codebase import Codebase -from codegen.sdk.secrets import Secrets from codegen.shared.enums.programming_language import ProgrammingLanguage @@ -33,13 +31,4 @@ def runner(codebase: Codebase, tmpdir): mock_init_codebase.return_value = codebase mock_op.return_value = codebase.op - yield SandboxRunner(repo_config=codebase.op.repo_config) - - -@pytest.fixture(autouse=True) -def mock_codebase_config(): - with patch("codegen.runner.sandbox.runner.get_codebase_config") as mock_config: - gs_ffs = GSFeatureFlags(**RunnerFeatureFlags().model_dump()) - secrets = Secrets(openai_key="test-key") - mock_config.return_value = CodebaseConfig(secrets=secrets, feature_flags=gs_ffs) - yield mock_config + yield SandboxRunner(repo_config=codebase.op.repo_config, access_token="test-token")