diff --git a/charts/agentevals/templates/deployment.yaml b/charts/agentevals/templates/deployment.yaml index e8852dc..b358b1a 100644 --- a/charts/agentevals/templates/deployment.yaml +++ b/charts/agentevals/templates/deployment.yaml @@ -29,8 +29,9 @@ spec: securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} serviceAccountName: {{ include "agentevals.serviceAccountName" . }} - {{- if .Values.ephemeralVolume.enabled }} + {{- if or .Values.ephemeralVolume.enabled .Values.extraVolumes }} volumes: + {{- if .Values.ephemeralVolume.enabled }} - name: agentevals-tmp {{- if or .Values.ephemeralVolume.sizeLimit (eq .Values.ephemeralVolume.medium "Memory") }} emptyDir: @@ -43,6 +44,10 @@ spec: {{- else }} emptyDir: {} {{- end }} + {{- end }} + {{- with .Values.extraVolumes }} + {{- toYaml . | nindent 8 }} + {{- end }} {{- end }} containers: - name: agentevals @@ -70,6 +75,8 @@ spec: value: "postgres" - name: AGENTEVALS_DATABASE_SCHEMA value: {{ .Values.database.postgres.schema | quote }} + - name: AGENTEVALS_AUTO_MIGRATE + value: {{ .Values.database.postgres.autoMigrate | quote }} {{- if .Values.database.postgres.urlFile }} - name: AGENTEVALS_DATABASE_URL_FILE value: {{ .Values.database.postgres.urlFile | quote }} @@ -135,10 +142,15 @@ spec: port: http initialDelaySeconds: 15 periodSeconds: 20 - {{- if .Values.ephemeralVolume.enabled }} + {{- if or .Values.ephemeralVolume.enabled .Values.extraVolumeMounts }} volumeMounts: + {{- if .Values.ephemeralVolume.enabled }} - name: agentevals-tmp mountPath: /tmp + {{- end }} + {{- with .Values.extraVolumeMounts }} + {{- toYaml . 
| nindent 12 }} + {{- end }} {{- end }} {{- with .Values.nodeSelector }} nodeSelector: diff --git a/charts/agentevals/values.yaml b/charts/agentevals/values.yaml index b3987b6..e7c9c49 100644 --- a/charts/agentevals/values.yaml +++ b/charts/agentevals/values.yaml @@ -159,6 +159,16 @@ env: [] # -- Extra envFrom sources (ConfigMapRef, SecretRef) envFrom: [] +# -- Extra volumes appended to the pod spec. Use this to mount additional +# config files or secrets (e.g. result-sink credentials) into the pod. +extraVolumes: [] + +# -- Extra volumeMounts appended to the main container. Pair with +# extraVolumes by name. securityContext.readOnlyRootFilesystem is true by +# default, but that only makes the root filesystem read-only; mounted paths +# themselves are unaffected, so a writable extraVolumes entry works fine. +extraVolumeMounts: [] + # ============================================================================== # STORAGE (preview feature) # @@ -195,6 +205,12 @@ database: urlFile: "" # -- Postgres schema to use for agentevals tables. schema: agentevals + # -- Apply pending database migrations during server startup before the + # HTTP listener opens. The Postgres advisory lock serialises concurrent + # replica starts so this is safe with replicaCount > 1. When set to + # false the server refuses to start if the schema is behind or dirty; + # run "agentevals migrate up" manually in that case. + autoMigrate: true # -- Bundled Postgres instance for development and evaluation only. # Not suitable for production. Deployed when enabled is true and url / # urlFile are not set. 
diff --git a/src/agentevals/api/app.py b/src/agentevals/api/app.py
index eeb9375..80a9790 100644
--- a/src/agentevals/api/app.py
+++ b/src/agentevals/api/app.py
@@ -20,7 +20,7 @@
 from ..run.sinks import log_registered_sinks
 from ..run.worker import AsyncRunWorker
 from ..storage import StorageSettings, build_repos
-from ..storage.postgres.migrator import Migrator
+from ..storage.postgres.migrator import Migrator, discover_migrations
 from ..utils.log_buffer import log_buffer
 from .debug_routes import debug_router
 from .routes import router
@@ -31,6 +31,38 @@
 logger = logging.getLogger(__name__)
 
 
+_TRUE_VALUES = {"true", "1", "yes", "on"}
+_FALSE_VALUES = {"false", "0", "no", "off"}
+
+
+def _env_bool(name: str, *, default: bool) -> bool:
+    """Parse environment variable ``name`` as a boolean flag.
+
+    Matching is case-insensitive and ignores surrounding whitespace.
+
+    Args:
+        name: Environment variable to read.
+        default: Returned when the variable is unset, empty, or blank.
+
+    Returns:
+        True for true/1/yes/on, False for false/0/no/off.
+
+    Raises:
+        ValueError: If the variable holds any other non-blank value.
+    """
+    raw = os.getenv(name)
+    # Normalise before the emptiness check so a whitespace-only value falls
+    # back to the default like an empty one instead of raising.
+    val = "" if raw is None else raw.strip().lower()
+    if not val:
+        return default
+    if val in _TRUE_VALUES:
+        return True
+    if val in _FALSE_VALUES:
+        return False
+    raise ValueError(f"{name} must be one of true/false/1/0/yes/no/on/off (got: {raw!r})")
+
+
 try:
     from dotenv import load_dotenv
 
@@ -68,13 +100,34 @@ async def lifespan(app: FastAPI):
         logger.error("Storage configuration invalid; /api/runs will not be available: %s", exc)
 
     if storage_settings is not None and storage_settings.backend == "postgres":
-        logger.info("Applying any pending migrations to schema '%s'", storage_settings.schema_name)
         migrator = Migrator(
             dsn=storage_settings.database_url or "",
             schema=storage_settings.schema_name,
             lock_timeout_s=storage_settings.migrate_lock_timeout_s,
         )
-        await migrator.up()
+        if _env_bool("AGENTEVALS_AUTO_MIGRATE", default=True):
+            logger.info("Applying any pending migrations to schema '%s'", storage_settings.schema_name)
+            await migrator.up()
+        else:
+            logger.info(
+                "AGENTEVALS_AUTO_MIGRATE is disabled; verifying schema '%s' is up to date",
+                storage_settings.schema_name,
+            )
+            status = await migrator.status()
+            if status.dirty:
+                raise RuntimeError(
+                    f"schema_migrations is dirty at version {status.version}. "
+                    "Resolve manually and run 'agentevals migrate force ', "
+                    "or set AGENTEVALS_AUTO_MIGRATE=true to retry on startup."
+                )
+            current = status.version
+            pending = [m.version for m in discover_migrations() if current is None or m.version > current]
+            if pending:
+                raise RuntimeError(
+                    f"Database schema is behind: pending migrations {pending}. "
+                    "Run 'agentevals migrate up' to apply them, "
+                    "or set AGENTEVALS_AUTO_MIGRATE=true to apply on startup."
+                )
 
     repos = await build_repos(storage_settings)
     app.state.storage_settings = storage_settings