diff --git a/chart/.pre-commit-config.yaml b/chart/.pre-commit-config.yaml index 16634adb4e179..699beac90d6f1 100644 --- a/chart/.pre-commit-config.yaml +++ b/chart/.pre-commit-config.yaml @@ -81,6 +81,13 @@ repos: pass_filenames: false files: ^.* require_serial: true + - id: build-kustomize-overlays + name: Build and smoke-test Kustomize overlays + entry: ../scripts/ci/prek/build_kustomize_overlays.py + language: python + pass_filenames: false + files: ^kustomize-overlays/.*\.(yaml|yml)$|^kustomize-overlays/.*/STATUS$ + require_serial: true - id: lint-json-schema name: Lint chart/values.schema.json entry: ../scripts/ci/prek/lint_json_schema.py diff --git a/chart/kustomize-overlays/CONTRIBUTING.rst b/chart/kustomize-overlays/CONTRIBUTING.rst new file mode 100644 index 0000000000000..44ef0a413155f --- /dev/null +++ b/chart/kustomize-overlays/CONTRIBUTING.rst @@ -0,0 +1,158 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Contributing Kustomize Overlays +=============================== + +This document is the authoritative reference for adding, evolving, and +retiring overlays under ``chart/kustomize-overlays/``. 
+ +Why this directory exists +------------------------- + +The Airflow Helm chart has historically carried components that are not +Airflow-native. They make the chart heavier than it needs to be and pull +maintenance toward things that already have external owners. Expressing +these components as Kustomize overlays keeps the chart focused on Airflow +itself while still giving users a working starting point for the rest. + +The chart never removes a component without a working overlay already in +place. Users always have a migration path before anything disappears. + +Criteria for chart vs Kustomize +------------------------------- + +A component **belongs in the chart** when all of the following are true: + +* It is required to run Airflow (scheduler, API server, dag-processor, + triggerer, workers). +* Removing/adding it requires changes to Airflow's own configuration. +* It has no external owner. +* The large majority of users (>80%) are expected to need it for production use. + +A component **belongs in Kustomize** when any of the following are true: + +* It can be expressed as a standalone Kubernetes resource without modifying + chart-rendered resources. +* It is environment-specific (authentication schemes, logging backends, + autoscaling controllers, etc.). +* It has an external owner (KEDA, Elasticsearch, any PostgreSQL distribution, etc.). +* It requires CRDs that the chart does not install. +* It is used by a minority of users, such that the additional complexity and + maintenance burden of carrying it in the chart do not pay off. + +If a component qualifies for Kustomize but no overlay exists yet, it stays in +the chart until the overlay is in place and verified. + +Overlay structure +----------------- + +Each overlay directory must contain: + +* ``kustomization.yaml`` - the Kustomize entry point. +* The Kubernetes resources the overlay produces. +* ``STATUS.yaml`` - a small YAML document declaring the verification state. 
+* ``README.rst`` - usage instructions and a migration guide from the + equivalent chart-side configuration. + +STATUS file format +------------------ + +The ``STATUS.yaml`` file is a small YAML document with the following fields. + +For a verified overlay: + +.. code-block:: yaml + + status: tested + chart-version: "1.21.0" + last-verified: "2026-04-25" + +For a starting-point overlay without functional CI coverage: + +.. code-block:: yaml + + status: not-tested + reason: "Pending community validation. Use as a starting point only." + +For an overlay scheduled for removal: + +.. code-block:: yaml + + status: deprecated + message: "Replaced by <replacement-overlay>. Will be removed in chart 3.0.0." + +Lifecycle +--------- + +The lifecycle mirrors how providers work, just on a smaller scale. Two +checks gate the ``STATUS`` field, and they are deliberately separate. + +The ``build_kustomize_overlays`` prek hook +(``scripts/ci/prek/build_kustomize_overlays.py``) runs on every commit and +applies a generic structural check to every overlay: the build succeeds, the +output parses as valid YAML, every resource has ``apiVersion``, ``kind`` and +``metadata.name``, and there are no duplicate resource keys. This is enough +to catch most authoring mistakes but it does not validate against the CRD +schemas of the controllers the overlay targets, and nothing is ever applied +to a live cluster. + +A functional integration test is the separate, stronger check. It applies +the overlay against a real cluster (typically a kind cluster with the chart +already installed and the relevant controller running) and asserts the +runtime behaviour the overlay promises. Until such a test exists for an +overlay, its ``STATUS`` must stay at ``not-tested``. + +Lifecycle steps: + +* A new overlay is proposed via a PR and lands with ``status: not-tested``. 
+ The prek hook automatically applies the generic structural check; if the + overlay needs invariants beyond that (for example a cross-reference + between resources), they belong in the integration test, not in the prek + hook. +* A follow-up PR adds a functional integration test for the overlay. Once + that test passes, ``STATUS`` is flipped to ``tested``. +* An overlay is deprecated by setting ``status: deprecated`` together with a + ``message`` field pointing to the replacement. +* Deprecated overlays remain for one chart major version before they are + removed, so users always have a window to migrate. + +Adding a new overlay +-------------------- + +1. Confirm the component meets the Kustomize criteria above. +2. Create ``chart/kustomize-overlays/<overlay-name>/`` with the required files. +3. Use placeholders such as ``RELEASE-NAME`` for values the user must fill in, + and document the substitutions in the overlay's ``README.rst``. +4. Land the PR with ``status: not-tested``. +5. Add a row to the table in ``chart/kustomize-overlays/README.rst``. +6. Follow up with a CI test and flip ``STATUS`` to ``tested``. + +Migration guide pattern +----------------------- + +Each overlay ``README.rst`` should include a migration guide section with +exactly three parts: + +1. **What the chart currently does** - the relevant ``values.yaml`` keys and + the Kubernetes resources they produce today. +2. **What the overlay provides** - the equivalent resources rendered from the + overlay. +3. **How to switch** - step-by-step instructions, with the explicit order of + operations. + +The guide must be written against the current chart template. It is not +speculative documentation. diff --git a/chart/kustomize-overlays/README.rst b/chart/kustomize-overlays/README.rst new file mode 100644 index 0000000000000..dfa321af733cc --- /dev/null +++ b/chart/kustomize-overlays/README.rst @@ -0,0 +1,71 @@ +.. 
Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +Airflow Helm Chart - Kustomize Overlays +======================================= + +.. note:: + + **Not distributed with chart releases.** + This directory lives in the source repository as a reference for users but + is **not** packaged or published as part of the official Airflow Helm chart + release artifacts. Consume it directly from the repository at the tag that + matches your chart version. + +This directory contains Kustomize overlays that complement the Airflow Helm +chart for components that are not Airflow-native. + +The motivation, criteria, and lifecycle for these overlays are defined in +``CONTRIBUTING.rst`` in this directory. + +Available overlays +------------------ + ++----------+----------------------+----------------------------------------------+ +| Overlay | STATUS | Purpose | ++==========+======================+==============================================+ +| ``keda`` | not-tested (PoC) | Autoscaling for Celery workers via KEDA. | ++----------+----------------------+----------------------------------------------+ + +Each overlay directory contains its own ``README.rst`` with usage details and +a migration guide from the equivalent chart-side configuration. 
+ +Using an overlay +---------------- + +The overlays are designed for the "standalone addition" pattern. They do not +modify resources rendered by the chart. A typical workflow is: + +1. Install the Airflow chart as usual. +2. Reference the overlay from your own ``kustomization.yaml`` and apply the + substitutions described in the overlay's ``README.rst`` (release name, + namespace, secret references). +3. Apply the rendered manifests with ``kubectl apply -k`` against the same + namespace as the chart release. + +Status conventions +------------------ + +Each overlay carries a ``STATUS.yaml`` file that declares its verification level: + +* ``tested`` - the overlay is verified in Apache Airflow CI against the current chart version. +* ``not-tested`` - the overlay builds successfully but has no functional CI + coverage. Treat it as a starting point that you adapt to your environment. +* ``deprecated`` - the overlay is scheduled for removal. The ``STATUS.yaml`` file + carries a ``message`` field pointing to the replacement. + +See `CONTRIBUTING.rst <CONTRIBUTING.rst>`_ for the full status grammar and lifecycle. diff --git a/chart/kustomize-overlays/keda/README.rst b/chart/kustomize-overlays/keda/README.rst new file mode 100644 index 0000000000000..888a22d74cc04 --- /dev/null +++ b/chart/kustomize-overlays/keda/README.rst @@ -0,0 +1,180 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. 
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +KEDA Autoscaler Overlay +======================= + +This overlay produces a `KEDA <https://keda.sh/>`__ ``ScaledObject`` plus a +``TriggerAuthentication`` for the chart-rendered Celery workers. It is a +standalone addition: no resource produced by the Helm chart is modified. + +It is the Kustomize equivalent of enabling ``workers.celery.keda.enabled`` +in ``values.yaml``, and is the recommended migration path for users who +want to keep Celery autoscaling without relying on chart-side templating. + +Prerequisites +------------- + +* `KEDA <https://keda.sh/>`__ installed in the cluster. +* The Airflow chart installed with the **CeleryExecutor**. +* The chart's metadata Secret (``<release-name>-airflow-metadata``) reachable + from KEDA's namespace - usually the same namespace as the chart release. + +Resources produced +------------------ + +* ``TriggerAuthentication/airflow-keda-postgres-auth`` - reads the metadata + DB connection string directly from the chart's metadata Secret. +* ``ScaledObject/airflow-worker`` - targets the chart-rendered worker + Deployment and scales it based on the count of running and queued task + instances. + +Usage +----- + +Reference this overlay from your own kustomization and substitute the +release name. A minimal example: + +.. 
code-block:: yaml + + # my-overlay/kustomization.yaml + apiVersion: kustomize.config.k8s.io/v1beta1 + kind: Kustomization + namespace: airflow + + resources: + - github.com/apache/airflow/chart/kustomize-overlays/keda?ref=helm-chart/1.21.0 + + replacements: + - source: + kind: ConfigMap + name: airflow-overlay-config + fieldPath: data.releaseName + targets: + - select: + kind: ScaledObject + name: airflow-worker + fieldPaths: + - spec.scaleTargetRef.name + options: + # Split on the fixed suffix: the RELEASE-NAME placeholder itself + # contains "-", so a "-" delimiter would only replace "RELEASE". + delimiter: "-worker" + index: 0 + - select: + kind: TriggerAuthentication + name: airflow-keda-postgres-auth + fieldPaths: + - spec.secretTargetRef.0.name + options: + delimiter: "-airflow-metadata" + index: 0 + + configMapGenerator: + - name: airflow-overlay-config + literals: + - releaseName=airflow + +Apply with: + +.. code-block:: bash + + kubectl apply -k my-overlay/ + +For a quick test, you can also just ``sed`` the placeholder: + +.. code-block:: bash + + kustomize build chart/kustomize-overlays/keda | \ + sed 's/RELEASE-NAME/airflow/g' | \ + kubectl apply -f - + +Tuning the trigger query +------------------------ + +The default query mirrors the chart for a single Celery queue named +``default`` with ``worker_concurrency=16``. If you set different values in +your chart install, edit ``scaledobject.yaml`` accordingly: + +* Replace ``16`` with the value of ``config.celery.worker_concurrency``. +* Extend ``queue IN ('default')`` to list every entry from + ``workers.celery.queue`` (comma-separated in ``values.yaml``, single-quoted + here). + +Pgbouncer +--------- + +If pgbouncer is enabled and you do not want KEDA polling through it, change +the ``key`` field in ``triggerauthentication.yaml`` from ``connection`` to +``kedaConnection``. The chart writes a direct-to-Postgres connection string +under that key for exactly this purpose. + +Persistence +----------- + +If your worker is deployed as a ``StatefulSet`` (i.e. 
you set +``workers.celery.persistence.enabled=true``), change ``kind: Deployment`` to +``kind: StatefulSet`` under ``scaleTargetRef`` in ``scaledobject.yaml``. + +Migration guide from the chart +------------------------------ + +What the chart currently does +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When ``workers.celery.keda.enabled=true``, the chart renders: + +* A ``ScaledObject`` named ``<release-name>-worker`` targeting the worker + Deployment or StatefulSet. +* The KEDA trigger reads the connection string from the worker pod's + ``KEDA_DB_CONN`` (or ``AIRFLOW_CONN_AIRFLOW_DB``) env var, which is + itself sourced from the metadata Secret. +* Tuning is exposed under ``workers.celery.keda.*`` in ``values.yaml``. + +What this overlay provides +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* The same ``ScaledObject`` as a standalone resource. +* A ``TriggerAuthentication`` that reads the connection string directly + from the chart's metadata Secret. This avoids the indirection through + the worker pod env var, and means the overlay does not need to patch any + chart-rendered resource. + +How to switch +^^^^^^^^^^^^^ + +1. Install or upgrade the chart with ``workers.celery.keda.enabled=false``. +2. Render this overlay with the substitutions described above. +3. Apply the rendered manifests. +4. Confirm KEDA reports ``ScaledObject`` ``Ready`` and the worker scales + on demand. Useful command: + + .. code-block:: bash + + kubectl describe scaledobject airflow-worker -n <namespace> + +If you previously set custom ``pollingInterval``, ``cooldownPeriod``, +``minReplicaCount``, ``maxReplicaCount``, ``advanced``, or ``query`` under +``workers.celery.keda``, copy them into ``scaledobject.yaml`` before +applying. + +Status +------ + +This overlay carries ``status: not-tested``. It builds successfully but has +no functional CI coverage yet. Treat it as a starting point and adapt it +to your environment. Feedback and improvements via pull request are very +welcome under the `helm-chart refurbish umbrella issue +<https://github.com/apache/airflow/issues/64037>`__. 
diff --git a/chart/kustomize-overlays/keda/STATUS.yaml b/chart/kustomize-overlays/keda/STATUS.yaml new file mode 100644 index 0000000000000..e072e6be44b51 --- /dev/null +++ b/chart/kustomize-overlays/keda/STATUS.yaml @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +--- +status: not-tested +reason: >- + First proof-of-concept overlay for KEDA. Builds successfully and passes the + generic structural check in scripts/ci/prek/build_kustomize_overlays.py, but + has no CRD-schema or functional integration test yet. Use as a starting + point only. Tracked under https://github.com/apache/airflow/issues/64037. diff --git a/chart/kustomize-overlays/keda/kustomization.yaml b/chart/kustomize-overlays/keda/kustomization.yaml new file mode 100644 index 0000000000000..2909d4e22d2f0 --- /dev/null +++ b/chart/kustomize-overlays/keda/kustomization.yaml @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Kustomize base for the KEDA autoscaler overlay. +# +# This base ships with a RELEASE-NAME placeholder and sets no namespace. +# Downstream kustomizations must substitute the release name and set their +# own `namespace` before applying. See README.rst +# in this directory for substitution patterns and a migration guide from +# the equivalent chart-side configuration. + +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - triggerauthentication.yaml + - scaledobject.yaml + +labels: + - includeSelectors: false + pairs: + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/part-of: airflow + app.kubernetes.io/component: worker-autoscaler diff --git a/chart/kustomize-overlays/keda/scaledobject.yaml b/chart/kustomize-overlays/keda/scaledobject.yaml new file mode 100644 index 0000000000000..982839b34f55a --- /dev/null +++ b/chart/kustomize-overlays/keda/scaledobject.yaml @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Standalone ScaledObject for the chart-rendered Celery worker. +# +# Substitute the placeholders before applying: +# * RELEASE-NAME - the Helm release name +# * If your worker uses persistence, change `kind: Deployment` +# to `kind: StatefulSet` under scaleTargetRef. +# +# The default `query` mirrors the chart for a single Celery queue named +# "default" with worker_concurrency=16. Adjust both numbers if you +# customised those values, and extend the `queue IN (...)` clause if +# your chart values set additional queues. +--- +apiVersion: keda.sh/v1alpha1 +kind: ScaledObject +metadata: + name: airflow-worker +spec: + scaleTargetRef: + kind: Deployment + name: RELEASE-NAME-worker + envSourceContainerName: worker + pollingInterval: 5 + cooldownPeriod: 30 + minReplicaCount: 0 + maxReplicaCount: 10 + triggers: + - type: postgresql + metadata: + targetQueryValue: "1" + query: >- + SELECT ceil(COUNT(*)::decimal / 16) + FROM task_instance + WHERE (state='running' OR state='queued') + AND queue IN ('default') + authenticationRef: + name: airflow-keda-postgres-auth diff --git a/chart/kustomize-overlays/keda/triggerauthentication.yaml b/chart/kustomize-overlays/keda/triggerauthentication.yaml new file mode 100644 index 0000000000000..60971fbdc3947 --- /dev/null +++ b/chart/kustomize-overlays/keda/triggerauthentication.yaml @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Reads the metadata DB connection string directly from the secret that +# the chart already creates. This keeps the overlay a pure standalone +# addition - no chart-rendered resource is patched. +# +# Substitute RELEASE-NAME with your Helm release name. If pgbouncer is +# enabled and you do not want KEDA polling through it, change the `key` +# from `connection` to `kedaConnection`. +--- +apiVersion: keda.sh/v1alpha1 +kind: TriggerAuthentication +metadata: + name: airflow-keda-postgres-auth +spec: + secretTargetRef: + - parameter: connection + name: RELEASE-NAME-airflow-metadata + key: connection diff --git a/scripts/ci/prek/build_kustomize_overlays.py b/scripts/ci/prek/build_kustomize_overlays.py new file mode 100755 index 0000000000000..71787def6e23e --- /dev/null +++ b/scripts/ci/prek/build_kustomize_overlays.py @@ -0,0 +1,226 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# /// script +# requires-python = ">=3.10,<3.11" +# dependencies = [ +# "pydantic>=2.0", +# "PyYAML>=6.0", +# "rich>=13.6.0", +# ] +# /// + +# ============================================================================= +# Build and structural smoke test for chart/kustomize-overlays/*. +# +# Runs the same checks against every overlay; nothing here is overlay- or +# CRD-specific. +# +# What this hook validates: +# * `kubectl kustomize` builds the overlay successfully. +# * The output parses as valid YAML. +# * At least one resource is produced. +# * Every resource carries apiVersion, kind, and metadata.name. +# * No two resources share the same (apiVersion, kind, namespace, name). +# +# What this hook does not validate: +# * Field schema correctness against the targeted CRD. A typo in a field +# name will still pass. +# * Cross-references between resources, or references to resources +# produced elsewhere (for example by the chart). +# * Runtime behaviour: the overlay is never applied to a live API server +# and no controller ever reconciles it. +# +# Treat a passing run as "the overlay is structurally well-formed", not as +# "the overlay works against Kubernetes". An overlay's STATUS file may only +# advance to `tested` once a functional integration test is in place; this +# hook alone is not enough to support that claim. See CONTRIBUTING.rst in +# `chart/kustomize-overlays/` for the lifecycle. 
+# ============================================================================= + +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path +from typing import Annotated, Literal + +import yaml +from common_prek_utils import AIRFLOW_ROOT_PATH, console, initialize_breeze_prek +from pydantic import BaseModel, ConfigDict, Field, TypeAdapter, ValidationError + +initialize_breeze_prek(__name__, __file__) + + +# --------------------------------------------------------------------------- +# STATUS.yaml contract — Pydantic discriminated union, one variant per status. +# --------------------------------------------------------------------------- + + +class _TestedStatus(BaseModel): + model_config = ConfigDict(extra="forbid", populate_by_name=True) + + status: Literal["tested"] + chart_version: str = Field(alias="chart-version") + last_verified: str = Field(alias="last-verified", pattern=r"^\d{4}-\d{2}-\d{2}$") + + +class _NotTestedStatus(BaseModel): + model_config = ConfigDict(extra="forbid") + + status: Literal["not-tested"] + reason: str | None = None + + +class _DeprecatedStatus(BaseModel): + model_config = ConfigDict(extra="forbid") + + status: Literal["deprecated"] + message: str + + +_StatusDoc = Annotated[ + _TestedStatus | _NotTestedStatus | _DeprecatedStatus, + Field(discriminator="status"), +] +_STATUS_ADAPTER: TypeAdapter[_StatusDoc] = TypeAdapter(_StatusDoc) + + +def _validate_status(overlay_dir: Path) -> list[str]: + status_path = overlay_dir / "STATUS.yaml" + if not status_path.exists(): + return [f"missing STATUS.yaml in {overlay_dir.name}"] + try: + data = yaml.safe_load(status_path.read_text()) + except yaml.YAMLError as exc: + return [f"STATUS.yaml is not valid YAML: {exc}"] + try: + _STATUS_ADAPTER.validate_python(data) + except ValidationError as exc: + return [f"STATUS.yaml schema error: {exc}"] + return [] + + +def _structural_check(docs: list[object]) -> list[str]: + """Run generic structural checks 
that hold for any Kustomize overlay.""" + errors: list[str] = [] + if not docs: + return ["no resources produced"] + + seen: set[tuple[str, str, str, str]] = set() + for index, doc in enumerate(docs): + if not isinstance(doc, dict): + errors.append(f"document {index} is not a mapping ({type(doc).__name__})") + continue + api_version = doc.get("apiVersion") or "" + kind = doc.get("kind") or "" + if not api_version: + errors.append(f"document {index} missing apiVersion") + if not kind: + errors.append(f"document {index} missing kind") + metadata = doc.get("metadata") or {} + name = metadata.get("name") or "" + if not name: + errors.append(f"document {index} missing metadata.name") + namespace = metadata.get("namespace") or "" + key = (api_version, kind, namespace, name) + if key in seen: + errors.append(f"duplicate resource ({api_version} {kind} {namespace}/{name})") + seen.add(key) + return errors + + +res_setup = subprocess.run(["breeze", "k8s", "setup-env"], check=True) +if res_setup.returncode != 0: + console.print("[red]\nError while setting up k8s environment.") + sys.exit(res_setup.returncode) + +KUBECTL_BIN_PATH = AIRFLOW_ROOT_PATH / ".venv" / "bin" / "kubectl" +OVERLAYS_DIR = AIRFLOW_ROOT_PATH / "chart" / "kustomize-overlays" + +if not OVERLAYS_DIR.is_dir(): + console.print(f"[yellow]No overlay directory at {OVERLAYS_DIR}, nothing to check.") + sys.exit(0) + +kustomizations = sorted(OVERLAYS_DIR.rglob("kustomization.yaml")) +if not kustomizations: + console.print(f"[yellow]No kustomization.yaml files under {OVERLAYS_DIR}, nothing to check.") + sys.exit(0) + + +def _build(overlay_dir: Path) -> tuple[list[object] | None, str]: + result = subprocess.run( + [os.fspath(KUBECTL_BIN_PATH), "kustomize", os.fspath(overlay_dir)], + check=False, + capture_output=True, + text=True, + ) + if result.returncode != 0: + return None, f"build failed:\n{result.stderr}" + try: + docs = [doc for doc in yaml.safe_load_all(result.stdout) if doc] + except yaml.YAMLError as exc: + 
return None, f"build produced invalid YAML: {exc}" + if result.stderr.strip(): + console.print(f"[yellow]warnings:\n{result.stderr.strip()}") + return docs, "" + + +failures: list[str] = [] +for kustomization in kustomizations: + overlay_dir = kustomization.parent + rel = overlay_dir.relative_to(AIRFLOW_ROOT_PATH) + console.print(f"[blue]\nKustomize overlay [bold]{rel}[/bold]") + + docs, build_err = _build(overlay_dir) + if docs is None: + console.print(f"[red] {build_err}") + failures.append(str(rel)) + continue + console.print(f"[green] build ok ({len(docs)} resource(s))") + + errors = _structural_check(docs) + if errors: + console.print("[red] structural check failed:") + for err in errors: + console.print(f" - {err}") + failures.append(str(rel)) + else: + console.print("[green] structural check ok") + + status_errors = _validate_status(overlay_dir) + if status_errors: + console.print("[red] STATUS.yaml check failed:") + for err in status_errors: + console.print(f" - {err}") + if str(rel) not in failures: + failures.append(str(rel)) + else: + console.print("[green] STATUS.yaml ok") + +if failures: + console.print(f"[red]\n{len(failures)} overlay(s) failed:") + for failure in failures: + console.print(f" - {failure}") + sys.exit(1) + +console.print("[green]\nAll Kustomize overlays built and passed the generic structural check.") +console.print( + "[yellow]Note: this hook does not validate CRD schemas or runtime behaviour. " + "An overlay's STATUS may only advance to `tested` once a functional integration test exists." +)