Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 39 additions & 13 deletions .github/workflows/registry-backfill.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,18 +176,6 @@ jobs:
"${S3_BUCKET}api/providers.json" \
dev/registry/providers.json || true

- name: "Extract version metadata from git tags"
env:
VERSIONS: ${{ matrix.versions }}
PROVIDER: ${{ matrix.provider }}
run: |
VERSION_ARGS=""
for VERSION in ${VERSIONS}; do
VERSION_ARGS="${VERSION_ARGS} --version ${VERSION}"
done
uv run --project dev/registry python dev/registry/extract_versions.py \
--provider "${PROVIDER}" ${VERSION_ARGS} || true

- name: "Run breeze registry backfill"
env:
VERSIONS: ${{ matrix.versions }}
Expand All @@ -211,6 +199,23 @@ jobs:
"${S3_BUCKET}api/modules.json" \
registry/src/_data/modules.json

- name: "Patch providers.json with backfill version(s)"
  env:
    VERSIONS: ${{ matrix.versions }}
    PROVIDER: ${{ matrix.provider }}
  run: |
    # The S3-cached providers.json predates the backfilled version, so
    # provider.versions[] doesn't list it. providerVersions.js then
    # filters the on-disk metadata.json out and Eleventy emits no page.
    # Patch the version into the array so the build picks it up.
    #
    # Collect the flags in a bash array: each value then reaches the
    # script as exactly one argument, instead of a concatenated string
    # that is re-split and glob-expanded when it is used unquoted.
    VERSION_ARGS=()
    # Splitting ${VERSIONS} on whitespace is intentional here -- the loop
    # treats it as a space-separated list of versions.
    for VERSION in ${VERSIONS}; do
      VERSION_ARGS+=(--version "${VERSION}")
    done
    uv run --project dev/registry python dev/registry/patch_providers_json.py \
      --providers-json registry/src/_data/providers.json \
      --provider "${PROVIDER}" "${VERSION_ARGS[@]}"

- name: "Setup pnpm"
uses: pnpm/action-setup@903f9c1a6ebcba6cf41d87230be49611ac97822e # v6.0.3
with:
Expand Down Expand Up @@ -240,14 +245,35 @@ jobs:
VERSIONS: ${{ matrix.versions }}
PROVIDER: ${{ matrix.provider }}
run: |
# Guard against silent no-op syncs. `aws s3 sync` exits 0 when the
# source dir is missing, so without this guard a build that didn't
# emit pages (e.g. providers.json filter dropped the version) would
# report green while uploading nothing.
for VERSION in ${VERSIONS}; do
LOCAL_PAGE="registry/_site/providers/${PROVIDER}/${VERSION}/index.html"
LOCAL_API_DIR="registry/_site/api/providers/${PROVIDER}/${VERSION}/"
LOCAL_PARAMS="${LOCAL_API_DIR}parameters.json"

if [ ! -f "${LOCAL_PAGE}" ]; then
echo "::error::Build did not emit ${LOCAL_PAGE} -- aborting sync."
exit 1
fi
if [ ! -d "${LOCAL_API_DIR}" ]; then
echo "::error::Missing API directory ${LOCAL_API_DIR} -- aborting sync."
exit 1
fi
if [ ! -s "${LOCAL_PARAMS}" ]; then
echo "::error::${LOCAL_PARAMS} is empty or missing -- aborting sync."
exit 1
fi

echo "Syncing ${PROVIDER}/${VERSION}..."
aws s3 sync \
"registry/_site/providers/${PROVIDER}/${VERSION}/" \
"${S3_BUCKET}providers/${PROVIDER}/${VERSION}/" \
--cache-control "${CACHE_CONTROL}"
aws s3 sync \
"registry/_site/api/providers/${PROVIDER}/${VERSION}/" \
"${LOCAL_API_DIR}" \
"${S3_BUCKET}api/providers/${PROVIDER}/${VERSION}/" \
--cache-control "${CACHE_CONTROL}"
done
Expand Down
110 changes: 110 additions & 0 deletions dev/registry/patch_providers_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Patch providers.json to include backfilled versions in `provider.versions`.

Backfill writes per-version `metadata.json` to disk for the targeted version,
but the `Download data files from S3 for build` workflow step then overwrites
`registry/src/_data/providers.json` with the cached S3 copy. That cached copy
predates the backfilled version (that's why we're backfilling). Without
patching, `providerVersions.js`'s ``provider.versions ∩ availableSet`` filter
drops the on-disk metadata and the Eleventy build emits no page.

This script runs after the S3 download, in the per-matrix backfill job, and
appends the backfilled version(s) into the targeted ``provider.versions``
array. It also defensively re-includes ``provider.version`` (the "latest"
field) -- otherwise patching into a previously-empty ``versions: []`` could
leave the latest field excluded once the array becomes the authoritative
filter.

Backfill-only tool. ``registry-build.yml`` (the full-build workflow) doesn't
need this -- ``extract_metadata.py`` regenerates ``providers.json`` from
scratch there.
"""

from __future__ import annotations

import argparse
import json
import sys
from pathlib import Path

from packaging.version import InvalidVersion, Version


def _newest_first_key(raw: str) -> tuple:
    """Return a sort key that ranks parsable versions above unparsable tags.

    Used with ``reverse=True``: entries that ``packaging`` can parse compare by
    their parsed ``Version`` (so "9.26.0" outranks "9.9.0"), and entries it
    rejects are grouped after them, ordered by plain string comparison.
    """
    try:
        return (1, Version(raw), raw)
    except InvalidVersion:
        # The leading 0 keeps these below every parsable version, so a
        # ``Version`` is never compared with the ``None`` placeholder.
        return (0, None, raw)


def patch(providers_json_path: Path, provider_id: str, versions: list[str]) -> int:
    """Add `versions` to the targeted provider's `versions[]` and rewrite the file.

    Args:
        providers_json_path: providers.json file to modify in place.
        provider_id: value of the ``id`` field of the provider entry to patch.
        versions: version strings to add; ones already listed are skipped.

    Returns:
        0 after the file has been rewritten, 1 (file untouched) when no
        provider entry has the requested id.
    """
    data = json.loads(providers_json_path.read_text(encoding="utf-8"))
    # Use .get() so an entry without an "id" (or a file without "providers")
    # is reported as "not found" instead of aborting the search with KeyError.
    target = next(
        (
            entry
            for entry in data.get("providers", [])
            if isinstance(entry, dict) and entry.get("id") == provider_id
        ),
        None,
    )
    if target is None:
        # Printed to stdout (not stderr): the unit tests for this script
        # assert on the captured stdout.
        print(
            f"ERROR: provider '{provider_id}' not found in {providers_json_path}. "
            f"This script is for backfilling versions of providers already in the "
            f"registry; for new providers run a full registry-build.yml dispatch."
        )
        return 1

    existing = list(target.get("versions") or [])
    # Always keep `provider.version` (latest) in `versions[]`. Without this,
    # patching into a previously-empty list could leave the latest field out
    # of providerVersions.js's authoritative filter.
    latest = target.get("version")
    if latest and latest not in existing:
        existing.append(latest)

    added: list[str] = []
    for v in versions:
        if v not in existing:
            existing.append(v)
            added.append(v)

    # Newest first. The per-item key means one unparsable tag no longer forces
    # the whole list into lexicographic order (which misplaces e.g. "9.9.0"
    # ahead of "9.26.0").
    existing.sort(key=_newest_first_key, reverse=True)

    target["versions"] = existing
    providers_json_path.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
    print(f"Patched {provider_id}: added {added}; versions list now has {len(existing)} entries")
    return 0


def main() -> int:
    """Parse the command line and run `patch`; the result is the exit code."""
    # First line of the module docstring doubles as the --help description.
    summary = __doc__.splitlines()[0] if __doc__ else ""
    cli = argparse.ArgumentParser(description=summary)

    cli.add_argument(
        "--providers-json",
        type=Path,
        required=True,
        help="Path to providers.json to modify in place.",
    )
    cli.add_argument(
        "--provider",
        required=True,
        help="Provider ID (e.g. 'amazon', 'common-compat').",
    )
    # `append` collects every occurrence of the flag into one list.
    cli.add_argument(
        "--version",
        action="append",
        required=True,
        help="Version(s) to add (repeat the flag for multiple).",
    )

    options = cli.parse_args()
    return patch(options.providers_json, options.provider, options.version)


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    raise SystemExit(main())
4 changes: 4 additions & 0 deletions dev/registry/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ version = "0.0.1"
requires-python = ">=3.10"
classifiers = ["Private :: Do Not Upload"]
dependencies = [
# `patch_providers_json.py` uses `packaging.version` for newest-first
# sorting. It also flows transitively via pydantic, but declare it
# explicitly so the script doesn't break if upstream drops the chain.
"packaging>=24.0",
"pydantic>=2.12.0",
"pyyaml>=6.0.3",
# extract_versions.py and extract_metadata.py read pyproject.toml via
Expand Down
190 changes: 190 additions & 0 deletions dev/registry/tests/test_patch_providers_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Unit tests for dev/registry/patch_providers_json.py."""

from __future__ import annotations

import json
import sys
from unittest.mock import patch as mock_patch

import pytest
from patch_providers_json import main, patch


def _write_providers(tmp_path, providers):
p = tmp_path / "providers.json"
p.write_text(json.dumps({"providers": providers}, indent=2))
return p


class TestPatch:
    """Exercise ``patch`` directly against a providers.json written into ``tmp_path``."""

    @staticmethod
    def _reload(path, provider_id):
        """Re-read ``path`` and return the provider entry with the given id."""
        providers = json.loads(path.read_text())["providers"]
        return next(entry for entry in providers if entry["id"] == provider_id)

    def test_appends_missing_version(self, tmp_path):
        seed = [{"id": "amazon", "version": "9.26.0", "versions": ["9.26.0", "9.25.0"]}]
        path = _write_providers(tmp_path, seed)

        exit_code = patch(path, "amazon", ["9.24.0"])

        assert exit_code == 0
        assert self._reload(path, "amazon")["versions"] == ["9.26.0", "9.25.0", "9.24.0"]

    def test_skips_already_present_version(self, tmp_path):
        seed = [{"id": "amazon", "version": "9.26.0", "versions": ["9.26.0", "9.25.0"]}]
        path = _write_providers(tmp_path, seed)
        versions_before = json.loads(path.read_text())["providers"][0]["versions"]

        exit_code = patch(path, "amazon", ["9.25.0"])

        versions_after = json.loads(path.read_text())["providers"][0]["versions"]
        assert exit_code == 0
        # The file may be rewritten by the sort, but the array itself must not change.
        assert versions_before == versions_after

    def test_unknown_provider_id_errors(self, tmp_path, capsys):
        path = _write_providers(tmp_path, [{"id": "amazon", "version": "9.26.0", "versions": []}])

        exit_code = patch(path, "nonexistent-provider", ["1.0.0"])

        assert exit_code == 1
        assert "nonexistent-provider" in capsys.readouterr().out

    def test_multiple_versions_one_call(self, tmp_path):
        seed = [{"id": "amazon", "version": "9.26.0", "versions": ["9.26.0"]}]
        path = _write_providers(tmp_path, seed)

        exit_code = patch(path, "amazon", ["9.24.0", "9.22.0"])

        assert exit_code == 0
        # Expect newest-first ordering.
        assert self._reload(path, "amazon")["versions"] == ["9.26.0", "9.24.0", "9.22.0"]

    def test_invalid_version_falls_back_to_lex_sort(self, tmp_path):
        seed = [{"id": "weird", "version": "1.0.0", "versions": ["1.0.0", "not-a-semver"]}]
        path = _write_providers(tmp_path, seed)

        # Unparsable tags must not make the call raise.
        exit_code = patch(path, "weird", ["custom-tag"])

        assert exit_code == 0
        # Only membership is checked here, not order.
        listed = self._reload(path, "weird")["versions"]
        assert set(listed) == {"1.0.0", "not-a-semver", "custom-tag"}

    def test_includes_latest_when_versions_was_empty(self, tmp_path):
        """Regression: an empty `versions[]` must not drop `provider.version`.

        Once patched, providerVersions.js treats a non-empty versions[] as
        authoritative. Writing only the new version into an empty list would
        therefore exclude the latest field from the dropdown.
        """
        path = _write_providers(tmp_path, [{"id": "newish", "version": "1.0.0", "versions": []}])

        exit_code = patch(path, "newish", ["0.9.0"])

        assert exit_code == 0
        listed = self._reload(path, "newish")["versions"]
        assert "1.0.0" in listed
        assert "0.9.0" in listed

    def test_provider_version_field_unchanged(self, tmp_path):
        """The singular `version` (latest) field is left alone by the patch."""
        seed = [{"id": "amazon", "version": "9.26.0", "versions": ["9.26.0", "9.25.0"]}]
        path = _write_providers(tmp_path, seed)

        exit_code = patch(path, "amazon", ["9.24.0"])

        assert exit_code == 0
        # Adding an older version must not move the latest pointer off 9.26.0.
        assert self._reload(path, "amazon")["version"] == "9.26.0"

    def test_other_providers_untouched(self, tmp_path):
        seed = [
            {"id": "amazon", "version": "9.26.0", "versions": ["9.26.0"]},
            {"id": "google", "version": "21.2.0", "versions": ["21.2.0", "21.1.0"]},
        ]
        path = _write_providers(tmp_path, seed)

        exit_code = patch(path, "amazon", ["9.24.0"])

        assert exit_code == 0
        bystander = self._reload(path, "google")
        assert bystander["versions"] == ["21.2.0", "21.1.0"]
        assert bystander["version"] == "21.2.0"


class TestMainCli:
    """Drive ``main`` through a substituted ``sys.argv``."""

    @staticmethod
    def _argv(path, provider, *versions):
        """Build the argv list for one invocation of the script."""
        argv = ["patch_providers_json.py", "--providers-json", str(path), "--provider", provider]
        for version in versions:
            argv += ["--version", version]
        return argv

    @staticmethod
    def _run(argv):
        """Call ``main`` with ``sys.argv`` temporarily replaced by ``argv``."""
        with mock_patch.object(sys, "argv", argv):
            return main()

    def test_main_exits_with_patch_returncode(self, tmp_path, capsys):
        path = _write_providers(
            tmp_path,
            [{"id": "amazon", "version": "9.26.0", "versions": ["9.26.0"]}],
        )

        exit_code = self._run(self._argv(path, "amazon", "9.24.0"))

        assert exit_code == 0
        providers = json.loads(path.read_text())["providers"]
        amazon = next(entry for entry in providers if entry["id"] == "amazon")
        assert "9.24.0" in amazon["versions"]

    def test_main_unknown_provider_exits_1(self, tmp_path):
        path = _write_providers(
            tmp_path,
            [{"id": "amazon", "version": "9.26.0", "versions": []}],
        )

        exit_code = self._run(self._argv(path, "ghost", "1.0.0"))

        assert exit_code == 1

    def test_main_requires_version_argument(self, tmp_path):
        path = _write_providers(tmp_path, [])
        # No --version flag: argparse itself must reject the command line.
        argv = self._argv(path, "amazon")

        with mock_patch.object(sys, "argv", argv), pytest.raises(SystemExit) as exc:
            main()

        assert exc.value.code != 0
Loading