From 4c6018f6dae4b0103d3b82f991dd3ec27797b7c5 Mon Sep 17 00:00:00 2001 From: Leon Haffmans <49658102+lord-haffi@users.noreply.github.com> Date: Mon, 29 Apr 2024 20:16:48 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=F0=9F=93=84Compatibility=20matrix=20-?= =?UTF-8?q?=20Detect=20changes=20between=20BO4E-versions=20(#751)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add code for compatibility matrix * Include in docs * Generate JSON-Schemas on RTD * 🩹 * 🩹 * 🩹cmon, fcking shell * 🩹 * debug * Maybe single quotes? * Why the fck it doesnt work * 🩹 * really? * sgtkrwn * sg< * As env var? * Now, please do it * 🩹 * more debug * ✨Add field documentation to description of the JSON-schemas * Fix exponential backtracking * Correct version regex * Add Json Schema build for /latest * Bump bost * 🩹 * 🩹 local test * 📄 * Allow testing via workflow_dispatch * 📄 * Test using env vars * Use env vars * Always update local compiled JSONs * More last versions to compare * Always update local compiled JSONs - Delete old folder * Use local links if build locally * Fix local docs building - make it easier * Get all versions since `v202401.0.0` * Add new Code to linting and type checking * 🚨linter + type checker * 📄from code review --- .github/workflows/docs_latest.yml | 23 +- .github/workflows/python-publish.yml | 7 +- .gitignore | 3 + docs/changelog.rst | 35 +++ docs/compatibility/__init__.py | 0 docs/compatibility/__main__.py | 228 +++++++++++++++++++ docs/compatibility/change_schemas.py | 76 +++++++ docs/compatibility/diff.py | 322 +++++++++++++++++++++++++++ docs/compatibility/loader.py | 47 ++++ docs/compatibility/matrix.py | 76 +++++++ docs/conf.py | 19 +- docs/requirements.in | 3 +- docs/requirements.txt | 56 ++++- generate_or_validate_json_schemas.py | 1 + tox.ini | 11 +- 15 files changed, 880 insertions(+), 27 deletions(-) create mode 100644 docs/compatibility/__init__.py create mode 100644 docs/compatibility/__main__.py create mode 100644 docs/compatibility/change_schemas.py create mode 100644 docs/compatibility/diff.py create mode 100644 docs/compatibility/loader.py create mode 100644 docs/compatibility/matrix.py diff --git a/.github/workflows/docs_latest.yml b/.github/workflows/docs_latest.yml index f6de00826..67b4a9c2e 100644 --- a/.github/workflows/docs_latest.yml +++ b/.github/workflows/docs_latest.yml @@ -5,6 +5,10 @@ on: branches: [main] # Allows you to run this workflow manually from the Actions tab + # If the workflow is triggered (manually, through workflow_dispatch) on another branch than the main-branch, + # then it will be published not under /latest but under test-XXXXXX where the X's are a 6-digit random number + # (starting with non-zero). + # You should remember to delete the generated test-XXXXXX folder on the gh-pages branch after you are done with them. workflow_dispatch: jobs: @@ -18,6 +22,12 @@ jobs: python-version: ["3.12"] os: [ubuntu-latest] steps: + - name: Set routing name to latest + if: github.ref == 'refs/heads/main' + run: echo "REF_NAME=latest" >> "$GITHUB_ENV" + - name: Set routing name to test-XXXXXX + if: github.ref != 'refs/heads/main' + run: echo "REF_NAME=test-$(shuf -i 100000-999999 -n 1)" >> "$GITHUB_ENV" - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} @@ -28,11 +38,13 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip + pip install tox pip install -r requirements.txt # Note: The sphinx action below can only install a single requirements file. 
- - name: Write version to conf.py - run: | - echo -e "version = release = \"latest\"\n" | cat - docs/conf.py > /tmp/conf.py && mv /tmp/conf.py docs/conf.py + - name: Build JSON Schemas + run: tox -e generate_json_schemas + env: + TARGET_VERSION: ${{ env.REF_NAME }} - name: Run kroki with docker run: | docker compose up -d @@ -43,9 +55,12 @@ jobs: with: requirements_path: docs/requirements.txt documentation_path: docs/ - target_path: latest/ + target_path: ${{ env.REF_NAME }} target_branch: gh-pages sphinx_options: -W -j auto + env: + SPHINX_DOCS_RELEASE: ${{ env.REF_NAME }} + SPHINX_DOCS_VERSION: ${{ env.REF_NAME }} - name: Push changes uses: ad-m/github-push-action@master with: diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 6057594b2..ba9130fdd 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -89,10 +89,6 @@ jobs: python -m pip install --upgrade pip pip install -r requirements.txt # Note: The sphinx action below can only install a single requirements file. - - name: Write version to conf.py - run: | - echo -e "version = release = \"${{ github.ref_name }}\"\n" | cat - docs/conf.py > /tmp/conf.py - mv /tmp/conf.py docs/conf.py - name: Run kroki with docker run: | docker compose up -d @@ -106,6 +102,9 @@ jobs: target_path: ${{ github.ref_name }} target_branch: gh-pages sphinx_options: -W -j auto + env: + SPHINX_DOCS_RELEASE: ${{ github.ref_name }} + SPHINX_DOCS_VERSION: ${{ github.ref_name }} - id: latest_bo4e name: Get latest BO4E release tag uses: pozetroninc/github-action-get-latest-release@master diff --git a/.gitignore b/.gitignore index 06f4fc067..d05cee03b 100644 --- a/.gitignore +++ b/.gitignore @@ -167,6 +167,7 @@ Temporary Items docs/api docs/plantuml.jar docs/_static/images +docs/compatibility_matrix.csv # version number for bo4e-python; gets auto-generated during the command # python -m build @@ -175,3 +176,5 @@ src/_bo4e_python_version.py # the autogenerated JSON schemas will be build and pushed to BO4E-Schemas # on release json_schemas/**/*.json + +tmp/ diff --git a/docs/changelog.rst b/docs/changelog.rst index 871950df3..f778e4807 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,2 +1,37 @@ .. _changes: + +================ +Compatibility +================ + +The table below shows the compatibility matrix of the last BO4E versions. + +Legend: + ++------+------------------------------------------------------+ +| 🟢 | | Compatible | +| | | No changes in the data model | ++------+------------------------------------------------------+ +| 🟡 | | Compatible | +| | | Only non-critical changes in the data model | +| | | e.g. added fields, changed doc strings | ++------+------------------------------------------------------+ +| 🔴 | | Incompatible | +| | | Critical changes in the data model | +| | | e.g. removed fields, changed types | ++------+------------------------------------------------------+ +| ➕ | | Compatible | +| | | Data model was added in this version | ++------+------------------------------------------------------+ +| ➖ | | Incompatible | +| | | Data model was removed in this version | ++------+------------------------------------------------------+ +| \- | | Data model not existent in this version | +| | | was removed before or will be added in future | ++------+------------------------------------------------------+ + +.. csv-table:: Compatibility matrix + :file: compatibility_matrix.csv + :header-rows: 1 + .. 
include:: ../CHANGELOG.rst
diff --git a/docs/compatibility/__init__.py b/docs/compatibility/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/docs/compatibility/__main__.py b/docs/compatibility/__main__.py
new file mode 100644
index 000000000..7405f4937
--- /dev/null
+++ b/docs/compatibility/__main__.py
@@ -0,0 +1,228 @@
+"""
+This module provides functions to compare the BO4E JSON schemas of different versions.
+It also contains functions to query GitHub for the latest BO4E versions to compare with the schemas of the current
+work tree.
+Additionally, it implements a small cache to avoid downloading the same versions multiple times, e.g. when
+testing locally.
+"""
+
+import itertools
+import logging
+import re
+import shutil
+from pathlib import Path
+from typing import Any as _Any
+from typing import Iterable
+
+import bost.operations
+from bost import main as bost_main
+from bost.operations import update_references as bost_update_references
+from bost.pull import OWNER, REPO, SchemaMetadata, get_source_repo
+
+from . import change_schemas, diff, loader, matrix
+
+BO4E_BASE_DIR = Path(__file__).parents[2] / "tmp/bo4e_json_schemas"
+LOCAL_JSON_SCHEMA_DIR = Path(__file__).parents[2] / "json_schemas"
+logger = logging.getLogger(__name__)
+
+
+def pull_bo4e_version(version: str, output: Path, gh_token: str | None = None) -> None:
+    """
+    Pull the BO4E version from the given version string.
+    """
+    bost_main(
+        output=output,
+        target_version=version,
+        update_refs=True,
+        set_default_version=False,
+        clear_output=True,
+        token=gh_token,
+    )
+
+
+def update_references(path: Path, version: str) -> None:
+    """
+    Update the references in the given path. This step is needed for the local build.
+    """
+    schema_namespace = {}
+    for schema_path in loader.get_namespace(path):
+        local_path = Path(path, *schema_path).with_suffix(".json")
+        schema_namespace[schema_path[-1]] = SchemaMetadata(
+            class_name=schema_path[-1],
+            download_url="",
+            module_path=schema_path,
+            file_path=local_path,
+            cached_path=local_path,
+            token=None,
+        )
+    for schema_metadata in schema_namespace.values():
+        bost_update_references(schema_metadata, schema_namespace, version)
+        schema_metadata.save()
+
+
+def pull_or_reuse_bo4e_version(version: str, gh_token: str | None = None, from_local: bool = False) -> Path:
+    """
+    Pull the BO4E version from the given version string, or reuse the version if it was already pulled before.
+    If from_local is set, use the compiled JSON schemas of the checked-out working directory in /json_schemas
+    instead of pulling them.
+    Returns the path of the bo4e directory.
+    """
+    bo4e_dir = BO4E_BASE_DIR / version
+
+    if from_local:
+        if not any(LOCAL_JSON_SCHEMA_DIR.rglob("*.json")):
+            raise ValueError(
+                "No local json schemas found in /json_schemas. "
+                "Please ensure that the JSON schemas have been built beforehand."
+            )
+        if bo4e_dir.exists():
+            shutil.rmtree(bo4e_dir)
+        shutil.copytree(LOCAL_JSON_SCHEMA_DIR, bo4e_dir)
+        update_references(bo4e_dir, version)
+    elif any(bo4e_dir.rglob("*.json")):
+        return bo4e_dir
+    else:
+        pull_bo4e_version(version, bo4e_dir, gh_token)
+    return bo4e_dir
+
+
+def compare_bo4e_versions(
+    version_old: str, version_new: str, gh_token: str | None = None, from_local: bool = False
+) -> Iterable[change_schemas.Change]:
+    """
+    Compare the old version with the new version.
+    If from_local is set, version_new refers to the checked-out working directory, assuming the compiled JSON
+    schemas in /json_schemas.
+    """
+    dir_old_schemas = pull_or_reuse_bo4e_version(version_old, gh_token)
+    dir_new_schemas = pull_or_reuse_bo4e_version(version_new, gh_token, from_local=from_local)
+    print(f"Comparing {version_old} with {version_new}")
+    yield from diff.diff_schemas(dir_old_schemas, dir_new_schemas)
+
+
+def compare_bo4e_versions_iteratively(
+    versions: Iterable[str], cur_version: str | None = None, gh_token: str | None = None
+) -> dict[tuple[str, str], Iterable[change_schemas.Change]]:
+    """
+    Compare the versions iteratively. Each version at index i will be compared to the version at index i+1.
+    Additionally, if cur_version is provided, the last version in the list will be compared to the version
+    in the checkout working directory. The value of cur_version will be used to set the key in the returned
+    dict.
+    Note:
+        - versions must contain at least one element.
+        - versions should be sorted in ascending order.
+        - if using cur_version, ensure that the json schemas of the checkout working directory
+          were built beforehand. They should be located in /json_schemas.
+    """
+    print(f"Comparing versions {versions} with cur_version {cur_version}")
+    changes = {}
+    last_version: str = ""  # Placeholder, always overwritten in the loop; keeps mypy and pylint happy
+    for version_old, version_new in itertools.pairwise(versions):
+        last_version = version_new
+        changes[version_old, version_new] = compare_bo4e_versions(version_old, version_new, gh_token)
+    if cur_version is not None:
+        changes[last_version, cur_version] = compare_bo4e_versions(last_version, cur_version, gh_token, from_local=True)
+    print("Comparisons finished.")
+    return changes
+
+
+REGEX_RELEASE_VERSION = re.compile(r"^v(\d{6}\.\d+\.\d+)$")
+REGEX_RELEASE_CANDIDATE_VERSION = re.compile(r"^v(\d{6}\.\d+\.\d+)-rc\d+$")
+
+
+def get_last_n_release_versions(n: int, include_rc: bool = False, gh_token: str | None = None) -> Iterable[str]:
+    """
+    Get the last n release versions from the BO4E repository.
+    """
+    repo = get_source_repo(gh_token)
+    releases = repo.get_releases()
+    counter = 0
+
+    for release in releases:
+        if not REGEX_RELEASE_VERSION.fullmatch(release.tag_name) and (
+            not include_rc or not REGEX_RELEASE_CANDIDATE_VERSION.fullmatch(release.tag_name)
+        ):
+            continue
+        counter += 1
+        yield release.tag_name
+        if counter >= n:
+            return
+
+    logger.warning("Only %d matching releases found. Returning all releases.", counter)
+
+
+def get_all_release_versions_since_20240100(include_rc: bool = False, gh_token: str | None = None) -> Iterable[str]:
+    """
+    Get all release versions since v202401.0.0 from the BO4E repository.
+    """
+    repo = get_source_repo(gh_token)
+    releases = repo.get_releases()
+    version_threshold = "v202401.0.0"
+
+    for release in releases:
+        if not REGEX_RELEASE_VERSION.fullmatch(release.tag_name) and (
+            not include_rc or not REGEX_RELEASE_CANDIDATE_VERSION.fullmatch(release.tag_name)
+        ):
+            continue
+        yield release.tag_name
+        if release.tag_name == version_threshold:
+            return
+
+    logger.warning("Threshold version %s not found. Returning all matching releases.", version_threshold)
+
+
+def _monkey_patch_bost_regex_if_local_testing(version: str) -> None:
+    regex_expected_version = re.compile(r"^v\d+\.\d+\.\d+(?:-rc\d+)?$")
+    if not regex_expected_version.fullmatch(version):
+        bost.operations.REF_ONLINE_REGEX = re.compile(
+            rf"^https://raw\.githubusercontent\.com/(?:{OWNER.upper()}|{OWNER.lower()}|Hochfrequenz)/{REPO}/"
+            rf"(?P<version>[^/]+)/"
+            r"src/bo4e_schemas/(?P<sub_path>(?:\w+/)*)(?P<model>\w+)\.json#?$"
+        )
+
+
+def create_tables_for_doc(
+    compatibility_matrix_output_file: Path,
+    gh_version: str,
+    *,
+    gh_token: str | None = None,
+    last_n_versions: int = 2,
+) -> None:
+    """
+    Creates the compatibility matrix for the documentation. The output is a CSV file. This can be referenced
+    inside Sphinx documentation. See https://sublime-and-sphinx-guide.readthedocs.io/en/latest/tables.html#csv-files
+    for more information.
+    If you have problems with rate limiting, please set gh_token.
+    The compatibility matrix will be built for last_n_versions + the current version in the checkout working directory.
+    If you set last_n_versions = 0, all versions since v202401.0.0 will be compared.
+    Note: The matrix will never contain the first version as a column. Each column is a comparison with the preceding version.
+    Note: Release candidates are excluded.
+    """
+    _monkey_patch_bost_regex_if_local_testing(gh_version)
+    logger.info("Retrieving the last %d release versions", last_n_versions)
+    if last_n_versions > 0:
+        versions = list(reversed(list(get_last_n_release_versions(last_n_versions, gh_token=gh_token))))
+    else:
+        versions = list(reversed(list(get_all_release_versions_since_20240100(gh_token=gh_token))))
+    logger.info("Comparing versions iteratively: %s", " -> ".join([*versions, gh_version]))
+    changes_iterables = compare_bo4e_versions_iteratively(versions, gh_version, gh_token=gh_token)
+    logger.info("Building namespaces")
+    changes = {key: list(value) for key, value in changes_iterables.items()}
+    namespaces = {version: list(loader.get_namespace(BO4E_BASE_DIR / version)) for version in versions}
+    namespaces[gh_version] = list(loader.get_namespace(BO4E_BASE_DIR / gh_version))
+    logger.info("Creating compatibility matrix")
+    matrix.create_compatibility_matrix_csv(
+        compatibility_matrix_output_file, [*versions, gh_version], namespaces, changes
+    )
+
+
+def test_create_tables_for_doc() -> None:
+    """
+    Test the create_tables_for_doc function locally without building the entire documentation.
+    Needs the JSON schemas to be present in /json_schemas with TARGET_VERSION set to "local".
+    """
+    create_tables_for_doc(
+        Path(__file__).parents[1] / "compatibility_matrix.csv",
+        "local",
+        last_n_versions=3,
+    )
diff --git a/docs/compatibility/change_schemas.py b/docs/compatibility/change_schemas.py
new file mode 100644
index 000000000..9716af1b7
--- /dev/null
+++ b/docs/compatibility/change_schemas.py
@@ -0,0 +1,76 @@
+"""
+Contains the classes to model changes between two BO4E versions.
+"""
+
+from enum import StrEnum
+from typing import Any, Iterable
+
+from pydantic import BaseModel
+
+
+class ChangeType(StrEnum):
+    """
+    This enum class lists the possible types of a single change between two BO4E versions.
+    """
+
+    FIELD_ADDED = "field_added"
+    FIELD_REMOVED = "field_removed"
+    FIELD_DEFAULT_CHANGED = "field_default_changed"
+    FIELD_DESCRIPTION_CHANGED = "field_description_changed"
+    # field type change types
+    FIELD_CARDINALITY_CHANGED = "field_cardinality_changed"
+    FIELD_REFERENCE_CHANGED = "field_reference_changed"
+    FIELD_STRING_FORMAT_CHANGED = "field_string_format_changed"
+    FIELD_ANY_OF_TYPE_ADDED = "field_any_of_type_added"
+    FIELD_ANY_OF_TYPE_REMOVED = "field_any_of_type_removed"
+    FIELD_ALL_OF_TYPE_ADDED = "field_all_of_type_added"
+    FIELD_ALL_OF_TYPE_REMOVED = "field_all_of_type_removed"
+    FIELD_TYPE_CHANGED = "field_type_changed"  # An arbitrary unclassified change in type
+
+    CLASS_ADDED = "class_added"
+    CLASS_REMOVED = "class_removed"
+    CLASS_DESCRIPTION_CHANGED = "class_description_changed"
+
+    ENUM_VALUE_ADDED = "enum_value_added"
+    ENUM_VALUE_REMOVED = "enum_value_removed"
+
+
+class Change(BaseModel):
+    """
+    This pydantic class models a single change between two BO4E versions.
+    """
+
+    type: ChangeType
+    old: Any
+    new: Any
+    old_trace: str
+    new_trace: str
+
+    def __str__(self) -> str:
+        return f"{self.type}: {self.old} -> {self.new}"
+
+
+def is_change_critical(change: Change) -> bool:
+    """
+    This function checks if a change is critical, i.e. if the new value is incompatible with the old value.
+    """
+    return change.type in (
+        ChangeType.FIELD_REMOVED,
+        ChangeType.FIELD_TYPE_CHANGED,
+        ChangeType.FIELD_CARDINALITY_CHANGED,
+        ChangeType.FIELD_REFERENCE_CHANGED,
+        ChangeType.FIELD_STRING_FORMAT_CHANGED,
+        ChangeType.FIELD_ANY_OF_TYPE_ADDED,
+        ChangeType.FIELD_ANY_OF_TYPE_REMOVED,
+        ChangeType.FIELD_ALL_OF_TYPE_ADDED,
+        ChangeType.FIELD_ALL_OF_TYPE_REMOVED,
+        ChangeType.CLASS_REMOVED,
+        ChangeType.ENUM_VALUE_REMOVED,
+    )
+
+
+def filter_non_crit(changes: Iterable[Change]) -> Iterable[Change]:
+    """
+    This function filters out all non-critical changes.
+    """
+    return (change for change in changes if is_change_critical(change))
diff --git a/docs/compatibility/diff.py b/docs/compatibility/diff.py
new file mode 100644
index 000000000..1cdc0fd2c
--- /dev/null
+++ b/docs/compatibility/diff.py
@@ -0,0 +1,322 @@
+"""
+Contains the logic to detect the different changes between two BO4E versions.
+"""
+
+import re
+from pathlib import Path
+from typing import Any as _Any
+from typing import Iterable
+
+from bost.schema import AllOf, AnyOf, Array, Object, Reference, SchemaRootType, SchemaType, StrEnum, String, TypeBase
+
+from . import change_schemas, loader
+
+REGEX_IGNORE_VERSION = re.compile(r"v\d+\.\d+\.\d+(-rc\d+)?")
+
+
+def _diff_type_base(
+    schema_old: TypeBase, schema_new: TypeBase, old_trace: str, new_trace: str
+) -> Iterable[change_schemas.Change]:
+    """
+    This function compares two type base schemas and yields the changes.
+    """
+    if schema_old.title != schema_new.title:
+        raise RuntimeError(
+            (
+                "Title should not change. Renaming is not detectable and the titles are autogenerated.\n"
+                f"{schema_old.title} -> {schema_new.title}"
+            )
+        )
+    if REGEX_IGNORE_VERSION.sub("{__gh_version__}", schema_old.description or "") != REGEX_IGNORE_VERSION.sub(
+        "{__gh_version__}", schema_new.description or ""
+    ):
+        yield change_schemas.Change(
+            type=change_schemas.ChangeType.FIELD_DESCRIPTION_CHANGED,
+            old=schema_old.description,
+            new=schema_new.description,
+            old_trace=old_trace,
+            new_trace=new_trace,
+        )
+    if schema_old.default != schema_new.default and schema_old.title != " Version" and schema_new.title != " Version":
+        yield change_schemas.Change(
+            type=change_schemas.ChangeType.FIELD_DEFAULT_CHANGED,
+            old=schema_old.default,
+            new=schema_new.default,
+            old_trace=old_trace,
+            new_trace=new_trace,
+        )
+
+
+def _diff_enum_schemas(
+    schema_old: StrEnum, schema_new: StrEnum, old_trace: str, new_trace: str
+) -> Iterable[change_schemas.Change]:
+    """
+    This function compares two enum schemas and yields the changes.
+    """
+    new_enum_values = set(schema_new.enum)
+    for old_enum_value in schema_old.enum:
+        if old_enum_value not in new_enum_values:
+            yield change_schemas.Change(
+                type=change_schemas.ChangeType.ENUM_VALUE_REMOVED,
+                old=old_enum_value,
+                new=None,
+                old_trace=old_trace,
+                new_trace=new_trace,
+            )
+        else:
+            new_enum_values.remove(old_enum_value)
+    for new_enum_value in new_enum_values:
+        yield change_schemas.Change(
+            type=change_schemas.ChangeType.ENUM_VALUE_ADDED,
+            old=None,
+            new=new_enum_value,
+            old_trace=old_trace,
+            new_trace=new_trace,
+        )
+
+
+def _diff_object_schemas(
+    schema_old: Object, schema_new: Object, old_trace: str, new_trace: str
+) -> Iterable[change_schemas.Change]:
+    """
+    This function compares two object schemas and yields the changes.
+    """
+    new_properties = set(schema_new.properties.keys())
+    for key, value in schema_old.properties.items():
+        if key not in schema_new.properties:
+            yield change_schemas.Change(
+                type=change_schemas.ChangeType.FIELD_REMOVED,
+                old=value,
+                new=None,
+                old_trace=f"{old_trace}.properties['{key}']",
+                new_trace=new_trace,
+            )
+        else:
+            new_properties.remove(key)
+
+            # Field exists in both schemas, check for further changes
+            yield from _diff_schema_type(
+                value,
+                schema_new.properties[key],
+                f"{old_trace}.properties['{key}']",
+                f"{new_trace}.properties['{key}']",
+            )
+    for key in new_properties:
+        yield change_schemas.Change(
+            type=change_schemas.ChangeType.FIELD_ADDED,
+            old=None,
+            new=schema_new.properties[key],
+            old_trace=old_trace,
+            new_trace=f"{new_trace}.properties['{key}']",
+        )
+
+
+def _diff_ref_schemas(
+    schema_old: Reference, schema_new: Reference, old_trace: str, new_trace: str
+) -> Iterable[change_schemas.Change]:
+    """
+    This function compares two reference schemas and yields a change if the references are different.
+    Even if the referenced schema only got renamed or moved, the reference will be treated as different
+    because in any client application you would have to update the references.
+    """
+    if schema_old.ref != schema_new.ref:
+        yield change_schemas.Change(
+            type=change_schemas.ChangeType.FIELD_REFERENCE_CHANGED,
+            old=schema_old.ref,
+            new=schema_new.ref,
+            old_trace=old_trace,
+            new_trace=new_trace,
+        )
+
+
+def _diff_array_schemas(
+    schema_old: Array, schema_new: Array, old_trace: str, new_trace: str
+) -> Iterable[change_schemas.Change]:
+    """
+    This function compares two array schemas and yields the changes.
+ """ + yield from _diff_schema_type(schema_old.items, schema_new.items, f"{old_trace}.items", f"{new_trace}.items") + + +def _diff_any_of_or_all_of_schemas( + schema_old: AnyOf | AllOf, schema_new: AnyOf | AllOf, old_trace: str, new_trace: str +) -> Iterable[change_schemas.Change]: + """ + This function compares two anyOf or allOf schemas and yields the changes. + """ + assert type(schema_old) is type(schema_new), "Internal error: This function should only be called for equal types" + if isinstance(schema_old, AnyOf): + query_str = "any_of" + else: + query_str = "all_of" + found_new_types = set() + for old_index, old_type in enumerate(getattr(schema_old, query_str)): + found_old_in_new = False + for new_index, new_type in enumerate(getattr(schema_new, query_str)): + changes = list( + _diff_schema_type( + old_type, new_type, f"{old_trace}.{query_str}[{old_index}]", f"{new_trace}.{query_str}[{new_index}]" + ) + ) + if not any(change_schemas.filter_non_crit(changes)): + # The types are equal (except for non-critical changes), yield the non-critical changes + found_old_in_new = True + assert new_index not in found_new_types, "Internal error: Duplicate type in anyOf" + found_new_types.add(new_index) + yield from changes + break + if not found_old_in_new: + yield change_schemas.Change( + type=( + change_schemas.ChangeType.FIELD_ANY_OF_TYPE_REMOVED + if isinstance(schema_old, AnyOf) + else change_schemas.ChangeType.FIELD_ALL_OF_TYPE_REMOVED + ), + old=old_type, + new=None, + old_trace=f"{old_trace}.{query_str}[{old_index}]", + new_trace=f"{new_trace}.{query_str}", + ) + not_found_indices = set(range(len(getattr(schema_new, query_str)))) - found_new_types + for new_index in not_found_indices: + yield change_schemas.Change( + type=( + change_schemas.ChangeType.FIELD_ANY_OF_TYPE_ADDED + if isinstance(schema_old, AnyOf) + else change_schemas.ChangeType.FIELD_ALL_OF_TYPE_ADDED + ), + old=None, + new=getattr(schema_new, query_str)[new_index], + old_trace=old_trace, + new_trace=f"{new_trace}.{query_str}[{new_index}]", + ) + + +def _diff_string_schemas( + schema_old: String, schema_new: String, old_trace: str, new_trace: str +) -> Iterable[change_schemas.Change]: + """ + This function compares two string schemas and yields the changes. + """ + if schema_old.format != schema_new.format: + yield change_schemas.Change( + type=change_schemas.ChangeType.FIELD_STRING_FORMAT_CHANGED, + old=schema_old.format, + new=schema_new.format, + old_trace=old_trace, + new_trace=new_trace, + ) + + +def _diff_schema_differing_types( + schema_old: SchemaType, schema_new: SchemaType, old_trace: str, new_trace: str +) -> Iterable[change_schemas.Change]: + """ + This function compares two differing schema types and yields the changes. + """ + assert type(schema_old) is not type( + schema_new + ), "Internal error: This function should only be called for differing types" + # Types are different. 
Check if it is "only" a change in cardinality
+    if isinstance(schema_old, Object) and isinstance(schema_new, Array):
+        sub_changes = list(_diff_schema_type(schema_old, schema_new.items, old_trace, f"{new_trace}.items"))
+    elif isinstance(schema_old, Array) and isinstance(schema_new, Object):
+        sub_changes = list(_diff_schema_type(schema_old.items, schema_new, f"{old_trace}.items", new_trace))
+    else:
+        sub_changes = None
+
+    if sub_changes is None or any(change_schemas.filter_non_crit(sub_changes)):
+        # The two types count as equal iff there are no critical changes between them.
+        # In this branch they are genuinely different, so report an unclassified type change.
+        yield change_schemas.Change(
+            type=change_schemas.ChangeType.FIELD_TYPE_CHANGED,
+            old=schema_old,
+            new=schema_new,
+            old_trace=old_trace,
+            new_trace=new_trace,
+        )
+    else:
+        # If the types are equal (except for non-critical changes), yield the non-critical changes
+        # plus a change in cardinality
+        yield from sub_changes
+        # If the type of one schema is equal to the items type of the other, there is a change in cardinality
+        yield change_schemas.Change(
+            type=change_schemas.ChangeType.FIELD_CARDINALITY_CHANGED,
+            old=schema_old,
+            new=schema_new,
+            old_trace=old_trace,
+            new_trace=new_trace,
+        )
+
+
+def _diff_schema_type(
+    schema_old: SchemaType, schema_new: SchemaType, old_trace: str, new_trace: str
+) -> Iterable[change_schemas.Change]:
+    """
+    This function compares two schema types and yields the changes.
+    """
+    yield from _diff_type_base(schema_old, schema_new, old_trace, new_trace)
+    if type(schema_old) is not type(schema_new):
+        yield from _diff_schema_differing_types(schema_old, schema_new, old_trace, new_trace)
+    # Even if the types are equal on this shallow level, certain types need the further
+    # checks below.
+    elif isinstance(schema_new, StrEnum):
+        yield from _diff_enum_schemas(schema_old, schema_new, old_trace, new_trace)  # type: ignore[arg-type]
+    # mypy cannot infer that type(schema_new) is type(schema_old) here (and in the branches below)
+    elif isinstance(schema_new, Object):
+        yield from _diff_object_schemas(schema_old, schema_new, old_trace, new_trace)  # type: ignore[arg-type]
+    elif isinstance(schema_new, Reference):
+        yield from _diff_ref_schemas(schema_old, schema_new, old_trace, new_trace)  # type: ignore[arg-type]
+    elif isinstance(schema_new, Array):
+        yield from _diff_array_schemas(schema_old, schema_new, old_trace, new_trace)  # type: ignore[arg-type]
+    elif isinstance(schema_new, (AnyOf, AllOf)):
+        yield from _diff_any_of_or_all_of_schemas(
+            schema_old,  # type: ignore[arg-type]
+            schema_new,
+            old_trace,
+            new_trace,
+        )
+    # Any other types are definitely equal at this point
+
+
+def _diff_root_schemas(
+    schema_old: SchemaRootType, schema_new: SchemaRootType, old_trace: str, new_trace: str
+) -> Iterable[change_schemas.Change]:
+    """
+    This function compares two root schemas and yields the changes.
+    """
+    yield from _diff_schema_type(schema_old, schema_new, old_trace, new_trace)
+
+
+def diff_schemas(schemas_old: Path, schemas_new: Path) -> Iterable[change_schemas.Change]:
+    """
+    This function compares two BO4E versions and yields the changes.
+    Note: The paths to the old and the new schemas should correspond to the same root node of the tree structure.
+    I.e. the direct subdirectories should be "bo", "com" and "enum".
+    """
+    old_schema_files = {file.relative_to(schemas_old) for file in schemas_old.rglob("*.json")}
+    new_schema_files = {file.relative_to(schemas_new) for file in schemas_new.rglob("*.json")}
+
+    for schema_file in old_schema_files - new_schema_files:
+        yield change_schemas.Change(
+            type=change_schemas.ChangeType.CLASS_REMOVED,
+            old=loader.load_schema_file(schemas_old / schema_file),
+            new=None,
+            old_trace=f"{'/'.join(schema_file.with_suffix('').parts)}#",
+            new_trace="#",
+        )
+    for schema_file in new_schema_files - old_schema_files:
+        yield change_schemas.Change(
+            type=change_schemas.ChangeType.CLASS_ADDED,
+            old=None,
+            new=loader.load_schema_file(schemas_new / schema_file),
+            old_trace="#",
+            new_trace=f"{'/'.join(schema_file.with_suffix('').parts)}#",
+        )
+    for schema_file in old_schema_files & new_schema_files:
+        yield from _diff_root_schemas(
+            loader.load_schema_file(schemas_old / schema_file),
+            loader.load_schema_file(schemas_new / schema_file),
+            f"{'/'.join(schema_file.with_suffix('').parts)}#",
+            f"{'/'.join(schema_file.with_suffix('').parts)}#",
+        )
diff --git a/docs/compatibility/loader.py b/docs/compatibility/loader.py
new file mode 100644
index 000000000..3235eea47
--- /dev/null
+++ b/docs/compatibility/loader.py
@@ -0,0 +1,47 @@
+"""
+Contains functions to load and save schema files and changes.
+"""
+
+import json
+from pathlib import Path
+from typing import Iterable
+
+from bost.schema import SchemaRootType
+from pydantic import TypeAdapter
+
+from . import change_schemas
+
+
+def load_schema_file(path: Path) -> SchemaRootType:
+    """
+    Load a schema file and return the parsed schema
+    """
+    return TypeAdapter(SchemaRootType).validate_json(path.read_text("utf-8"))  # type: ignore[return-value]
+    # mypy has trouble inferring the Union type here.
+
+
+def load_changes(path: Path) -> list[change_schemas.Change]:
+    """
+    Load a changes file and return the parsed changes
+    """
+    return TypeAdapter(list[change_schemas.Change]).validate_json(path.read_text("utf-8"))
+
+
+def save_changes(path: Path, changes: Iterable[change_schemas.Change]) -> None:
+    """
+    Save the changes to a file
+    """
+    with open(path, "w", encoding="utf-8") as file:
+        json.dump(
+            TypeAdapter(list[change_schemas.Change]).dump_python(list(changes), mode="json"),
+            file,
+        )
+
+
+def get_namespace(path: Path) -> Iterable[tuple[str, ...]]:
+    """
+    Get the namespace, i.e. the module path of every schema file below the given directory
+    """
+    for schema_file in path.rglob("*.json"):
+        sub_path = schema_file.relative_to(path).parts[:-1]
+        yield *sub_path, schema_file.stem
diff --git a/docs/compatibility/matrix.py b/docs/compatibility/matrix.py
new file mode 100644
index 000000000..529470eba
--- /dev/null
+++ b/docs/compatibility/matrix.py
@@ -0,0 +1,76 @@
+"""
+This module contains the logic to create the compatibility matrix from a list of changes.
+"""
+
+import csv
+import itertools
+from enum import StrEnum
+from pathlib import Path
+from typing import Any as _Any
+from typing import Mapping, Sequence
+
+from . import change_schemas
+
+
+class ChangeSymbol(StrEnum):
+    """
+    This enum class lists the different change symbols used in the compatibility matrix.
+    """
+
+    CHANGE_NONE = "🟢"
+    CHANGE_NON_CRITICAL = "🟡"
+    CHANGE_CRITICAL = "🔴"
+    NON_EXISTENT = "\\-"
+    ADDED = "➕"
+    REMOVED = "➖"
+
+
+def determine_symbol(
+    changes: Sequence[change_schemas.Change], namespace: Sequence[tuple[str, ...]], cls: tuple[str, ...]
+) -> ChangeSymbol:
+    """
+    Determine the matrix symbol for a class from the changes related to it.
+ """ + if len(changes) == 1 and changes[0].type == change_schemas.ChangeType.CLASS_REMOVED: + return ChangeSymbol.REMOVED + if len(changes) == 1 and changes[0].type == change_schemas.ChangeType.CLASS_ADDED: + return ChangeSymbol.ADDED + if cls not in namespace: + return ChangeSymbol.NON_EXISTENT + if len(changes) == 0: + return ChangeSymbol.CHANGE_NONE + + assert all( + change.type not in (change_schemas.ChangeType.CLASS_ADDED, change_schemas.ChangeType.CLASS_REMOVED) + for change in changes + ), "Internal error: CLASS_ADDED and CLASS_REMOVED must be the only change per class if present." + if any(change_schemas.is_change_critical(change) for change in changes): + return ChangeSymbol.CHANGE_CRITICAL + return ChangeSymbol.CHANGE_NON_CRITICAL + + +def create_compatibility_matrix_csv( + output: Path, + versions: Sequence[str], + namespaces: Mapping[str, Sequence[tuple[str, ...]]], + changes: Mapping[tuple[str, str], Sequence[change_schemas.Change]], +) -> None: + """ + Create a compatibility matrix csv file from the given changes. + """ + with open(output, "w", encoding="utf-8") as file: + csv_writer = csv.writer(file, delimiter=",", lineterminator="\n", escapechar="/") + csv_writer.writerow(("", *versions[1:])) + all_classes: set[tuple[str, ...]] = set(itertools.chain.from_iterable(namespaces.values())) + + for class_path in sorted(all_classes, key=lambda cls: tuple(cls_part.lower() for cls_part in cls)): + row = [class_path[-1]] + class_path_str = "/".join(class_path) + "#" + for version_old, version_new in itertools.pairwise(versions): + changes_related_to_class = [ + change + for change in changes[(version_old, version_new)] + if change.old_trace.startswith(class_path_str) or change.new_trace.startswith(class_path_str) + ] + row.append(determine_symbol(changes_related_to_class, namespaces[version_new], class_path).value) + csv_writer.writerow(row) diff --git a/docs/conf.py b/docs/conf.py index 167e99cbf..7daefb38b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,7 +7,6 @@ # # All configuration values have a default; values that are commented out # serve to show the default. - import inspect import os import shutil @@ -22,7 +21,9 @@ sys.path.insert(0, os.path.join(__location__, "../src")) sys.path.insert(0, os.path.join(__location__, "../docs")) +sys.path.insert(0, os.path.join(__location__, "../docs/compatibility")) import uml +from compatibility.__main__ import create_tables_for_doc # import package bo4e to clarify namespaces and prevent circular import errors from bo4e import * @@ -170,9 +171,13 @@ # Note: For the deployment to GitHub Pages the release and version values will # be set by the action. This is to support things like /latest or /stable. if "release" not in globals(): - from bo4e import __gh_version__ as release + release = os.getenv("SPHINX_DOCS_RELEASE") + if release is None: + from bo4e import __gh_version__ as release if "version" not in globals(): - from bo4e import __version__ as version + version = os.getenv("SPHINX_DOCS_VERSION") + if version is None: + from bo4e import __version__ as version print(f"Got version = {version} from __version__") print(f"Got release = {release} from __gh_version__") @@ -304,7 +309,8 @@ # Create UML diagrams in plantuml format. Compile these into svg files into the _static folder. # See docs/uml.py for more details. 
-uml.LINK_URI_BASE = f"https://bo4e.github.io/BO4E-python/{release}" +if release != "local": + uml.LINK_URI_BASE = f"https://bo4e.github.io/BO4E-python/{release}" _exec_plantuml = Path(__location__) / "plantuml.jar" _network, _namespaces_to_parse = uml.build_network(Path(module_dir), uml.PlantUMLNetwork) print(_network) @@ -313,3 +319,8 @@ uml.compile_files_kroki(Path(output_dir) / "uml", Path(output_dir).parent / "_static" / "images", locally_hosted=True) print(f"Compiled uml files into svg using kroki.") + +# Create compatibility matrix +compatibility_matrix_output_file = Path(__file__).parent / "compatibility_matrix.csv" +gh_token = os.getenv("GITHUB_ACCESS_TOKEN") or os.getenv("GITHUB_TOKEN") +create_tables_for_doc(compatibility_matrix_output_file, release, last_n_versions=0, gh_token=gh_token) diff --git a/docs/requirements.in b/docs/requirements.in index bc74298b9..7d9c8f56a 100644 --- a/docs/requirements.in +++ b/docs/requirements.in @@ -6,5 +6,4 @@ requests Sphinx sphinx_rtd_theme typeguard -# any dependency here should also be a dependency in the docs section of tox.ini -# see issue https://github.com/bo4e/BO4E-python/issues/474 for a real fix that does not require manual adaptions +BO4E-Schema-Tool diff --git a/docs/requirements.txt b/docs/requirements.txt index f2148e7b4..e7216efe3 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # # pip-compile '.\docs\requirements.in' @@ -10,10 +10,26 @@ annotated-types==0.5.0 # via pydantic babel==2.12.1 # via sphinx +bo4e-schema-tool==0.0.7 + # via -r .\docs\requirements.in certifi==2023.7.22 # via requests +cffi==1.16.0 + # via + # cryptography + # pynacl charset-normalizer==2.1.0 # via requests +click==8.1.7 + # via bo4e-schema-tool +colorama==0.4.6 + # via + # click + # sphinx +cryptography==42.0.5 + # via pyjwt +deprecated==1.2.14 + # via pygithub docutils==0.18.1 # via # sphinx @@ -23,32 +39,46 @@ idna==3.7 imagesize==1.4.1 # via sphinx iso3166==2.1.1 - # via -r requirements.in + # via -r .\docs\requirements.in jinja2==3.1.3 # via sphinx markupsafe==2.1.3 # via jinja2 +more-itertools==10.2.0 + # via bo4e-schema-tool networkx==3.3 - # via -r requirements.in + # via -r .\docs\requirements.in packaging==24.0 # via sphinx +pycparser==2.21 + # via cffi pydantic==2.4.2 - # via -r requirements.in + # via + # -r .\docs\requirements.in + # bo4e-schema-tool pydantic-core==2.10.1 # via pydantic +pygithub==2.2.0 + # via bo4e-schema-tool pygments==2.16.1 # via sphinx pyhumps==3.8.0 - # via -r requirements.in + # via -r .\docs\requirements.in +pyjwt[crypto]==2.8.0 + # via pygithub +pynacl==1.5.0 + # via pygithub requests==2.31.0 # via - # -r requirements.in + # -r .\docs\requirements.in + # bo4e-schema-tool + # pygithub # sphinx snowballstemmer==2.2.0 # via sphinx sphinx==7.2.6 # via - # -r requirements.in + # -r .\docs\requirements.in # sphinx-rtd-theme # sphinxcontrib-applehelp # sphinxcontrib-devhelp @@ -57,7 +87,7 @@ sphinx==7.2.6 # sphinxcontrib-qthelp # sphinxcontrib-serializinghtml sphinx-rtd-theme==1.3.0 - # via -r requirements.in + # via -r .\docs\requirements.in sphinxcontrib-applehelp==1.0.7 # via sphinx sphinxcontrib-devhelp==1.0.5 @@ -73,11 +103,15 @@ sphinxcontrib-qthelp==1.0.6 sphinxcontrib-serializinghtml==1.1.9 # via sphinx typeguard==4.1.5 - # via -r requirements.in + # via -r .\docs\requirements.in typing-extensions==4.11.0 # via # pydantic # 
pydantic-core - # typeguard + # pygithub urllib3==2.2.1 - # via requests + # via + # pygithub + # requests +wrapt==1.16.0 + # via deprecated diff --git a/generate_or_validate_json_schemas.py b/generate_or_validate_json_schemas.py index 193b377d9..84f52f96e 100644 --- a/generate_or_validate_json_schemas.py +++ b/generate_or_validate_json_schemas.py @@ -191,6 +191,7 @@ def traverse_dict(obj: dict[str, Any]) -> None: ) def generate_or_validate_json_schemas(mode: Literal["validate", "generate"], target_version: str) -> None: """generate json schemas for all BOs and COMs""" + _logger.debug("Mode: %s, target version: %s", mode, target_version) packages = ["bo", "com", "enum"] if mode == "generate": diff --git a/tox.ini b/tox.ini index db25bdd66..bfe8aaccb 100644 --- a/tox.ini +++ b/tox.ini @@ -28,6 +28,7 @@ deps = commands = pylint src/bo4e pylint docs/uml.py + pylint docs/compatibility pylint generate_or_validate_json_schemas.py [testenv:type_check] @@ -42,6 +43,7 @@ commands = mypy --show-error-codes src/bo4e mypy --show-error-codes tests mypy --show-error-codes docs/uml.py + mypy --show-error-codes docs/compatibility mypy --show-error-codes generate_or_validate_json_schemas.py # add single files (ending with .py) or packages here @@ -80,10 +82,15 @@ commands = deps = -r requirements.txt -r docs/requirements.txt + {[testenv:generate_json_schemas]deps} # any dependency added here should also be added in docs/requirements.in and docs/requirements.txt respectively +setenv = + TARGET_VERSION = {env:TARGET_VERSION:local} + SPHINX_DOCS_RELEASE = {env:TARGET_VERSION:local} + SPHINX_DOCS_VERSION = {env:TARGET_VERSION:local} commands = - sphinx-build -W -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html - sphinx-build -n -T -W -b doctest -d {envtmpdir}/doctrees docs docs/_build/html + {[testenv:generate_json_schemas]commands} + sphinx-build -T -W -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html python -m doctest README.rst
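
The comparison pipeline in docs/compatibility can also be driven by hand, e.g. to inspect the changes between two releases without a full docs build. A minimal sketch, assuming it is run from the repository root with docs/ on sys.path (mirroring what conf.py does); the two version tags are placeholders for real release tags:

    import sys

    sys.path.insert(0, "docs")

    from compatibility import change_schemas
    from compatibility.__main__ import compare_bo4e_versions

    # Schemas are pulled into tmp/bo4e_json_schemas, or reused if already present.
    for change in compare_bo4e_versions("v202401.0.2", "v202401.1.0"):
        marker = "critical" if change_schemas.is_change_critical(change) else "non-critical"
        print(f"[{marker}] {change.old_trace}: {change}")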
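The change model itself is plain pydantic, so the classification helpers can be exercised in isolation. A small sketch of how is_change_critical and filter_non_crit treat an added versus a removed field; Angebot is a real BO4E business object, but the field and traces are made up for illustration:

    from compatibility import change_schemas

    added = change_schemas.Change(
        type=change_schemas.ChangeType.FIELD_ADDED,
        old=None,
        new={"type": "string"},
        old_trace="bo/Angebot#",
        new_trace="bo/Angebot#.properties['beschreibung']",
    )
    removed = change_schemas.Change(
        type=change_schemas.ChangeType.FIELD_REMOVED,
        old={"type": "string"},
        new=None,
        old_trace="bo/Angebot#.properties['beschreibung']",
        new_trace="bo/Angebot#",
    )

    assert not change_schemas.is_change_critical(added)  # additions are backwards compatible
    assert change_schemas.is_change_critical(removed)  # removals break existing clients
    # Despite its name, filter_non_crit keeps exactly the critical changes:
    assert list(change_schemas.filter_non_crit([added, removed])) == [removed]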
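For a single matrix cell, determine_symbol receives all changes whose trace starts with the class path, together with the namespace of the newer version. A self-contained sketch with a hypothetical two-class namespace, showing how the legend colours from docs/changelog.rst come about:

    from compatibility import change_schemas, matrix

    namespace_new = [("bo", "Angebot"), ("com", "Preis")]
    field_added = change_schemas.Change(
        type=change_schemas.ChangeType.FIELD_ADDED,
        old=None,
        new={"type": "string"},
        old_trace="bo/Angebot#",
        new_trace="bo/Angebot#.properties['beschreibung']",
    )

    # Only non-critical changes -> yellow cell; no changes -> green cell;
    # not present in the namespace -> "\-" (not existent in this version).
    symbol = matrix.determine_symbol([field_added], namespace_new, ("bo", "Angebot"))
    assert symbol is matrix.ChangeSymbol.CHANGE_NON_CRITICAL
    assert matrix.determine_symbol([], namespace_new, ("com", "Preis")) is matrix.ChangeSymbol.CHANGE_NONE
    assert matrix.determine_symbol([], namespace_new, ("enum", "Anrede")) is matrix.ChangeSymbol.NON_EXISTENT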
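The documentation build wires everything together via create_tables_for_doc, called at the end of conf.py. For a quick local run without Sphinx, the patch ships test_create_tables_for_doc; a rough sketch of the equivalent direct call, assuming the working-tree schemas were generated beforehand (tox -e generate_json_schemas with TARGET_VERSION=local, matching the tox.ini setenv block above):

    from pathlib import Path

    from compatibility.__main__ import create_tables_for_doc

    create_tables_for_doc(
        Path("docs/compatibility_matrix.csv"),
        "local",  # a non-release version string triggers the bost regex monkey-patch
        last_n_versions=2,  # compare the two most recent releases plus the working tree
    )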