From 6e29cdb356490ca76af424dd52d8258d0e8dcb6d Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Mon, 6 Apr 2026 22:09:07 +0200 Subject: [PATCH 1/3] Simplify manifest class --- dfetch/commands/add.py | 2 +- dfetch/commands/freeze.py | 2 +- dfetch/commands/import_.py | 10 +- dfetch/manifest/manifest.py | 338 +++++++++++++++++++++--------------- dfetch/manifest/project.py | 24 +++ tests/test_add.py | 34 ++-- tests/test_manifest.py | 130 +++++++++++++- 7 files changed, 376 insertions(+), 164 deletions(-) diff --git a/dfetch/commands/add.py b/dfetch/commands/add.py index 594a48456..a66f4f006 100644 --- a/dfetch/commands/add.py +++ b/dfetch/commands/add.py @@ -253,7 +253,7 @@ def _finalize_add( return superproject.manifest.append_project_entry(project_entry) - superproject.manifest.update_dump() + superproject.manifest.dump() logger.print_info_line( project_entry.name, f"Added '{project_entry.name}' to manifest '{superproject.manifest.path}'", diff --git a/dfetch/commands/freeze.py b/dfetch/commands/freeze.py index a9b024b12..5e4cb21bd 100644 --- a/dfetch/commands/freeze.py +++ b/dfetch/commands/freeze.py @@ -135,5 +135,5 @@ def __call__(self, args: argparse.Namespace) -> None: manifest_updated = True if manifest_updated: - superproject.manifest.update_dump() + superproject.manifest.dump() logger.info(f"Updated manifest ({manifest_path}) in {os.getcwd()}") diff --git a/dfetch/commands/import_.py b/dfetch/commands/import_.py index cb7a5b21a..cecb537b7 100644 --- a/dfetch/commands/import_.py +++ b/dfetch/commands/import_.py @@ -58,11 +58,19 @@ def __call__(self, _: argparse.Namespace) -> None: project.set_remote(remote) break + single_remote = len(remotes) == 1 + project_dicts = [] + for p in projects: + d = p.as_yaml() + if single_remote: + d.pop("remote", None) + project_dicts.append(d) + manifest_data = { "manifest": { "version": "0.0", "remotes": [r.as_yaml() for r in remotes], - "projects": [p.as_yaml() for p in projects], + "projects": project_dicts, } } manifest = Manifest.from_yaml(yaml.dump(manifest_data, sort_keys=False)) diff --git a/dfetch/manifest/manifest.py b/dfetch/manifest/manifest.py index 3c6005932..79d04e8d1 100644 --- a/dfetch/manifest/manifest.py +++ b/dfetch/manifest/manifest.py @@ -29,7 +29,7 @@ import yaml from strictyaml import YAML, StrictYAMLError, YAMLValidationError, load -from strictyaml.ruamel.comments import CommentedMap +from strictyaml.ruamel.comments import CommentedMap, CommentedSeq from strictyaml.ruamel.error import CommentMark from strictyaml.ruamel.scalarstring import SingleQuotedScalarString from strictyaml.ruamel.tokens import CommentToken @@ -61,12 +61,87 @@ def _ensure_unique(seq: list[dict[str, Any]], key: str, context: str) -> None: def _yaml_str(value: str) -> str | SingleQuotedScalarString: - """Return SingleQuotedScalarString if value would be misread as non-string.""" - if not isinstance(yaml.safe_load(value), str): + """Return SingleQuotedScalarString if value would be misread as non-string. + + If ``yaml.safe_load`` cannot parse *value* at all (e.g. it starts with a + ``%`` directive marker) the value is quoted to be safe. + """ + try: + if not isinstance(yaml.safe_load(value), str): + return SingleQuotedScalarString(value) + except yaml.YAMLError: return SingleQuotedScalarString(value) return value +def _ensure_blank_line_after_nested_map(parent: CommentedMap, key: str) -> None: + """Ensure a blank line appears after a nested mapping value. + + For a block mapping value (e.g. ``integrity:`` with sub-key ``hash:``), + the blank line must live inside the nested map — at + ``nested.ca.items[last_key][2]`` — not on the parent key. Putting it on + the parent key at position ``[2]`` or ``[3]`` lands it *between* the key + line and the first sub-key, producing a spurious blank line. + """ + # Clear any spurious blanks the parent map may carry on positions [2]/[3]. + parent_items = parent.ca.items.get(key) + if parent_items: + if parent_items[2] is not None: + parent_items[2] = None + if parent_items[3] is not None: + parent_items[3] = None + + nested: CommentedMap = parent[key] + nested_keys = list(nested.keys()) + if nested_keys: + _ensure_blank_line_after(nested, nested_keys[-1]) + + +def _ensure_blank_line_after_seq(seq: CommentedSeq) -> None: + """Ensure there is a blank line after the last item of *seq*. + + The blank line belongs at ``seq.ca.items[last_idx][0]`` (the pre-comment + slot of the last element). We also clear any extra newline stored at + ``seq.ca.items[0][1]``, which is where ruamel parks whitespace between the + parent key (e.g. ``patch:``) and the first ``-`` item — the source of the + spurious blank line that this function is designed to prevent. + """ + if not seq: + return + + # Clear spurious blank line between the key and the first list item. + first_item_ca = seq.ca.items.get(0) + if first_item_ca is not None and first_item_ca[1] is not None: + first_item_ca[1] = None + + # Ensure blank line after the last item for project-entry separation. + last_idx = len(seq) - 1 + existing = seq.ca.items.get(last_idx, [None, None, None, None]) + token = existing[0] + if token is None: + existing[0] = CommentToken("\n\n", CommentMark(0), None) + seq.ca.items[last_idx] = existing + elif not token.value.endswith("\n\n"): + token.value = token.value.rstrip("\n") + "\n\n" + + +def _ensure_blank_line_after(ca_map: CommentedMap, key: str) -> None: + """Ensure there is exactly one blank line after *key*'s value in *ca_map*. + + Blank lines in ruamel are stored as trailing ``CommentToken`` values at + position ``[2]`` of ``ca_map.ca.items[key]``. If no token exists one is + created; if one already exists its trailing newlines are normalised to two + (the line ending of the value itself plus the blank line). + """ + items = ca_map.ca.items.get(key, [None, None, None, None]) + token = items[2] + if token is None: + items[2] = CommentToken("\n\n", CommentMark(0), None) + ca_map.ca.items[key] = items + elif not token.value.endswith("\n\n"): + token.value = token.value.rstrip("\n") + "\n\n" + + @dataclass class ManifestEntryLocation: """Location of an entry in the manifest file.""" @@ -133,10 +208,15 @@ class ManifestDict(TypedDict, total=True): # pylint: disable=too-many-ancestors ] -class Manifest: # pylint: disable=too-many-instance-attributes +class Manifest: """Manifest describing all the modules information. This class is created from the manifest file a project has. + + ``self._doc`` is the single source of truth: all state is read from and written + to the underlying YAML document. The only cached fields are ``__path``, + ``__relative_path``, ``__version`` (immutable after construction), and + ``_default_remote_name`` (also immutable: remotes are never added at runtime). """ CURRENT_VERSION = "0.0" @@ -159,30 +239,33 @@ def __init__( manifest_data: dict[str, Any] = cast(dict[str, Any], doc.data)["manifest"] self.__version: str = str(manifest_data.get("version", self.CURRENT_VERSION)) - # Ensure version is always written as a quoted string by update_dump(). + # Ensure version is always written as a quoted string by dump(). doc["manifest"].as_marked_up()["version"] = SingleQuotedScalarString( self.__version ) - remotes = manifest_data.get("remotes", []) - projects = manifest_data["projects"] + remotes_raw = manifest_data.get("remotes", []) + projects_raw = manifest_data["projects"] - _ensure_unique(remotes, "name", "manifest.remotes") - _ensure_unique(projects, "name", "manifest.projects") - _ensure_unique(projects, "dst", "manifest.projects") - - self._remotes, default_remotes = self._determine_remotes(remotes) + _ensure_unique(remotes_raw, "name", "manifest.remotes") + _ensure_unique(projects_raw, "name", "manifest.projects") + _ensure_unique(projects_raw, "dst", "manifest.projects") + # Determine and cache the default remote name (remotes don't change at runtime). + remotes_dict, default_remotes = self._determine_remotes(remotes_raw) if not default_remotes: - default_remotes = list(self._remotes.values())[0:1] - + default_remotes = list(remotes_dict.values())[0:1] self._default_remote_name = ( "" if not default_remotes else default_remotes[0].name ) - self._projects = self._init_projects(projects) + # Ensure blank lines appear before 'remotes:' and 'projects:' when dumped. + self._ensure_section_spacing() - def _init_projects( + # Re-apply quoting to scalars whose style was stripped by strictyaml. + self._normalize_string_scalars() + + def _build_projects( self, projects: Sequence[ ProjectEntryDict @@ -190,44 +273,32 @@ def _init_projects( | dict[str, str | list[str] | dict[str, str]] ], ) -> dict[str, ProjectEntry]: - """Iterate over projects from manifest and initialize ProjectEntries from it. + """Build a mapping of name → ProjectEntry from raw project data. Args: - projects (Sequence[ - Union[ProjectEntryDict, ProjectEntry, Dict[str, Union[str, list[str], dict[str, str]]]] - ]): Iterable with projects + projects: Iterable of project dicts or ProjectEntry objects. Raises: - RuntimeError: Project unknown + KeyError: A project dict is missing ``name``. + TypeError: A project name is not a string. + RuntimeError: A project references an unknown remote. Returns: - Dict[str, ProjectEntry]: Dictionary with key: Name of project, Value: ProjectEntry + Dict mapping project name to ProjectEntry. """ + remotes = {r.name: r for r in self.remotes} _projects: dict[str, ProjectEntry] = {} for project in projects: - if isinstance(project, dict): - if "name" not in project: - raise KeyError("Missing name!") - if not isinstance(project["name"], str): - raise TypeError( - f"Project name must be a string, got {type(project['name']).__name__}" - ) - last_project = _projects[project["name"]] = ProjectEntry.from_yaml( - project, self._default_remote_name - ) - elif isinstance(project, ProjectEntry): - last_project = _projects[project.name] = ProjectEntry.copy(project) - else: - raise RuntimeError(f"{project} has unknown type") - - if last_project.remote: + entry = ProjectEntry.from_raw(project, self._default_remote_name) + _projects[entry.name] = entry + if entry.remote: try: - last_project.set_remote(self._remotes[last_project.remote]) + entry.set_remote(remotes[entry.remote]) except KeyError as exc: raise RuntimeError( - f"Remote {last_project.remote} of {last_project.name} wasn't found " - f"in {list(self._remotes.keys())}!", + f"Remote {entry.remote} of {entry.name} wasn't found " + f"in {list(remotes.keys())}!", ) from exc return _projects @@ -273,6 +344,8 @@ def from_yaml( ] ) ) from err + except ValueError as err: + raise RuntimeError(f"Schema validation failed: {err}") from err return Manifest(doc, path=path) @staticmethod @@ -310,93 +383,56 @@ def version(self) -> str: @property def projects(self) -> Sequence[ProjectEntry]: """Get a list of Projects from the manifest.""" - return list(self._projects.values()) + projects_mu = self._doc["manifest"]["projects"].as_marked_up() + return list(self._build_projects(projects_mu).values()) + + @staticmethod + def _filter_projects( + names: Sequence[str], all_projects: Sequence[ProjectEntry] + ) -> list[ProjectEntry]: + """Return projects whose name appears in *names*, or all if *names* is empty.""" + if not names: + return list(all_projects) + return [p for p in all_projects if p.name in names] def selected_projects(self, names: Sequence[str]) -> Sequence[ProjectEntry]: """Get a list of Projects from the manifest with the given names.""" - projects = ( - [p for p in self._projects.values() if p.name in names] - if names - else list(self._projects.values()) - ) - self._check_all_names_found(names, projects) - return projects + all_projects = self.projects + unique_names = list(dict.fromkeys(names)) + result = self._filter_projects(unique_names, all_projects) - def _check_all_names_found( - self, names: Sequence[str], projects: Sequence[ProjectEntry] - ) -> None: - """Raise if any of *names* is not represented in *projects*.""" - unique_names = list(dict.fromkeys(names)) # deduplicate, preserve order - if not unique_names or len(projects) == len(unique_names): - return - found = {project.name for project in projects} - unfound = [name for name in unique_names if name not in found] - possibles = [project.name for project in self._projects.values()] - raise RequestedProjectNotFoundError(unfound, possibles) + if not unique_names or len(result) == len(unique_names): + return result + + found = {project.name for project in result} + + raise RequestedProjectNotFoundError( + unfound=[name for name in unique_names if name not in found], + possibles=[project.name for project in all_projects], + ) @property def remotes(self) -> Sequence[Remote]: """Get a list of Remotes from the manifest.""" - return list(self._remotes.values()) + manifest_mu = self._doc["manifest"].as_marked_up() + remotes_dict, _ = self._determine_remotes(manifest_mu.get("remotes", [])) + return list(remotes_dict.values()) def __repr__(self) -> str: """Get string representing this object.""" - return str(self._as_dict()) - - def _as_dict(self) -> dict[str, ManifestDict]: - """Get this manifest as dict.""" - remotes: Sequence[RemoteDict] = [ - remote.as_yaml() for remote in self._remotes.values() - ] + return str(self._doc.as_yaml()) - if len(remotes) == 1: - remotes[0].pop("default", None) + def dump(self, path: str | None = None) -> None: + """Write the manifest to *path*, preserving formatting and comments. - projects: list[dict[str, str | list[str] | dict[str, str]]] = [] - for project in self.projects: - project_yaml: dict[str, str | list[str] | dict[str, str]] = ( - project.as_yaml() - ) - if len(remotes) == 1: - project_yaml.pop("remote", None) - projects.append(project_yaml) - - if remotes: - return { - "manifest": { - "version": self.version, - "remotes": remotes, - "projects": projects, - } - } - - return { - "manifest": { - "version": self.version, - "projects": projects, - } - } - - def dump(self, path: str) -> None: - """Dump metadata file to correct path.""" - with open(path, "w+", encoding="utf-8") as manifest_file: - yaml.dump( - self._as_dict(), - manifest_file, - Dumper=ManifestDumper, - sort_keys=False, - line_break=os.linesep, - ) - - def update_dump(self) -> None: - """Dump the manifest to its path, using the original text as a base to preserve formatting and comments.""" - if not self.__path: - raise RuntimeError("Cannot update dump of manifest with no path") - - updated_text = self._doc.as_yaml() - - with open(self.__path, "w", encoding="utf-8", newline="") as manifest_file: - manifest_file.write(updated_text) + If *path* is omitted the manifest is written back to the file it was + loaded from. Raises ``RuntimeError`` if no path is available. + """ + target = path or self.__path + if not target: + raise RuntimeError("Cannot dump manifest with no path") + with open(target, "w", encoding="utf-8", newline="") as manifest_file: + manifest_file.write(self._doc.as_yaml()) def find_name_in_manifest(self, name: str) -> ManifestEntryLocation: """Find the location of a project name in the manifest. @@ -417,12 +453,59 @@ def find_name_in_manifest(self, name: str) -> ManifestEntryLocation: raise RuntimeError(f"{name} was not found in the manifest!") # ---------------- YAML updates ---------------- + def _normalize_string_scalars(self) -> None: + """Re-apply ``SingleQuotedScalarString`` to any string that would be misread. + + strictyaml strips quoting-style information during schema validation, so + a value like ``'176'`` (single-quoted in the source YAML) becomes a plain + Python ``str`` in the ruamel layer. ruamel then serialises it without + quotes, producing the integer ``176``. We walk all top-level string + fields in every project and remote entry and restore the quoting wherever + ``_yaml_str`` determines it is needed. + """ + manifest_mu = self._doc["manifest"].as_marked_up() + for entry in manifest_mu.get("projects", []): + for key in list(entry.keys()): + if isinstance(entry[key], str): + entry[key] = _yaml_str(entry[key]) + for entry in manifest_mu.get("remotes", []): + for key in list(entry.keys()): + if isinstance(entry[key], str): + entry[key] = _yaml_str(entry[key]) + + def _ensure_section_spacing(self) -> None: + """Ensure blank lines appear before ``remotes:`` and ``projects:`` and between entries.""" + manifest_mu = self._doc["manifest"].as_marked_up() + _ensure_blank_line_after(manifest_mu, "version") + + remotes_mu = manifest_mu.get("remotes") + if remotes_mu: + last_remote = remotes_mu[-1] + _ensure_blank_line_after(last_remote, list(last_remote.keys())[-1]) + + projects_mu = manifest_mu.get("projects", []) + for project in projects_mu[:-1]: # all entries except the last + last_key = list(project.keys())[-1] + value = project[last_key] + if isinstance(value, CommentedSeq): + # Clear any spurious blank line that ended up between the key + # and the first list item (stored at ca.items[key][2] in the + # parent map), then place the blank line after the last item. + key_items = project.ca.items.get(last_key) + if key_items and key_items[2] is not None: + key_items[2] = None + _ensure_blank_line_after_seq(value) + elif isinstance(value, CommentedMap): + _ensure_blank_line_after_nested_map(project, last_key) + else: + _ensure_blank_line_after(project, last_key) + def append_project_entry(self, project_entry: "ProjectEntry") -> None: """Append *project_entry* to the projects list in-memory. The new entry is formatted the same way as the existing YAML in the document (2-space indent under ``projects:``). Call - :meth:`update_dump` afterwards to persist the change to disk. + :meth:`dump` afterwards to persist the change to disk. """ projects_mu = self._doc["manifest"]["projects"].as_marked_up() projects_mu.append(CommentedMap(project_entry.as_yaml())) @@ -433,7 +516,6 @@ def append_project_entry(self, project_entry: "ProjectEntry") -> None: None, None, ] - self._projects[project_entry.name] = ProjectEntry.copy(project_entry) def update_project_version(self, project: ProjectEntry) -> None: """Update a project's version in the manifest in-place, preserving layout, comments, and line endings.""" @@ -534,25 +616,3 @@ def find_remote_for_url(self, remote_url: str) -> Remote | None: if target.startswith(remote_base): return remote return None - - -class ManifestDumper(yaml.SafeDumper): # pylint: disable=too-many-ancestors - """Dump a manifest YAML. - - HACK: insert blank lines between top-level objects - inspired by https://stackoverflow.com/a/44284819/3786245 - """ - - _last_additional_break = 0 - - def write_line_break(self, data: Any = None) -> None: - """Write a line break.""" - super().write_line_break(data) # type: ignore[unused-ignore, no-untyped-call] - - if len(self.indents) == 2 and getattr(self.event, "value", "") != "version": - super().write_line_break() # type: ignore[unused-ignore, no-untyped-call] - - if len(self.indents) == 3 and self._last_additional_break != 2: - super().write_line_break() # type: ignore[unused-ignore, no-untyped-call] - - self._last_additional_break = len(self.indents) diff --git a/dfetch/manifest/project.py b/dfetch/manifest/project.py index 00c570f1a..2a34ed28a 100644 --- a/dfetch/manifest/project.py +++ b/dfetch/manifest/project.py @@ -570,3 +570,27 @@ def as_yaml(self) -> dict[str, str | list[str] | dict[str, str]]: } return {k: v for k, v in yamldata.items() if v} + + @staticmethod + def from_raw( + project: "ProjectEntryDict | ProjectEntry | dict[str, str | list[str] | dict[str, str]]", + default_remote: str = "", + ) -> "ProjectEntry": + """Convert a raw project value to a :class:`ProjectEntry`. + + Raises: + KeyError: *project* is a dict missing ``name``. + TypeError: The ``name`` value is not a string. + RuntimeError: *project* has an unrecognised type. + """ + if isinstance(project, dict): + if "name" not in project: + raise KeyError("Missing name!") + if not isinstance(project["name"], str): + raise TypeError( + f"Project name must be a string, got {type(project['name']).__name__}" + ) + return ProjectEntry.from_yaml(project, default_remote) + if isinstance(project, ProjectEntry): + return ProjectEntry.copy(project) + raise RuntimeError(f"{project} has unknown type") diff --git a/tests/test_add.py b/tests/test_add.py index 4ff168cc3..f77917104 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -175,7 +175,7 @@ def test_add_command_non_interactive_appends_entry(): Add()(_make_args("https://github.com/org/myrepo.git")) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -210,7 +210,7 @@ def test_add_command_non_interactive_field_overrides(): ) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -241,7 +241,7 @@ def test_add_command_suffixes_duplicate_name(): Add()(_make_args("https://github.com/org/myrepo.git")) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -286,7 +286,7 @@ def test_add_command_interactive_branch(): ) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -327,7 +327,7 @@ def test_add_command_interactive_branch_by_number(): ) ) - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -367,7 +367,7 @@ def test_add_command_interactive_tag(): ) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -405,7 +405,7 @@ def test_add_command_interactive_abort(): ) fake_superproject.manifest.append_project_entry.assert_not_called() - fake_superproject.manifest.update_dump.assert_not_called() + fake_superproject.manifest.dump.assert_not_called() def test_add_command_interactive_with_src(): @@ -441,7 +441,7 @@ def test_add_command_interactive_with_src(): ) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -481,7 +481,7 @@ def test_add_command_interactive_with_ignore(): ) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -562,7 +562,7 @@ def test_add_command_interactive_with_overrides(): ) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -606,7 +606,7 @@ def test_add_command_interactive_with_all_overrides(): mock_prompt.assert_not_called() fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -665,7 +665,7 @@ def test_add_command_interactive_svn_trunk(): Add()(_make_args(_SVN_URL, interactive=True)) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -700,7 +700,7 @@ def test_add_command_interactive_svn_custom_branch(): ): Add()(_make_args(_SVN_URL, interactive=True)) - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -734,7 +734,7 @@ def test_add_command_interactive_svn_tag(): ): Add()(_make_args(_SVN_URL, interactive=True)) - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -769,7 +769,7 @@ def test_add_command_interactive_svn_branch_by_number(): ): Add()(_make_args(_SVN_URL, interactive=True)) - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -794,7 +794,7 @@ def test_add_command_non_interactive_svn(): Add()(_make_args(_SVN_URL)) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] @@ -832,7 +832,7 @@ def test_add_command_matches_existing_remote(): ) fake_superproject.manifest.append_project_entry.assert_called_once() - fake_superproject.manifest.update_dump.assert_called_once() + fake_superproject.manifest.dump.assert_called_once() entry: ProjectEntry = fake_superproject.manifest.append_project_entry.call_args[0][ 0 ] diff --git a/tests/test_manifest.py b/tests/test_manifest.py index b271da697..776790906 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -78,7 +78,7 @@ def test_no_remotes() -> None: assert len(manifest.projects) == 1 assert manifest.projects[0].name == "my-project" assert manifest.projects[0].remote_url == "http://www.somewhere.com" - assert len(manifest._remotes) == 0 + assert len(manifest.remotes) == 0 def test_construct_from_yaml() -> None: @@ -88,8 +88,8 @@ def test_construct_from_yaml() -> None: assert manifest.version == "0" assert len(manifest.projects) == 1 assert manifest.projects[0].name == "my-project" - assert len(manifest._remotes) == 1 - assert next(iter(manifest._remotes.values())).name == "my-remote" + assert len(manifest.remotes) == 1 + assert manifest.remotes[0].name == "my-remote" def test_no_manifests_found() -> None: @@ -415,8 +415,8 @@ def test_version_parsed_as_string(manifest_text: str, expected_version: str) -> "manifest:\n version: '0.0'\n projects:\n - name: p\n", ], ) -def test_update_dump_writes_version_as_quoted_string(manifest_text: str) -> None: - """update_dump must always write version as a quoted YAML string.""" +def test_dump_writes_version_as_quoted_string(manifest_text: str) -> None: + """dump must always write version as a quoted YAML string.""" manifest = Manifest.from_yaml(manifest_text) result = manifest._doc.as_yaml() # The version value must appear quoted so that YAML parsers read it as a string. @@ -448,3 +448,123 @@ def test_append_project_entry_check_name_uniqueness_sees_new_entry() -> None: with pytest.raises(ValueError, match="newproject"): manifest.check_name_uniqueness("newproject") + + +# --------------------------------------------------------------------------- +# patch list: no spurious blank line between 'patch:' and first item +# --------------------------------------------------------------------------- + +_PATCH_LIST_MANIFEST = """\ +manifest: + version: '0.0' + projects: + - name: first + url: https://example.com/first + patch: + - patches/a.patch + - patches/b.patch + - name: second + url: https://example.com/second +""" + +_PATCH_LIST_MANIFEST_WITH_SPURIOUS_BLANK = """\ +manifest: + version: '0.0' + projects: + - name: first + url: https://example.com/first + patch: + + - patches/a.patch + - patches/b.patch + - name: second + url: https://example.com/second +""" + + +def test_patch_list_no_blank_line_before_first_item() -> None: + """Loading a manifest with a multi-entry patch list must not insert a blank + line between 'patch:' and the first list item.""" + result = Manifest.from_yaml(_PATCH_LIST_MANIFEST)._doc.as_yaml() + assert ( + "patch:\n - patches/a.patch" in result + ), "spurious blank line appeared between 'patch:' and first item" + + +def test_patch_list_blank_line_after_last_item_before_next_project() -> None: + """A blank line must still separate consecutive project entries when the + first project's last key is a multi-entry patch list.""" + result = Manifest.from_yaml(_PATCH_LIST_MANIFEST)._doc.as_yaml() + assert ( + " - patches/b.patch\n\n - name: second" in result + ), "missing blank line between projects when patch list is the last key" + + +def test_patch_list_fixes_spurious_blank_when_already_present() -> None: + """Re-loading a manifest that already contains the spurious blank line must + heal it on the next serialisation.""" + result = Manifest.from_yaml(_PATCH_LIST_MANIFEST_WITH_SPURIOUS_BLANK)._doc.as_yaml() + assert ( + "patch:\n\n" not in result + ), "spurious blank line between 'patch:' and first item was not removed" + assert "patch:\n - patches/a.patch" in result + + +# --------------------------------------------------------------------------- +# integrity block: no spurious blank line between 'integrity:' and 'hash:' +# --------------------------------------------------------------------------- + +_HASH = "sha256:" + "a" * 64 + +_INTEGRITY_MANIFEST = f"""\ +manifest: + version: '0.0' + projects: + - name: first + url: https://example.com/first + integrity: + hash: {_HASH} + - name: second + url: https://example.com/second +""" + +_INTEGRITY_MANIFEST_WITH_SPURIOUS_BLANK = f"""\ +manifest: + version: '0.0' + projects: + - name: first + url: https://example.com/first + integrity: + + hash: {_HASH} + - name: second + url: https://example.com/second +""" + + +def test_integrity_no_blank_line_before_hash() -> None: + """Loading a manifest with an integrity block must not insert a blank line + between 'integrity:' and 'hash:'.""" + result = Manifest.from_yaml(_INTEGRITY_MANIFEST)._doc.as_yaml() + assert ( + "integrity:\n hash:" in result + ), "spurious blank line appeared between 'integrity:' and 'hash:'" + + +def test_integrity_blank_line_after_hash_before_next_project() -> None: + """A blank line must still separate consecutive project entries when the + first project's last key is an integrity block.""" + result = Manifest.from_yaml(_INTEGRITY_MANIFEST)._doc.as_yaml() + assert ( + f"hash: {_HASH}\n\n - name: second" in result + ), "missing blank line between projects when integrity is the last key" + + +def test_integrity_fixes_spurious_blank_when_already_present() -> None: + """Re-loading a manifest with a spurious blank between 'integrity:' and + 'hash:' must heal it on the next serialisation.""" + result = Manifest.from_yaml(_INTEGRITY_MANIFEST_WITH_SPURIOUS_BLANK)._doc.as_yaml() + assert ( + "integrity:\n\n" not in result + ), "spurious blank line between 'integrity:' and 'hash:' was not removed" + assert "integrity:\n hash:" in result From 0b090226d5992b8019629d46b306d19134d4c335 Mon Sep 17 00:00:00 2001 From: Ben Spoor Date: Tue, 7 Apr 2026 19:39:29 +0200 Subject: [PATCH 2/3] Use builder pattern for manifest --- dfetch/commands/import_.py | 20 ++-- dfetch/manifest/manifest.py | 150 +++++++++++------------------- tests/test_import.py | 4 +- tests/test_manifest.py | 178 ++++++++++++++++-------------------- 4 files changed, 137 insertions(+), 215 deletions(-) diff --git a/dfetch/commands/import_.py b/dfetch/commands/import_.py index cecb537b7..3140b4b72 100644 --- a/dfetch/commands/import_.py +++ b/dfetch/commands/import_.py @@ -16,12 +16,10 @@ from collections.abc import Sequence from itertools import combinations -import yaml - import dfetch.commands.command from dfetch import DEFAULT_MANIFEST_NAME from dfetch.log import get_logger -from dfetch.manifest.manifest import Manifest +from dfetch.manifest.manifest import ManifestBuilder from dfetch.manifest.project import ProjectEntry from dfetch.manifest.remote import Remote from dfetch.project import determine_superproject_vcs @@ -59,21 +57,15 @@ def __call__(self, _: argparse.Namespace) -> None: break single_remote = len(remotes) == 1 - project_dicts = [] + builder = ManifestBuilder() + for remote in remotes: + builder.add_remote(remote) for p in projects: d = p.as_yaml() if single_remote: d.pop("remote", None) - project_dicts.append(d) - - manifest_data = { - "manifest": { - "version": "0.0", - "remotes": [r.as_yaml() for r in remotes], - "projects": project_dicts, - } - } - manifest = Manifest.from_yaml(yaml.dump(manifest_data, sort_keys=False)) + builder.add_project_dict(d) + manifest = builder.build() manifest.dump(DEFAULT_MANIFEST_NAME) logger.info(f"Created manifest ({DEFAULT_MANIFEST_NAME}) in {os.getcwd()}") diff --git a/dfetch/manifest/manifest.py b/dfetch/manifest/manifest.py index 79d04e8d1..55f05d436 100644 --- a/dfetch/manifest/manifest.py +++ b/dfetch/manifest/manifest.py @@ -29,7 +29,7 @@ import yaml from strictyaml import YAML, StrictYAMLError, YAMLValidationError, load -from strictyaml.ruamel.comments import CommentedMap, CommentedSeq +from strictyaml.ruamel.comments import CommentedMap from strictyaml.ruamel.error import CommentMark from strictyaml.ruamel.scalarstring import SingleQuotedScalarString from strictyaml.ruamel.tokens import CommentToken @@ -74,74 +74,6 @@ def _yaml_str(value: str) -> str | SingleQuotedScalarString: return value -def _ensure_blank_line_after_nested_map(parent: CommentedMap, key: str) -> None: - """Ensure a blank line appears after a nested mapping value. - - For a block mapping value (e.g. ``integrity:`` with sub-key ``hash:``), - the blank line must live inside the nested map — at - ``nested.ca.items[last_key][2]`` — not on the parent key. Putting it on - the parent key at position ``[2]`` or ``[3]`` lands it *between* the key - line and the first sub-key, producing a spurious blank line. - """ - # Clear any spurious blanks the parent map may carry on positions [2]/[3]. - parent_items = parent.ca.items.get(key) - if parent_items: - if parent_items[2] is not None: - parent_items[2] = None - if parent_items[3] is not None: - parent_items[3] = None - - nested: CommentedMap = parent[key] - nested_keys = list(nested.keys()) - if nested_keys: - _ensure_blank_line_after(nested, nested_keys[-1]) - - -def _ensure_blank_line_after_seq(seq: CommentedSeq) -> None: - """Ensure there is a blank line after the last item of *seq*. - - The blank line belongs at ``seq.ca.items[last_idx][0]`` (the pre-comment - slot of the last element). We also clear any extra newline stored at - ``seq.ca.items[0][1]``, which is where ruamel parks whitespace between the - parent key (e.g. ``patch:``) and the first ``-`` item — the source of the - spurious blank line that this function is designed to prevent. - """ - if not seq: - return - - # Clear spurious blank line between the key and the first list item. - first_item_ca = seq.ca.items.get(0) - if first_item_ca is not None and first_item_ca[1] is not None: - first_item_ca[1] = None - - # Ensure blank line after the last item for project-entry separation. - last_idx = len(seq) - 1 - existing = seq.ca.items.get(last_idx, [None, None, None, None]) - token = existing[0] - if token is None: - existing[0] = CommentToken("\n\n", CommentMark(0), None) - seq.ca.items[last_idx] = existing - elif not token.value.endswith("\n\n"): - token.value = token.value.rstrip("\n") + "\n\n" - - -def _ensure_blank_line_after(ca_map: CommentedMap, key: str) -> None: - """Ensure there is exactly one blank line after *key*'s value in *ca_map*. - - Blank lines in ruamel are stored as trailing ``CommentToken`` values at - position ``[2]`` of ``ca_map.ca.items[key]``. If no token exists one is - created; if one already exists its trailing newlines are normalised to two - (the line ending of the value itself plus the blank line). - """ - items = ca_map.ca.items.get(key, [None, None, None, None]) - token = items[2] - if token is None: - items[2] = CommentToken("\n\n", CommentMark(0), None) - ca_map.ca.items[key] = items - elif not token.value.endswith("\n\n"): - token.value = token.value.rstrip("\n") + "\n\n" - - @dataclass class ManifestEntryLocation: """Location of an entry in the manifest file.""" @@ -259,9 +191,6 @@ def __init__( "" if not default_remotes else default_remotes[0].name ) - # Ensure blank lines appear before 'remotes:' and 'projects:' when dumped. - self._ensure_section_spacing() - # Re-apply quoting to scalars whose style was stripped by strictyaml. self._normalize_string_scalars() @@ -473,33 +402,6 @@ def _normalize_string_scalars(self) -> None: if isinstance(entry[key], str): entry[key] = _yaml_str(entry[key]) - def _ensure_section_spacing(self) -> None: - """Ensure blank lines appear before ``remotes:`` and ``projects:`` and between entries.""" - manifest_mu = self._doc["manifest"].as_marked_up() - _ensure_blank_line_after(manifest_mu, "version") - - remotes_mu = manifest_mu.get("remotes") - if remotes_mu: - last_remote = remotes_mu[-1] - _ensure_blank_line_after(last_remote, list(last_remote.keys())[-1]) - - projects_mu = manifest_mu.get("projects", []) - for project in projects_mu[:-1]: # all entries except the last - last_key = list(project.keys())[-1] - value = project[last_key] - if isinstance(value, CommentedSeq): - # Clear any spurious blank line that ended up between the key - # and the first list item (stored at ca.items[key][2] in the - # parent map), then place the blank line after the last item. - key_items = project.ca.items.get(last_key) - if key_items and key_items[2] is not None: - key_items[2] = None - _ensure_blank_line_after_seq(value) - elif isinstance(value, CommentedMap): - _ensure_blank_line_after_nested_map(project, last_key) - else: - _ensure_blank_line_after(project, last_key) - def append_project_entry(self, project_entry: "ProjectEntry") -> None: """Append *project_entry* to the projects list in-memory. @@ -616,3 +518,53 @@ def find_remote_for_url(self, remote_url: str) -> Remote | None: if target.startswith(remote_base): return remote return None + + +class ManifestBuilder: + """Builds a new manifest YAML document from scratch with correct blank-line formatting. + + Use this when creating a manifest from scratch (e.g. ``dfetch import``). + For loading and modifying existing manifests use :class:`Manifest` directly. + """ + + def __init__(self) -> None: + """Create an empty builder.""" + self._remotes: list[Remote] = [] + self._project_dicts: list[dict[str, Any]] = [] + + def add_remote(self, remote: Remote) -> "ManifestBuilder": + """Add a remote entry.""" + self._remotes.append(remote) + return self + + def add_project_dict(self, project: dict[str, Any]) -> "ManifestBuilder": + """Add a project entry as a plain dict (as returned by ``ProjectEntry.as_yaml()``).""" + self._project_dicts.append(project) + return self + + def build(self) -> "Manifest": + """Render and load the manifest.""" + return Manifest.from_yaml(self._render()) + + def _render(self) -> str: + """Produce a correctly-formatted YAML string for the manifest.""" + data: dict[str, Any] = {"manifest": {"version": "0.0"}} + if self._remotes: + data["manifest"]["remotes"] = [r.as_yaml() for r in self._remotes] + data["manifest"]["projects"] = self._project_dicts + + raw = yaml.dump(data, sort_keys=False) + + # Insert a blank line before the remotes: and projects: section headers. + raw = re.sub(r"\n( (?:remotes|projects):)", r"\n\n\1", raw) + + # Insert a blank line between consecutive entries in each section. + # re.split with a capturing group keeps the delimiters in the list, so + # every even index ≥ 2 is a section body that can be substituted + # independently without one pass's changes affecting the next. + parts = re.split(r"(\n (?:remotes|projects):\n)", raw) + for i in range(2, len(parts), 2): + parts[i] = re.sub(r"\n( - )", r"\n\n\1", parts[i]) + raw = "".join(parts) + + return raw diff --git a/tests/test_import.py b/tests/test_import.py index cc1f70414..1a26817ac 100644 --- a/tests/test_import.py +++ b/tests/test_import.py @@ -53,7 +53,7 @@ def test_git_import(name, submodules): with patch( "dfetch.project.gitsuperproject.GitLocalRepo.submodules" ) as mocked_submodules: - with patch("dfetch.commands.import_.Manifest") as mocked_manifest: + with patch("dfetch.manifest.manifest.Manifest") as mocked_manifest: mocked_submodules.return_value = submodules with patch( "dfetch.project.gitsuperproject.GitLocalRepo.is_git" @@ -121,7 +121,7 @@ def test_svn_import(name, externals): with patch( "dfetch.project.svnsuperproject.SvnRepo.externals" ) as mocked_externals: - with patch("dfetch.commands.import_.Manifest") as mocked_manifest: + with patch("dfetch.manifest.manifest.Manifest") as mocked_manifest: with patch( "dfetch.project.gitsuperproject.GitLocalRepo.is_git" ) as is_git: diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 776790906..6ec3c6332 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -13,11 +13,13 @@ from dfetch import DEFAULT_MANIFEST_NAME from dfetch.manifest.manifest import ( Manifest, + ManifestBuilder, ManifestEntryLocation, RequestedProjectNotFoundError, ) from dfetch.manifest.parse import find_manifest, get_submanifests from dfetch.manifest.project import ProjectEntry, ProjectEntryDict +from dfetch.manifest.remote import Remote BASIC_MANIFEST = """ manifest: @@ -451,120 +453,96 @@ def test_append_project_entry_check_name_uniqueness_sees_new_entry() -> None: # --------------------------------------------------------------------------- -# patch list: no spurious blank line between 'patch:' and first item +# ManifestBuilder # --------------------------------------------------------------------------- -_PATCH_LIST_MANIFEST = """\ -manifest: - version: '0.0' - projects: - - name: first - url: https://example.com/first - patch: - - patches/a.patch - - patches/b.patch - - name: second - url: https://example.com/second -""" +_GITHUB = Remote({"name": "github", "url-base": "https://github.com/"}) -_PATCH_LIST_MANIFEST_WITH_SPURIOUS_BLANK = """\ -manifest: - version: '0.0' - projects: - - name: first - url: https://example.com/first - patch: - - - patches/a.patch - - patches/b.patch - - name: second - url: https://example.com/second -""" +def _builder_with_one_project() -> ManifestBuilder: + return ManifestBuilder().add_project_dict({"name": "mylib", "dst": "libs/mylib"}) -def test_patch_list_no_blank_line_before_first_item() -> None: - """Loading a manifest with a multi-entry patch list must not insert a blank - line between 'patch:' and the first list item.""" - result = Manifest.from_yaml(_PATCH_LIST_MANIFEST)._doc.as_yaml() - assert ( - "patch:\n - patches/a.patch" in result - ), "spurious blank line appeared between 'patch:' and first item" +def test_builder_returns_manifest() -> None: + manifest = _builder_with_one_project().build() + assert isinstance(manifest, Manifest) -def test_patch_list_blank_line_after_last_item_before_next_project() -> None: - """A blank line must still separate consecutive project entries when the - first project's last key is a multi-entry patch list.""" - result = Manifest.from_yaml(_PATCH_LIST_MANIFEST)._doc.as_yaml() - assert ( - " - patches/b.patch\n\n - name: second" in result - ), "missing blank line between projects when patch list is the last key" +def test_builder_sets_version() -> None: + manifest = _builder_with_one_project().build() + assert manifest.version == "0.0" -def test_patch_list_fixes_spurious_blank_when_already_present() -> None: - """Re-loading a manifest that already contains the spurious blank line must - heal it on the next serialisation.""" - result = Manifest.from_yaml(_PATCH_LIST_MANIFEST_WITH_SPURIOUS_BLANK)._doc.as_yaml() - assert ( - "patch:\n\n" not in result - ), "spurious blank line between 'patch:' and first item was not removed" - assert "patch:\n - patches/a.patch" in result +def test_builder_projects_are_accessible() -> None: + manifest = ( + ManifestBuilder() + .add_project_dict({"name": "alpha", "dst": "libs/alpha"}) + .add_project_dict({"name": "beta", "dst": "libs/beta"}) + .build() + ) + names = [p.name for p in manifest.projects] + assert names == ["alpha", "beta"] -# --------------------------------------------------------------------------- -# integrity block: no spurious blank line between 'integrity:' and 'hash:' -# --------------------------------------------------------------------------- -_HASH = "sha256:" + "a" * 64 +def test_builder_remote_is_accessible() -> None: + manifest = ( + ManifestBuilder() + .add_remote(_GITHUB) + .add_project_dict({"name": "mylib", "remote": "github"}) + .build() + ) + assert len(manifest.remotes) == 1 + assert manifest.remotes[0].name == "github" -_INTEGRITY_MANIFEST = f"""\ -manifest: - version: '0.0' - projects: - - name: first - url: https://example.com/first - integrity: - hash: {_HASH} - - name: second - url: https://example.com/second -""" -_INTEGRITY_MANIFEST_WITH_SPURIOUS_BLANK = f"""\ -manifest: - version: '0.0' - projects: - - name: first - url: https://example.com/first - integrity: +def test_builder_blank_line_after_version() -> None: + manifest = _builder_with_one_project().build() + assert "version: '0.0'\n\n" in manifest._doc.as_yaml() - hash: {_HASH} - - name: second - url: https://example.com/second -""" +def test_builder_blank_line_before_projects() -> None: + manifest = _builder_with_one_project().build() + assert "\n\n projects:\n" in manifest._doc.as_yaml() -def test_integrity_no_blank_line_before_hash() -> None: - """Loading a manifest with an integrity block must not insert a blank line - between 'integrity:' and 'hash:'.""" - result = Manifest.from_yaml(_INTEGRITY_MANIFEST)._doc.as_yaml() - assert ( - "integrity:\n hash:" in result - ), "spurious blank line appeared between 'integrity:' and 'hash:'" - - -def test_integrity_blank_line_after_hash_before_next_project() -> None: - """A blank line must still separate consecutive project entries when the - first project's last key is an integrity block.""" - result = Manifest.from_yaml(_INTEGRITY_MANIFEST)._doc.as_yaml() - assert ( - f"hash: {_HASH}\n\n - name: second" in result - ), "missing blank line between projects when integrity is the last key" - - -def test_integrity_fixes_spurious_blank_when_already_present() -> None: - """Re-loading a manifest with a spurious blank between 'integrity:' and - 'hash:' must heal it on the next serialisation.""" - result = Manifest.from_yaml(_INTEGRITY_MANIFEST_WITH_SPURIOUS_BLANK)._doc.as_yaml() - assert ( - "integrity:\n\n" not in result - ), "spurious blank line between 'integrity:' and 'hash:' was not removed" - assert "integrity:\n hash:" in result + +def test_builder_blank_line_before_remotes() -> None: + manifest = ( + ManifestBuilder() + .add_remote(_GITHUB) + .add_project_dict({"name": "mylib"}) + .build() + ) + assert "\n\n remotes:\n" in manifest._doc.as_yaml() + + +def test_builder_blank_line_between_projects() -> None: + manifest = ( + ManifestBuilder() + .add_project_dict({"name": "alpha", "dst": "libs/alpha"}) + .add_project_dict({"name": "beta", "dst": "libs/beta"}) + .build() + ) + assert "\n\n - name: beta" in manifest._doc.as_yaml() + + +def test_builder_no_blank_line_before_first_project() -> None: + manifest = ( + ManifestBuilder() + .add_project_dict({"name": "alpha", "dst": "libs/alpha"}) + .add_project_dict({"name": "beta", "dst": "libs/beta"}) + .build() + ) + yaml_text = manifest._doc.as_yaml() + assert "projects:\n - name: alpha" in yaml_text + + +def test_builder_blank_line_between_remotes() -> None: + second = Remote({"name": "gitlab", "url-base": "https://gitlab.com/"}) + manifest = ( + ManifestBuilder() + .add_remote(_GITHUB) + .add_remote(second) + .add_project_dict({"name": "mylib"}) + .build() + ) + assert "\n\n - name: gitlab" in manifest._doc.as_yaml() From affd7a07c61ddba350e839b2e4e3babf880723ec Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 8 Apr 2026 19:58:23 +0000 Subject: [PATCH 3/3] review comments --- dfetch/manifest/manifest.py | 75 +++++++++++++++++++++++++++++-------- tests/test_manifest.py | 9 ++++- 2 files changed, 67 insertions(+), 17 deletions(-) diff --git a/dfetch/manifest/manifest.py b/dfetch/manifest/manifest.py index 55f05d436..d1848aa43 100644 --- a/dfetch/manifest/manifest.py +++ b/dfetch/manifest/manifest.py @@ -60,6 +60,21 @@ def _ensure_unique(seq: list[dict[str, Any]], key: str, context: str) -> None: ) +def _normalize_value(value: Any) -> Any: + """Recursively normalize strings in mappings and sequences.""" + if isinstance(value, str): + return _yaml_str(value) + if isinstance(value, dict): + for key in list(value.keys()): + value[key] = _normalize_value(value[key]) + return value + if isinstance(value, list): + for i, val in enumerate(value): + value[i] = _normalize_value(val) + return value + return value + + def _yaml_str(value: str) -> str | SingleQuotedScalarString: """Return SingleQuotedScalarString if value would be misread as non-string. @@ -162,6 +177,26 @@ def __init__( path: str | os.PathLike[str] | None = None, ) -> None: """Create the manifest.""" + manifest_data = self._initialize_basic_attributes(doc, path) + remotes_raw = manifest_data.get("remotes", []) + projects_raw = manifest_data["projects"] + self._validate_manifest_data(remotes_raw, projects_raw) + self._setup_default_remote(remotes_raw) + # Re-apply quoting to scalars whose style was stripped by strictyaml. + self._normalize_string_scalars() + + def _initialize_basic_attributes( + self, doc: YAML, path: str | os.PathLike[str] | None + ) -> dict[str, Any]: + """Initialize basic manifest attributes and ensure version is properly quoted. + + Args: + doc: The parsed YAML document. + path: Optional path to the manifest file. + + Returns: + The manifest data dictionary. + """ self._doc = doc self.__path: str = str(path) if path else "" self.__relative_path: str = ( @@ -171,19 +206,30 @@ def __init__( manifest_data: dict[str, Any] = cast(dict[str, Any], doc.data)["manifest"] self.__version: str = str(manifest_data.get("version", self.CURRENT_VERSION)) - # Ensure version is always written as a quoted string by dump(). doc["manifest"].as_marked_up()["version"] = SingleQuotedScalarString( self.__version ) - remotes_raw = manifest_data.get("remotes", []) - projects_raw = manifest_data["projects"] + return manifest_data + def _validate_manifest_data( + self, + remotes_raw: list[dict[str, Any]], + projects_raw: list[dict[str, Any]], + ) -> None: + """Validate that remotes and projects have unique names and destinations.""" _ensure_unique(remotes_raw, "name", "manifest.remotes") _ensure_unique(projects_raw, "name", "manifest.projects") - _ensure_unique(projects_raw, "dst", "manifest.projects") + projects_with_effective_dst = [ + {"effective_dst": project.get("dst") or project["name"]} + for project in projects_raw + ] + _ensure_unique( + projects_with_effective_dst, "effective_dst", "manifest.projects" + ) - # Determine and cache the default remote name (remotes don't change at runtime). + def _setup_default_remote(self, remotes_raw: Sequence[RemoteDict | Remote]) -> None: + """Determine and cache the default remote name.""" remotes_dict, default_remotes = self._determine_remotes(remotes_raw) if not default_remotes: default_remotes = list(remotes_dict.values())[0:1] @@ -191,9 +237,6 @@ def __init__( "" if not default_remotes else default_remotes[0].name ) - # Re-apply quoting to scalars whose style was stripped by strictyaml. - self._normalize_string_scalars() - def _build_projects( self, projects: Sequence[ @@ -360,6 +403,10 @@ def dump(self, path: str | None = None) -> None: target = path or self.__path if not target: raise RuntimeError("Cannot dump manifest with no path") + # Re-normalize string scalars to ensure newly added/modified entries + # (e.g., from append_project_entry or update_project_version) are + # properly quoted before serializing. + self._normalize_string_scalars() with open(target, "w", encoding="utf-8", newline="") as manifest_file: manifest_file.write(self._doc.as_yaml()) @@ -388,19 +435,17 @@ def _normalize_string_scalars(self) -> None: strictyaml strips quoting-style information during schema validation, so a value like ``'176'`` (single-quoted in the source YAML) becomes a plain Python ``str`` in the ruamel layer. ruamel then serialises it without - quotes, producing the integer ``176``. We walk all top-level string - fields in every project and remote entry and restore the quoting wherever - ``_yaml_str`` determines it is needed. + quotes, producing the integer ``176``. We walk all string fields in every + project and remote entry (including nested strings in mappings and sequences) + and restore the quoting wherever ``_yaml_str`` determines it is needed. """ manifest_mu = self._doc["manifest"].as_marked_up() for entry in manifest_mu.get("projects", []): for key in list(entry.keys()): - if isinstance(entry[key], str): - entry[key] = _yaml_str(entry[key]) + entry[key] = _normalize_value(entry[key]) for entry in manifest_mu.get("remotes", []): for key in list(entry.keys()): - if isinstance(entry[key], str): - entry[key] = _yaml_str(entry[key]) + entry[key] = _normalize_value(entry[key]) def append_project_entry(self, project_entry: "ProjectEntry") -> None: """Append *project_entry* to the projects list in-memory. diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 6ec3c6332..00d99006c 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -420,9 +420,14 @@ def test_version_parsed_as_string(manifest_text: str, expected_version: str) -> def test_dump_writes_version_as_quoted_string(manifest_text: str) -> None: """dump must always write version as a quoted YAML string.""" manifest = Manifest.from_yaml(manifest_text) - result = manifest._doc.as_yaml() + # Test the public dump() API by capturing its output with mock_open. + m = mock_open() + with patch("builtins.open", m): + manifest.dump("/tmp/test_manifest.yaml") + # Get the written content from mock_open's call arguments. + written_content = "".join(call.args[0] for call in m().write.call_args_list) # The version value must appear quoted so that YAML parsers read it as a string. - assert f"version: '{manifest.version}'" in result + assert f"version: '{manifest.version}'" in written_content # ---------------------------------------------------------------------------