diff --git a/aws_doc_sdk_examples_tools/agent/update_doc_gen.py b/aws_doc_sdk_examples_tools/agent/update_doc_gen.py index 50b445a..0a78ab4 100644 --- a/aws_doc_sdk_examples_tools/agent/update_doc_gen.py +++ b/aws_doc_sdk_examples_tools/agent/update_doc_gen.py @@ -26,7 +26,7 @@ def examples_from_updates(updates_path: Path) -> Iterable[Example]: examples = [ Example( id=id, - file=None, + file=Path(), languages={}, title=update.get("title"), title_abbrev=update.get("title_abbrev"), diff --git a/aws_doc_sdk_examples_tools/lliam/__init__.py b/aws_doc_sdk_examples_tools/lliam/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/aws_doc_sdk_examples_tools/lliam/service_layer/dedupe_reservoir.py b/aws_doc_sdk_examples_tools/lliam/service_layer/dedupe_reservoir.py index 3c0106b..8a56604 100644 --- a/aws_doc_sdk_examples_tools/lliam/service_layer/dedupe_reservoir.py +++ b/aws_doc_sdk_examples_tools/lliam/service_layer/dedupe_reservoir.py @@ -1,7 +1,10 @@ +import logging +import re + from collections import Counter from dataclasses import replace -import logging -from typing import Dict +from pathlib import Path +from typing import Dict, Iterable, List from aws_doc_sdk_examples_tools.doc_gen import DocGen from aws_doc_sdk_examples_tools.lliam.domain.commands import DedupeReservoir @@ -12,33 +15,75 @@ logger = logging.getLogger(__name__) -def make_title_abbreviation(example: Example, counter: Counter): +def make_abbrev(example: Example, counter: Counter) -> str: + if not example.title_abbrev: + return "" + count = counter[example.title_abbrev] abbrev = f"{example.title_abbrev} ({count + 1})" if count else example.title_abbrev counter[example.title_abbrev] += 1 return abbrev -def handle_dedupe_reservoir(cmd: DedupeReservoir, uow: None): - doc_gen = DocGen.from_root(cmd.root, validation=ValidationConfig(check_aws=False)) +def reset_abbrev_count(examples: Dict[str, Example]) -> Dict[str, Example]: + """ + Reset all duplicate title abbreviations back to their un-enumerated state. + + I don't love this. Ideally we would only update new title_abbrev fields + with the incremented count. But there's no way to know which ones are new + or even which particular title_abbrev is the original. - examples: Dict[str, Example] = {} + Ex. + title_abbrev: some policy + title_abbrev: some policy (2) + title_abbrev: some policy + title_abbrev: some policy - for id, example in doc_gen.examples.items(): - if cmd.packages and example.file: - package = example.file.name.split("_metadata.yaml")[0] - if package in cmd.packages: - examples[id] = example - else: - examples[id] = example + Which one is the original? Which ones are new? + """ - title_abbrev_counts: Counter = Counter() + updated_examples = {} for id, example in examples.items(): - examples[id] = replace( + updated_examples[id] = replace( example, - title_abbrev=make_title_abbreviation(example, title_abbrev_counts), + title_abbrev=re.sub(r"(\s\(\d+\))*$", "", example.title_abbrev or ""), ) + return updated_examples + + +def example_in_packages(example: Example, packages: List[str]) -> bool: + if packages and example.file: + (example_pkg_name, *_) = example.file.name.split("_metadata.yaml") + if not example_pkg_name in packages: + return False + return True + + +def dedupe_examples( + examples: Dict[str, Example], packages: List[str] +) -> Dict[str, Example]: + filtered = { + id: ex for id, ex in examples.items() if example_in_packages(ex, packages) + } + + reset_examples = reset_abbrev_count(filtered) + + counter: Counter = Counter() + + return { + id: replace(ex, title_abbrev=make_abbrev(ex, counter)) + for id, ex in reset_examples.items() + } + + +def write_examples(examples: Dict[str, Example], root: Path): writes = prepare_write(examples) - write_many(cmd.root, writes) + write_many(root, writes) + + +def handle_dedupe_reservoir(cmd: DedupeReservoir, uow: None): + doc_gen = DocGen.from_root(cmd.root, validation=ValidationConfig(check_aws=False)) + examples = dedupe_examples(doc_gen.examples, cmd.packages) + write_examples(examples, cmd.root) diff --git a/aws_doc_sdk_examples_tools/lliam/service_layer/run_ailly.py b/aws_doc_sdk_examples_tools/lliam/service_layer/run_ailly.py index bc6a651..fe45a71 100644 --- a/aws_doc_sdk_examples_tools/lliam/service_layer/run_ailly.py +++ b/aws_doc_sdk_examples_tools/lliam/service_layer/run_ailly.py @@ -22,6 +22,7 @@ "ailly", "--max-depth", "10", + "--no-overwrite", "--root", str(AILLY_DIR_PATH), ] diff --git a/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py b/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py index c8703cf..442f8d8 100644 --- a/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py +++ b/aws_doc_sdk_examples_tools/lliam/service_layer/update_doc_gen.py @@ -1,7 +1,6 @@ from dataclasses import replace import json import logging -from collections import Counter from pathlib import Path from typing import Dict, Iterable, List @@ -37,7 +36,7 @@ def examples_from_updates(updates: Updates) -> Iterable[Example]: examples = [ Example( id=id, - file=None, + file=Path(), languages={}, title=update.get("title"), title_abbrev=update.get("title_abbrev"), diff --git a/aws_doc_sdk_examples_tools/lliam/test/dedupe_reservoir_test.py b/aws_doc_sdk_examples_tools/lliam/test/dedupe_reservoir_test.py new file mode 100644 index 0000000..983c137 --- /dev/null +++ b/aws_doc_sdk_examples_tools/lliam/test/dedupe_reservoir_test.py @@ -0,0 +1,122 @@ +from collections import Counter +from pathlib import Path + +from aws_doc_sdk_examples_tools.metadata import Example +from aws_doc_sdk_examples_tools.lliam.service_layer.dedupe_reservoir import ( + make_abbrev, + example_in_packages, + reset_abbrev_count, + dedupe_examples, +) + + +def test_make_abbrev_continues_numbering(): + """Test that numbering continues from existing numbered titles.""" + counter = Counter({"Some abbrev": 2}) + example = Example(id="test", file=Path(), languages={}, title_abbrev="Some abbrev") + result = make_abbrev(example, counter) + + assert result == "Some abbrev (3)" + + +def test_make_abbrev_first_occurrence(): + """Test that first occurrence doesn't get numbered.""" + counter = Counter() + example = Example(id="test", file=Path(), languages={}, title_abbrev="New abbrev") + result = make_abbrev(example, counter) + + assert result == "New abbrev" + assert counter["New abbrev"] == 1 + + +def test_example_in_packages_no_packages(): + """Test that example is included when no packages specified.""" + example = Example(id="test", file=Path("test_metadata.yaml"), languages={}) + result = example_in_packages(example, []) + + assert result is True + + +def test_example_in_packages_matching_package(): + """Test that example is included when package matches.""" + example = Example(id="test", file=Path("pkg1_metadata.yaml"), languages={}) + result = example_in_packages(example, ["pkg1", "pkg2"]) + + assert result is True + + +def test_example_in_packages_non_matching_package(): + """Test that example is excluded when package doesn't match.""" + example = Example(id="test", file=Path("pkg3_metadata.yaml"), languages={}) + result = example_in_packages(example, ["pkg1", "pkg2"]) + + assert result is False + + +def test_build_abbrev_counter(): + """Test building counter from examples with existing numbered titles.""" + examples = { + "1": Example(id="1", file=Path(), languages={}, title_abbrev="Test (1)"), + "2": Example(id="2", file=Path(), languages={}, title_abbrev="Test (2)"), + "3": Example(id="3", file=Path(), languages={}, title_abbrev="Other"), + "4": Example(id="4", file=Path(), languages={}, title_abbrev="Test"), + } + + result = reset_abbrev_count(examples) + + assert result["1"].title_abbrev == "Test" + assert result["2"].title_abbrev == "Test" + assert result["3"].title_abbrev == "Other" + assert result["4"].title_abbrev == "Test" + + +def test_build_abbrev_counter_empty(): + """Test building counter from empty examples list.""" + result = reset_abbrev_count({}) + + assert len(result) == 0 + + +def test_dedupe_examples(): + """Test deduping examples with existing numbered titles.""" + examples = { + "ex1": Example( + id="ex1", + file=Path("pkg1_metadata.yaml"), + languages={}, + title_abbrev="Test (2) (2)", + ), + "ex2": Example( + id="ex2", + file=Path("pkg1_metadata.yaml"), + languages={}, + title_abbrev="Test (3) (3) (3)", + ), + "ex3": Example( + id="ex3", file=Path("pkg1_metadata.yaml"), languages={}, title_abbrev="Test" + ), + "ex4": Example( + id="ex4", file=Path("pkg1_metadata.yaml"), languages={}, title_abbrev="Test" + ), + "ex5": Example( + id="ex5", file=Path("pkg1_metadata.yaml"), languages={}, title_abbrev="Test" + ), + "ex6": Example( + id="ex6", file=Path("pkg2_metadata.yaml"), languages={}, title_abbrev="Test" + ), + } + + result = dedupe_examples(examples, []) + + assert len(result) == 6 + title_abbrevs = sorted( + [ex.title_abbrev for ex in result.values() if ex.title_abbrev] + ) + assert title_abbrevs == [ + "Test", + "Test (2)", + "Test (3)", + "Test (4)", + "Test (5)", + "Test (6)", + ] diff --git a/aws_doc_sdk_examples_tools/lliam/test/update_doc_gen_test.py b/aws_doc_sdk_examples_tools/lliam/test/update_doc_gen_test.py index 9d28392..4f5ca82 100644 --- a/aws_doc_sdk_examples_tools/lliam/test/update_doc_gen_test.py +++ b/aws_doc_sdk_examples_tools/lliam/test/update_doc_gen_test.py @@ -32,7 +32,7 @@ def test_update_examples_title_abbrev(doc_gen_tributary: DocGen): # Create an example with a title_abbrev to update update_example = Example( id="iam_policies_example", - file=None, + file=Path(), languages={}, title_abbrev="Updated Title Abbrev", ) diff --git a/aws_doc_sdk_examples_tools/metadata.py b/aws_doc_sdk_examples_tools/metadata.py index 92a0dd9..84cb4be 100755 --- a/aws_doc_sdk_examples_tools/metadata.py +++ b/aws_doc_sdk_examples_tools/metadata.py @@ -139,7 +139,7 @@ def validate(self, errors: MetadataErrors, root: Path): @dataclass class Example: id: str - file: Optional[Path] + file: Path languages: Dict[str, Language] # Human readable title. title: Optional[str] = field(default="") diff --git a/aws_doc_sdk_examples_tools/yaml_mapper.py b/aws_doc_sdk_examples_tools/yaml_mapper.py index 2470c91..d2bbef3 100644 --- a/aws_doc_sdk_examples_tools/yaml_mapper.py +++ b/aws_doc_sdk_examples_tools/yaml_mapper.py @@ -1,6 +1,7 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +from pathlib import Path from typing import Dict, Set, Tuple, Any, List, Optional, Union from .metadata import ( Example, @@ -112,7 +113,7 @@ def example_from_yaml( return ( Example( id="", - file=None, + file=Path(), title=title, title_abbrev=title_abbrev, category=category,