From 0c19972562364e4c0b1621b25a81ad842d421ee5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 5 Mar 2026 03:55:24 +0100 Subject: [PATCH 1/7] use Path instead of str. add UntranslatedEntry dataclass --- PythonScripts/audit_translations/auditor.py | 27 ++++----- PythonScripts/audit_translations/cli.py | 7 +-- .../audit_translations/line_resolver.py | 2 +- .../{dataclasses.py => models.py} | 24 +++++--- PythonScripts/audit_translations/parsers.py | 19 +++---- PythonScripts/audit_translations/renderer.py | 6 +- .../audit_translations/tests/test_auditor.py | 57 ++++++++++--------- .../tests/test_cli_end_to_end.py | 2 +- .../tests/test_line_resolver.py | 2 +- .../tests/test_output_objects.py | 8 +-- .../audit_translations/tests/test_parsers.py | 10 ++-- 11 files changed, 84 insertions(+), 80 deletions(-) rename PythonScripts/audit_translations/{dataclasses.py => models.py} (84%) diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index d30126c5..223c4b75 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -11,13 +11,10 @@ from rich.panel import Panel from rich.table import Table -from .dataclasses import ComparisonResult, RuleInfo +from .models import ComparisonResult, RuleInfo from .parsers import diff_rules, parse_yaml_file from .renderer import console, print_warnings -# Re-export console so existing `from .auditor import console` callers keep working. -__all__ = ["console"] - GREEN_FILE_COUNT_THRESHOLD = 7 YELLOW_FILE_COUNT_THRESHOLD = 4 @@ -72,16 +69,16 @@ def collect_from(directory: Path, root: Path) -> None: def compare_files( - english_path: str, - translated_path: str, + english_path: Path, + translated_path: Path, issue_filter: set[str] | None = None, - translated_region_path: str | None = None, - english_region_path: str | None = None, + translated_region_path: Path | None = None, + english_region_path: Path | None = None, ) -> ComparisonResult: """Compare English and translated YAML files""" - def load_rules(path: str | None) -> list[RuleInfo]: - if path and Path(path).exists(): + def load_rules(path: Path | None) -> list[RuleInfo]: + if path and path.exists(): rules, _ = parse_yaml_file(path) return rules return [] @@ -210,11 +207,11 @@ def audit_language( continue result = compare_files( - str(english_path), - str(translated_path), + english_path, + translated_path, issue_filter, - str(translated_region_path) if translated_region_path and translated_region_path.exists() else None, - str(english_region_path) if english_region_path and english_region_path.exists() else None, + translated_region_path if translated_region_path and translated_region_path.exists() else None, + english_region_path if english_region_path and english_region_path.exists() else None, ) if result.has_issues: @@ -226,7 +223,7 @@ def audit_language( files_ok += 1 total_missing += len(result.missing_rules) - total_untranslated += sum(len(entries) for _, entries in result.untranslated_text) + total_untranslated += sum(len(entries) for _rule, entries in result.untranslated_text) total_extra += len(result.extra_rules) total_differences += len(result.rule_differences) diff --git a/PythonScripts/audit_translations/cli.py b/PythonScripts/audit_translations/cli.py index 1ae382a0..ed04c402 100644 --- a/PythonScripts/audit_translations/cli.py +++ b/PythonScripts/audit_translations/cli.py @@ -7,7 +7,8 @@ import argparse import sys -from .auditor import audit_language, console, list_languages +from .auditor import audit_language, list_languages +from .renderer import console def main() -> None: @@ -65,7 +66,3 @@ def main() -> None: issue_filter, args.verbose, ) - - -if __name__ == "__main__": - main() diff --git a/PythonScripts/audit_translations/line_resolver.py b/PythonScripts/audit_translations/line_resolver.py index df867fd6..8950049d 100644 --- a/PythonScripts/audit_translations/line_resolver.py +++ b/PythonScripts/audit_translations/line_resolver.py @@ -4,7 +4,7 @@ Maps rule diff types and structure tokens to precise YAML source line numbers. """ -from .dataclasses import DiffType, RuleDifference, RuleInfo +from .models import DiffType, RuleDifference, RuleInfo from .parsers import extract_structure_elements diff --git a/PythonScripts/audit_translations/dataclasses.py b/PythonScripts/audit_translations/models.py similarity index 84% rename from PythonScripts/audit_translations/dataclasses.py rename to PythonScripts/audit_translations/models.py index 03b27996..e156073c 100644 --- a/PythonScripts/audit_translations/dataclasses.py +++ b/PythonScripts/audit_translations/models.py @@ -6,6 +6,7 @@ from dataclasses import dataclass, field from enum import StrEnum +from pathlib import Path from typing import Any @@ -27,6 +28,15 @@ class DiffType(StrEnum): STRUCTURE = "structure" # Control-flow block shape/order differs (if/then/else/with/replace). +@dataclass +class UntranslatedEntry: + """A single untranslated text fragment found in a rule.""" + + key: str + text: str + line: int | None + + @dataclass class RuleInfo: """ @@ -46,9 +56,9 @@ class RuleInfo: Raw YAML block for this rule (used for reporting/snippets). data : Any | None Parsed YAML node for the rule; used for structural diffs. - untranslated_entries : list[tuple[str, str, int | None]] - List of (key, text, line) entries extracted from lowercase translation keys. - This preserves exact text fragments and YAML line numbers for diagnostics. + untranslated_entries : list[UntranslatedEntry] + Entries extracted from lowercase translation keys. + Preserves exact text fragments and YAML line numbers for diagnostics. line_map : dict[str, list[int]] Mapping of element type to line numbers for rule components like match, conditions, variables, and structural tokens. This is used to point @@ -63,7 +73,7 @@ class RuleInfo: line_number: int raw_content: str data: Any | None = None - untranslated_entries: list[tuple[str, str, int | None]] = field(default_factory=list) + untranslated_entries: list[UntranslatedEntry] = field(default_factory=list) line_map: dict[str, list[int]] = field(default_factory=dict) audit_ignore: bool = False @@ -73,7 +83,7 @@ def has_untranslated_text(self) -> bool: @property def untranslated_keys(self) -> list[str]: - return [entry[1] for entry in self.untranslated_entries] + return [entry.text for entry in self.untranslated_entries] @dataclass @@ -98,8 +108,8 @@ class ComparisonResult: missing_rules: list[RuleInfo] # Rules in English but not in translation extra_rules: list[RuleInfo] # Rules in translation but not in English - untranslated_text: list[tuple[RuleInfo, list[tuple[str, str, int | None]]]] # Rules with lowercase t/ot/ct - file_path: str + untranslated_text: list[tuple[RuleInfo, list[UntranslatedEntry]]] + file_path: Path | str english_rule_count: int translated_rule_count: int rule_differences: list[RuleDifference] = field(default_factory=list) # Fine-grained diffs diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py index e1540300..f8142da3 100644 --- a/PythonScripts/audit_translations/parsers.py +++ b/PythonScripts/audit_translations/parsers.py @@ -13,7 +13,7 @@ from ruamel.yaml import YAML from ruamel.yaml.scanner import ScannerError -from .dataclasses import DiffType, RuleDifference, RuleInfo +from .models import DiffType, RuleDifference, RuleInfo, UntranslatedEntry _yaml = YAML() _yaml.preserve_quotes = True @@ -22,13 +22,12 @@ _MATCH_EXPR = parse("$.match") -def is_unicode_file(file_path: str) -> bool: +def is_unicode_file(file_path: Path) -> bool: """Check if this is a unicode.yaml or unicode-full.yaml file""" - basename = Path(file_path).name - return basename in ("unicode.yaml", "unicode-full.yaml") + return file_path.name in ("unicode.yaml", "unicode-full.yaml") -def parse_yaml_file(file_path: str, strict: bool = False) -> tuple[list[RuleInfo], str]: +def parse_yaml_file(file_path: Path, strict: bool = False) -> tuple[list[RuleInfo], str]: """ Parse a YAML file and extract rules. Returns list of RuleInfo and the raw file content. @@ -165,15 +164,15 @@ def find_untranslated_text_values(node: Any) -> list[str]: Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) that should be uppercase in translations. Returns list of the untranslated text values found. """ - return [entry[1] for entry in find_untranslated_text_entries(node)] + return [entry.text for entry in find_untranslated_text_entries(node)] -def find_untranslated_text_entries(node: Any) -> list[tuple[str, str, int | None]]: +def find_untranslated_text_entries(node: Any) -> list[UntranslatedEntry]: """ Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) and their line numbers. - Returns list of (key, text, line_number) entries. Line number is 1-based when available. + Returns list of UntranslatedEntry. Line number is 1-based when available. """ - entries: list[tuple[str, str, int | None]] = [] + entries: list[UntranslatedEntry] = [] translation_keys = {"t", "ot", "ct", "spell", "pronounce", "ifthenelse"} def should_add(text: str) -> bool: @@ -185,7 +184,7 @@ def should_add(text: str) -> bool: for key, child, parent in iter_field_matches(node): if key.lower() in translation_keys and not key.isupper() and isinstance(child, str) and should_add(child): - entries.append((key, child, mapping_key_line(parent, key))) + entries.append(UntranslatedEntry(key, child, mapping_key_line(parent, key))) return entries diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index aa4135c1..438347bc 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -10,7 +10,7 @@ from rich.console import Console from rich.markup import escape -from .dataclasses import ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo +from .models import ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo, UntranslatedEntry from .line_resolver import resolve_diff_lines console = Console() @@ -80,8 +80,8 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any]) add_issue(rule, issue_group_key(IssueType.MISSING_RULE), {"line_en": rule.line_number}) for rule, entries in result.untranslated_text: - for _, text, line in entries: - add_issue(rule, issue_group_key(IssueType.UNTRANSLATED_TEXT), {"line_tr": line or rule.line_number, "text": text}) + for entry in entries: + add_issue(rule, issue_group_key(IssueType.UNTRANSLATED_TEXT), {"line_tr": entry.line or rule.line_number, "text": entry.text}) for diff in result.rule_differences: lines = resolve_diff_lines(diff) diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py index 0909182d..921ddce5 100644 --- a/PythonScripts/audit_translations/tests/test_auditor.py +++ b/PythonScripts/audit_translations/tests/test_auditor.py @@ -6,8 +6,9 @@ import pytest -from ..auditor import compare_files, console, get_yaml_files, list_languages -from ..dataclasses import ComparisonResult, DiffType, RuleDifference, RuleInfo +from ..auditor import compare_files, get_yaml_files, list_languages +from ..renderer import console +from ..models import ComparisonResult, DiffType, RuleDifference, RuleInfo, UntranslatedEntry from ..line_resolver import resolve_diff_lines from ..renderer import print_warnings @@ -57,12 +58,12 @@ def aggregate_issue_counts( missing = untranslated = extra = diffs = total = 0 for file_name in files: result = compare_files( - str(english_dir / file_name), - str(translated_dir / file_name), + english_dir / file_name, + translated_dir / file_name, issue_filter, ) missing += len(result.missing_rules) - untranslated += sum(len(entries) for _, entries in result.untranslated_text) + untranslated += sum(len(entries) for _rule, entries in result.untranslated_text) extra += len(result.extra_rules) diffs += len(result.rule_differences) total += len(result.missing_rules) + len(result.extra_rules) + len(result.rule_differences) @@ -91,7 +92,7 @@ def test_comparison_result_object_fields() -> None: result = ComparisonResult( missing_rules=[missing], extra_rules=[extra], - untranslated_text=[(untranslated, [("t", "x", 31)])], + untranslated_text=[(untranslated, [UntranslatedEntry("t", "x", 31)])], rule_differences=[diff], file_path="", english_rule_count=1, @@ -101,7 +102,7 @@ def test_comparison_result_object_fields() -> None: assert result.missing_rules[0].line_number == 10 assert result.extra_rules[0].line_number == 20 assert result.untranslated_text[0][0].line_number == 30 - assert result.untranslated_text[0][1] == [("t", "x", 31)] + assert result.untranslated_text[0][1] == [UntranslatedEntry("t", "x", 31)] assert result.rule_differences[0].diff_type is DiffType.MATCH assert resolve_diff_lines(result.rule_differences[0]) == (40, 41) @@ -187,10 +188,10 @@ def test_compare_files_merges_region_rules(tmp_path) -> None: ) result = compare_files( - str(english_file), - str(translated_file), + english_file, + translated_file, None, - str(translated_region_file), + translated_region_file, ) assert result.missing_rules == [] @@ -227,7 +228,7 @@ def test_compare_files_skips_untranslated_and_diffs_when_audit_ignored(tmp_path) encoding="utf-8", ) - result = compare_files(str(english_file), str(translated_file)) + result = compare_files(english_file, translated_file) assert result.missing_rules == [] assert result.extra_rules == [] @@ -315,8 +316,8 @@ def test_print_warnings_omits_snippets_when_not_verbose(fixed_console_width) -> fixtures_dir = base_dir / "fixtures" golden_path = base_dir / "golden" / "rich" / "structure_diff_nonverbose.golden" result = compare_files( - str(fixtures_dir / "en" / "structure_diff.yaml"), - str(fixtures_dir / "de" / "structure_diff.yaml"), + fixtures_dir / "en" / "structure_diff.yaml", + fixtures_dir / "de" / "structure_diff.yaml", ) with console.capture() as capture: @@ -336,8 +337,8 @@ def test_print_warnings_includes_snippets_when_verbose(fixed_console_width) -> N fixtures_dir = base_dir / "fixtures" golden_path = base_dir / "golden" / "rich" / "structure_diff_verbose.golden" result = compare_files( - str(fixtures_dir / "en" / "structure_diff.yaml"), - str(fixtures_dir / "de" / "structure_diff.yaml"), + fixtures_dir / "en" / "structure_diff.yaml", + fixtures_dir / "de" / "structure_diff.yaml", ) with console.capture() as capture: @@ -359,8 +360,8 @@ def test_misaligned_structure_differences_are_reported() -> None: fixtures_dir = base_dir / "fixtures" result = compare_files( - str(fixtures_dir / "en" / "structure_misaligned.yaml"), - str(fixtures_dir / "de" / "structure_misaligned.yaml"), + fixtures_dir / "en" / "structure_misaligned.yaml", + fixtures_dir / "de" / "structure_misaligned.yaml", ) # The result should detect that structures differ @@ -384,8 +385,8 @@ def test_missing_else_block_is_still_reported() -> None: fixtures_dir = base_dir / "fixtures" result = compare_files( - str(fixtures_dir / "en" / "structure_missing_else.yaml"), - str(fixtures_dir / "de" / "structure_missing_else.yaml"), + fixtures_dir / "en" / "structure_missing_else.yaml", + fixtures_dir / "de" / "structure_missing_else.yaml", ) # Should detect structure difference @@ -434,7 +435,7 @@ def test_structure_diff_uses_position_aware_token_occurrence_for_missing_block(t encoding="utf-8", ) - result = compare_files(str(english_file), str(translated_file)) + result = compare_files(english_file, translated_file) lines_by_type = resolved_diff_lines_by_type(result) assert len(lines_by_type.get("structure", [])) == 1 assert lines_by_type["structure"][0] == (7, 7) @@ -469,7 +470,7 @@ def test_structure_substitution_diff_is_reported(tmp_path) -> None: encoding="utf-8", ) - result = compare_files(str(english_file), str(translated_file)) + result = compare_files(english_file, translated_file) assert any(diff.diff_type == "structure" for diff in result.rule_differences) lines_by_type = resolved_diff_lines_by_type(result) @@ -487,7 +488,7 @@ def test_structure_per_fraction_should_anchor_to_replace_lines_expected_behavior """ base_dir = Path(__file__).parent path = base_dir / "fixtures" / "repro" - result = compare_files(str(path / "en" / "per_fraction.yaml"), str(path / "nb" / "per_fraction.yaml")) + result = compare_files(path / "en" / "per_fraction.yaml", path / "nb" / "per_fraction.yaml") lines_by_type = resolved_diff_lines_by_type(result) assert len(lines_by_type.get("structure", [])) == 1 @@ -502,8 +503,8 @@ def test_print_warnings_shows_misaligned_structures() -> None: fixtures_dir = base_dir / "fixtures" result = compare_files( - str(fixtures_dir / "en" / "structure_misaligned.yaml"), - str(fixtures_dir / "de" / "structure_misaligned.yaml"), + fixtures_dir / "en" / "structure_misaligned.yaml", + fixtures_dir / "de" / "structure_misaligned.yaml", ) # Raw result should have structure differences detected @@ -533,8 +534,8 @@ def test_print_warnings_still_shows_missing_else() -> None: fixtures_dir = base_dir / "fixtures" result = compare_files( - str(fixtures_dir / "en" / "structure_missing_else.yaml"), - str(fixtures_dir / "de" / "structure_missing_else.yaml"), + fixtures_dir / "en" / "structure_missing_else.yaml", + fixtures_dir / "de" / "structure_missing_else.yaml", ) with console.capture() as capture: @@ -590,7 +591,7 @@ def test_print_warnings_groups_multiple_subgroups_for_single_rule(fixed_console_ result = ComparisonResult( missing_rules=[], extra_rules=[], - untranslated_text=[(tr, [("t", "first", 24), ("ct", "second", 25)])], + untranslated_text=[(tr, [UntranslatedEntry("t", "first", 24), UntranslatedEntry("ct", "second", 25)])], rule_differences=diffs, file_path="", english_rule_count=1, @@ -687,7 +688,7 @@ def test_print_warnings_verbose_shows_snippets_only_for_differences(fixed_consol result = ComparisonResult( missing_rules=[missing], extra_rules=[], - untranslated_text=[(tr_untranslated, [("t", "leave me", 21)])], + untranslated_text=[(tr_untranslated, [UntranslatedEntry("t", "leave me", 21)])], rule_differences=[diff], file_path="", english_rule_count=2, diff --git a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py index 3c9a10fc..2547b9ec 100644 --- a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py +++ b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py @@ -12,7 +12,7 @@ import pytest from .. import cli as audit_cli -from ..auditor import console +from ..renderer import console def fixture_rules_dir() -> Path: diff --git a/PythonScripts/audit_translations/tests/test_line_resolver.py b/PythonScripts/audit_translations/tests/test_line_resolver.py index e5f1eb0a..7bdb3292 100644 --- a/PythonScripts/audit_translations/tests/test_line_resolver.py +++ b/PythonScripts/audit_translations/tests/test_line_resolver.py @@ -2,7 +2,7 @@ Unit tests for line_resolver.py. """ -from ..dataclasses import RuleDifference, RuleInfo +from ..models import RuleDifference, RuleInfo from ..line_resolver import first_structure_mismatch, resolve_diff_lines diff --git a/PythonScripts/audit_translations/tests/test_output_objects.py b/PythonScripts/audit_translations/tests/test_output_objects.py index 316bff06..5390a234 100644 --- a/PythonScripts/audit_translations/tests/test_output_objects.py +++ b/PythonScripts/audit_translations/tests/test_output_objects.py @@ -16,8 +16,8 @@ def collect_issue_tuples(language: str = "de", issue_filter: set[str] | None = N for english_path in sorted(english_dir.glob("*.yaml")): file_name = english_path.name result = compare_files( - str(english_path), - str(translated_dir / file_name), + english_path, + translated_dir / file_name, issue_filter, ) @@ -28,8 +28,8 @@ def collect_issue_tuples(language: str = "de", issue_filter: set[str] | None = N rows.append((file_name, "extra_rule", rule.key, "", None, rule.line_number, "")) for rule, entries in result.untranslated_text: - for _key, text, line in entries: - rows.append((file_name, "untranslated_text", rule.key, "", None, line or rule.line_number, text)) + for entry in entries: + rows.append((file_name, "untranslated_text", rule.key, "", None, entry.line or rule.line_number, entry.text)) for diff in result.rule_differences: lines = resolve_diff_lines(diff) diff --git a/PythonScripts/audit_translations/tests/test_parsers.py b/PythonScripts/audit_translations/tests/test_parsers.py index 2463209a..32dbb70e 100644 --- a/PythonScripts/audit_translations/tests/test_parsers.py +++ b/PythonScripts/audit_translations/tests/test_parsers.py @@ -6,7 +6,7 @@ from ruamel.yaml import YAML from ruamel.yaml.scanner import ScannerError -from ..dataclasses import RuleDifference, RuleInfo +from ..models import RuleDifference, RuleInfo, UntranslatedEntry from ..parsers import ( build_line_map, diff_rules, @@ -108,7 +108,7 @@ def test_finds_entries_with_lines(self): """ data = yaml.load(content) entries = find_untranslated_text_entries(data[0]) - assert entries == [("t", "not translated", 4)] + assert entries == [UntranslatedEntry("t", "not translated", 4)] class TestParseRulesFile: @@ -159,7 +159,7 @@ def test_detects_untranslated_text(self): rules = parse_rules_file(content, data) assert rules[0].has_untranslated_text assert "not translated" in rules[0].untranslated_keys - assert rules[0].untranslated_entries == [("t", "not translated", 4)] + assert rules[0].untranslated_entries == [UntranslatedEntry("t", "not translated", 4)] def test_detects_audit_ignore(self): """Ensure detects audit ignore.""" @@ -248,7 +248,7 @@ def test_parse_yaml_file_handles_tabs(self, tmp_path): file_path.write_text(content, encoding="utf-8") from ..parsers import parse_yaml_file - rules, _ = parse_yaml_file(str(file_path)) + rules, _ = parse_yaml_file(file_path) assert len(rules) == 1 assert rules[0].name == "tabbed" @@ -265,7 +265,7 @@ def test_parse_yaml_file_strict_rejects_tabs(self, tmp_path): from ..parsers import parse_yaml_file with pytest.raises(ScannerError): - parse_yaml_file(str(file_path), strict=True) + parse_yaml_file(file_path, strict=True) class TestParseUnicodeFile: From a17bc58610a40ad2c5f4e8d0b469053310861bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 5 Mar 2026 03:58:34 +0100 Subject: [PATCH 2/7] run ruff --- PythonScripts/audit_translations/renderer.py | 9 +++++++-- PythonScripts/audit_translations/tests/test_auditor.py | 5 ++--- .../audit_translations/tests/test_line_resolver.py | 2 +- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index 438347bc..da422ea1 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -10,8 +10,8 @@ from rich.console import Console from rich.markup import escape -from .models import ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo, UntranslatedEntry from .line_resolver import resolve_diff_lines +from .models import ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo console = Console() @@ -81,7 +81,12 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any]) for rule, entries in result.untranslated_text: for entry in entries: - add_issue(rule, issue_group_key(IssueType.UNTRANSLATED_TEXT), {"line_tr": entry.line or rule.line_number, "text": entry.text}) + add_issue( + rule, + issue_group_key(IssueType.UNTRANSLATED_TEXT), + {"line_tr": entry.line or rule.line_number, + "text": entry.text} + ) for diff in result.rule_differences: lines = resolve_diff_lines(diff) diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py index 921ddce5..0214bf56 100644 --- a/PythonScripts/audit_translations/tests/test_auditor.py +++ b/PythonScripts/audit_translations/tests/test_auditor.py @@ -7,10 +7,9 @@ import pytest from ..auditor import compare_files, get_yaml_files, list_languages -from ..renderer import console -from ..models import ComparisonResult, DiffType, RuleDifference, RuleInfo, UntranslatedEntry from ..line_resolver import resolve_diff_lines -from ..renderer import print_warnings +from ..models import ComparisonResult, DiffType, RuleDifference, RuleInfo, UntranslatedEntry +from ..renderer import console, print_warnings @pytest.fixture() diff --git a/PythonScripts/audit_translations/tests/test_line_resolver.py b/PythonScripts/audit_translations/tests/test_line_resolver.py index 7bdb3292..569ce4ef 100644 --- a/PythonScripts/audit_translations/tests/test_line_resolver.py +++ b/PythonScripts/audit_translations/tests/test_line_resolver.py @@ -2,8 +2,8 @@ Unit tests for line_resolver.py. """ -from ..models import RuleDifference, RuleInfo from ..line_resolver import first_structure_mismatch, resolve_diff_lines +from ..models import RuleDifference, RuleInfo def _make_rule(name: str, line_map: dict, line_number: int = 1) -> RuleInfo: From af46adc6bc2ca60447da2ce01b1570cc0f991cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 5 Mar 2026 04:01:09 +0100 Subject: [PATCH 3/7] run ruff --- PythonScripts/audit_translations/renderer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index da422ea1..a8eb8c12 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -84,8 +84,7 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any]) add_issue( rule, issue_group_key(IssueType.UNTRANSLATED_TEXT), - {"line_tr": entry.line or rule.line_number, - "text": entry.text} + {"line_tr": entry.line or rule.line_number, "text": entry.text}, ) for diff in result.rule_differences: From 1bac7595dcf62cc36c06c76b94ecb5c5cc0ae90e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 5 Mar 2026 04:07:29 +0100 Subject: [PATCH 4/7] modularize functionality --- PythonScripts/audit_translations/auditor.py | 3 +- PythonScripts/audit_translations/differ.py | 98 ++++++++++ .../audit_translations/extractors.py | 98 ++++++++++ .../audit_translations/line_resolver.py | 2 +- PythonScripts/audit_translations/parsers.py | 176 +----------------- .../audit_translations/tests/test_parsers.py | 10 +- 6 files changed, 207 insertions(+), 180 deletions(-) create mode 100644 PythonScripts/audit_translations/differ.py create mode 100644 PythonScripts/audit_translations/extractors.py diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index 223c4b75..0f61c080 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -12,7 +12,8 @@ from rich.table import Table from .models import ComparisonResult, RuleInfo -from .parsers import diff_rules, parse_yaml_file +from .differ import diff_rules +from .parsers import parse_yaml_file from .renderer import console, print_warnings GREEN_FILE_COUNT_THRESHOLD = 7 diff --git a/PythonScripts/audit_translations/differ.py b/PythonScripts/audit_translations/differ.py new file mode 100644 index 00000000..574c4399 --- /dev/null +++ b/PythonScripts/audit_translations/differ.py @@ -0,0 +1,98 @@ +""" +Rule diffing logic. + +Compares English and translated rules to find fine-grained structural differences. +""" + +from .extractors import ( + extract_conditions, + extract_match_pattern, + extract_structure_elements, + extract_variables, + normalize_xpath, +) +from .models import DiffType, RuleDifference, RuleInfo + + +def dedup_list(values: list[str]) -> list[str]: + """ + Return a list without duplicates while preserving first-seen order. + Originally, rule differences were stored as sets, losing their original order, + which is not helpful and why it changed with the help of this function. + + Example: + >>> dedup_list(["if:a", "if:b", "if:a"]) + ['if:a', 'if:b'] + """ + return list(dict.fromkeys(values)) # dict preserves insertion order (guaranteed in Python 3.7+) + + +def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> list[RuleDifference]: + """ + Compare two rules and return fine-grained differences. + Ignores text content differences (T/t values) but catches structural changes. + """ + differences: list[RuleDifference] = [] + + def add_difference(diff_type: DiffType, description: str, english_snippet: str, translated_snippet: str) -> None: + differences.append( + RuleDifference( + english_rule, + translated_rule, + diff_type, + description, + english_snippet, + translated_snippet, + ) + ) + + # Check match pattern differences + en_match_raw = extract_match_pattern(english_rule.data) + tr_match_raw = extract_match_pattern(translated_rule.data) + en_match = normalize_xpath(en_match_raw) + tr_match = normalize_xpath(tr_match_raw) + if en_match != tr_match and en_match and tr_match: + add_difference(DiffType.MATCH, "Match pattern differs", en_match, tr_match) + + # Check condition differences + en_conditions_raw = extract_conditions(english_rule.data) + tr_conditions_raw = extract_conditions(translated_rule.data) + en_conditions = [normalize_xpath(c) for c in en_conditions_raw] + tr_conditions = [normalize_xpath(c) for c in tr_conditions_raw] + if en_conditions != tr_conditions: + # Find specific differences + en_set, tr_set = set(en_conditions), set(tr_conditions) + if en_set != tr_set: + add_difference( + DiffType.CONDITION, + "Conditions differ", + ", ".join(dedup_list(en_conditions)) or "(none)", + ", ".join(dedup_list(tr_conditions)) or "(none)", + ) + + # Check variable differences + en_vars = extract_variables(english_rule.data) + tr_vars = extract_variables(translated_rule.data) + if en_vars != tr_vars: + en_var_names = {v[0] for v in en_vars} + tr_var_names = {v[0] for v in tr_vars} + if en_var_names != tr_var_names: + add_difference( + DiffType.VARIABLES, + "Variable definitions differ", + ", ".join(sorted(en_var_names)) or "(none)", + ", ".join(sorted(tr_var_names)) or "(none)", + ) + + # Check structural differences (test/if/then/else blocks) + en_structure = extract_structure_elements(english_rule.data) + tr_structure = extract_structure_elements(translated_rule.data) + if en_structure != tr_structure: + add_difference( + DiffType.STRUCTURE, + "Rule structure differs (test/if/then/else blocks)", + " ".join(en_structure), + " ".join(tr_structure), + ) + + return differences diff --git a/PythonScripts/audit_translations/extractors.py b/PythonScripts/audit_translations/extractors.py new file mode 100644 index 00000000..82121a48 --- /dev/null +++ b/PythonScripts/audit_translations/extractors.py @@ -0,0 +1,98 @@ +""" +Rule data extraction functions. + +Extracts structural elements, match patterns, conditions, and variables +from parsed YAML rule data. +""" + +from collections.abc import Iterator +from typing import Any + +from jsonpath_ng.ext import parse +from jsonpath_ng.jsonpath import Fields + +_ALL_FIELDS_EXPR = parse("$..*") # '..' is recursive descent +_MATCH_EXPR = parse("$.match") + + +def mapping_key_line(mapping: Any, key: str) -> int | None: + """ + - 'lc' is line and column in YAML file: https://yaml.dev/doc/ruamel.yaml/detail/ + """ + if hasattr(mapping, "lc") and hasattr(mapping.lc, "data"): + line_info = mapping.lc.data.get(key) + return line_info[0] + 1 + return None + + +def iter_field_matches(node: Any) -> Iterator[tuple[str, Any, Any]]: + """ + Iterate nested mapping fields using jsonpath. + + Returns tuples of (key, child_value, parent_mapping) in traversal order. + """ + for match in _ALL_FIELDS_EXPR.find(node): + path = match.path + if isinstance(path, Fields) and len(path.fields) == 1: + key = path.fields[0] + parent = match.context.value if match.context is not None else None + yield key, match.value, parent + + +def normalize_match(value: Any) -> str: + if isinstance(value, list): + return " ".join(str(item) for item in value) + if isinstance(value, str): + return value + return "" + + +def normalize_xpath(value: str) -> str: + return " ".join(value.split()) + + +def extract_match_pattern(rule_data: Any) -> str: + if isinstance(rule_data, dict): + matches = _MATCH_EXPR.find(rule_data) + if matches: + return normalize_match(matches[0].value) + return "" + + +def extract_conditions(rule_data: Any) -> list[str]: + """Extract all if/else conditions from a rule""" + conditions: list[str] = [] + for key, child, _ in iter_field_matches(rule_data): + if key in ("if", "else_if") and isinstance(child, str): + conditions.append(child) + return conditions + + +def extract_variables(rule_data: Any) -> list[tuple[str, str]]: + """Extract variable definitions from a rule""" + variables: list[tuple[str, str]] = [] + + def add_from_value(value: Any) -> None: + if isinstance(value, dict): + for name, expr in value.items(): + variables.append((str(name), str(expr))) + elif isinstance(value, list): + for item in value: + if isinstance(item, dict): + for name, expr in item.items(): + variables.append((str(name), str(expr))) + + for key, child, _ in iter_field_matches(rule_data): + if key == "variables": + add_from_value(child) + return variables + + +def extract_structure_elements(rule_data: Any) -> list[str]: + """Extract structural elements (test, with, replace blocks) ignoring text content""" + elements: list[str] = [] + tokens = {"test", "if", "else_if", "then", "else", "then_test", "else_test", "with", "replace", "intent"} + for key, _, _ in iter_field_matches(rule_data): + if key in tokens: + elements.append(f"{key}:") + return elements diff --git a/PythonScripts/audit_translations/line_resolver.py b/PythonScripts/audit_translations/line_resolver.py index 8950049d..da25d200 100644 --- a/PythonScripts/audit_translations/line_resolver.py +++ b/PythonScripts/audit_translations/line_resolver.py @@ -5,7 +5,7 @@ """ from .models import DiffType, RuleDifference, RuleInfo -from .parsers import extract_structure_elements +from .extractors import extract_structure_elements def _get_line_map_lines(rule: RuleInfo, kind: DiffType | str, token: str | None = None) -> list[int]: diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py index f8142da3..590bc085 100644 --- a/PythonScripts/audit_translations/parsers.py +++ b/PythonScripts/audit_translations/parsers.py @@ -4,23 +4,18 @@ Handles parsing of rule files and unicode files to extract rule information. """ -from collections.abc import Iterator from pathlib import Path from typing import Any -from jsonpath_ng.ext import parse -from jsonpath_ng.jsonpath import Fields from ruamel.yaml import YAML from ruamel.yaml.scanner import ScannerError -from .models import DiffType, RuleDifference, RuleInfo, UntranslatedEntry +from .extractors import iter_field_matches, mapping_key_line +from .models import RuleInfo, UntranslatedEntry _yaml = YAML() _yaml.preserve_quotes = True -_ALL_FIELDS_EXPR = parse("$..*") # '..' is recursive descent -_MATCH_EXPR = parse("$.match") - def is_unicode_file(file_path: Path) -> bool: """Check if this is a unicode.yaml or unicode-full.yaml file""" @@ -73,30 +68,6 @@ def build_raw_blocks(lines: list[str], starts: list[int]) -> list[str]: return blocks -def mapping_key_line(mapping: Any, key: str) -> int | None: - """ - - 'lc' is line and column in YAML file: https://yaml.dev/doc/ruamel.yaml/detail/ - """ - if hasattr(mapping, "lc") and hasattr(mapping.lc, "data"): - line_info = mapping.lc.data.get(key) - return line_info[0] + 1 - return None - - -def iter_field_matches(node: Any) -> Iterator[tuple[str, Any, Any]]: - """ - Iterate nested mapping fields using jsonpath. - - Returns tuples of (key, child_value, parent_mapping) in traversal order. - """ - for match in _ALL_FIELDS_EXPR.find(node): - path = match.path - if isinstance(path, Fields) and len(path.fields) == 1: - key = path.fields[0] - parent = match.context.value if match.context is not None else None - yield key, match.value, parent - - def _extract_item_fields(item: Any, is_unicode: bool) -> tuple[str, str | None, str | None, Any] | None: if is_unicode: if isinstance(item, dict) and len(item) == 1: @@ -222,146 +193,3 @@ def add_line(kind: str, line: int | None) -> None: if key in structure_tokens: add_line(f"structure:{key}", mapping_key_line(parent, key)) return line_map - - -def normalize_match(value: Any) -> str: - if isinstance(value, list): - return " ".join(str(item) for item in value) - if isinstance(value, str): - return value - return "" - - -def normalize_xpath(value: str) -> str: - return " ".join(value.split()) - - -def dedup_list(values: list[str]) -> list[str]: - """ - Return a list without duplicates while preserving first-seen order. - Originally, rule differences were stored as sets, losing their original order, - which is not helpful and why it changed with the help of this function. - - Example: - >>> dedup_list(["if:a", "if:b", "if:a"]) - ['if:a', 'if:b'] - """ - return list(dict.fromkeys(values)) # dict preserves insertion order (guaranteed in Python 3.7+) - - -def extract_match_pattern(rule_data: Any) -> str: - if isinstance(rule_data, dict): - matches = _MATCH_EXPR.find(rule_data) - if matches: - return normalize_match(matches[0].value) - return "" - - -def extract_conditions(rule_data: Any) -> list[str]: - """Extract all if/else conditions from a rule""" - conditions: list[str] = [] - for key, child, _ in iter_field_matches(rule_data): - if key in ("if", "else_if") and isinstance(child, str): - conditions.append(child) - return conditions - - -def extract_variables(rule_data: Any) -> list[tuple[str, str]]: - """Extract variable definitions from a rule""" - variables: list[tuple[str, str]] = [] - - def add_from_value(value: Any) -> None: - if isinstance(value, dict): - for name, expr in value.items(): - variables.append((str(name), str(expr))) - elif isinstance(value, list): - for item in value: - if isinstance(item, dict): - for name, expr in item.items(): - variables.append((str(name), str(expr))) - - for key, child, _ in iter_field_matches(rule_data): - if key == "variables": - add_from_value(child) - return variables - - -def extract_structure_elements(rule_data: Any) -> list[str]: - """Extract structural elements (test, with, replace blocks) ignoring text content""" - elements: list[str] = [] - tokens = {"test", "if", "else_if", "then", "else", "then_test", "else_test", "with", "replace", "intent"} - for key, _, _ in iter_field_matches(rule_data): - if key in tokens: - elements.append(f"{key}:") - return elements - - -def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> list[RuleDifference]: - """ - Compare two rules and return fine-grained differences. - Ignores text content differences (T/t values) but catches structural changes. - """ - differences: list[RuleDifference] = [] - - def add_difference(diff_type: DiffType, description: str, english_snippet: str, translated_snippet: str) -> None: - differences.append( - RuleDifference( - english_rule, - translated_rule, - diff_type, - description, - english_snippet, - translated_snippet, - ) - ) - - # Check match pattern differences - en_match_raw = extract_match_pattern(english_rule.data) - tr_match_raw = extract_match_pattern(translated_rule.data) - en_match = normalize_xpath(en_match_raw) - tr_match = normalize_xpath(tr_match_raw) - if en_match != tr_match and en_match and tr_match: - add_difference(DiffType.MATCH, "Match pattern differs", en_match, tr_match) - - # Check condition differences - en_conditions_raw = extract_conditions(english_rule.data) - tr_conditions_raw = extract_conditions(translated_rule.data) - en_conditions = [normalize_xpath(c) for c in en_conditions_raw] - tr_conditions = [normalize_xpath(c) for c in tr_conditions_raw] - if en_conditions != tr_conditions: - # Find specific differences - en_set, tr_set = set(en_conditions), set(tr_conditions) - if en_set != tr_set: - add_difference( - DiffType.CONDITION, - "Conditions differ", - ", ".join(dedup_list(en_conditions)) or "(none)", - ", ".join(dedup_list(tr_conditions)) or "(none)", - ) - - # Check variable differences - en_vars = extract_variables(english_rule.data) - tr_vars = extract_variables(translated_rule.data) - if en_vars != tr_vars: - en_var_names = {v[0] for v in en_vars} - tr_var_names = {v[0] for v in tr_vars} - if en_var_names != tr_var_names: - add_difference( - DiffType.VARIABLES, - "Variable definitions differ", - ", ".join(sorted(en_var_names)) or "(none)", - ", ".join(sorted(tr_var_names)) or "(none)", - ) - - # Check structural differences (test/if/then/else blocks) - en_structure = extract_structure_elements(english_rule.data) - tr_structure = extract_structure_elements(translated_rule.data) - if en_structure != tr_structure: - add_difference( - DiffType.STRUCTURE, - "Rule structure differs (test/if/then/else blocks)", - " ".join(en_structure), - " ".join(tr_structure), - ) - - return differences diff --git a/PythonScripts/audit_translations/tests/test_parsers.py b/PythonScripts/audit_translations/tests/test_parsers.py index 32dbb70e..9315ce5c 100644 --- a/PythonScripts/audit_translations/tests/test_parsers.py +++ b/PythonScripts/audit_translations/tests/test_parsers.py @@ -6,14 +6,16 @@ from ruamel.yaml import YAML from ruamel.yaml.scanner import ScannerError -from ..models import RuleDifference, RuleInfo, UntranslatedEntry -from ..parsers import ( - build_line_map, - diff_rules, +from ..differ import diff_rules +from ..extractors import ( extract_conditions, extract_match_pattern, extract_structure_elements, extract_variables, +) +from ..models import RuleDifference, RuleInfo, UntranslatedEntry +from ..parsers import ( + build_line_map, find_untranslated_text_entries, find_untranslated_text_values, has_audit_ignore, From d88e1c2a6da3f9b20e6587d695ba197ec0bc0ed9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 5 Mar 2026 04:24:21 +0100 Subject: [PATCH 5/7] move stuff to renderer.py --- PythonScripts/audit_translations/auditor.py | 70 ++++++-------------- PythonScripts/audit_translations/models.py | 14 ++++ PythonScripts/audit_translations/renderer.py | 62 ++++++++++++++++- 3 files changed, 95 insertions(+), 51 deletions(-) diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index 0f61c080..bfe6501d 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -8,25 +8,10 @@ import sys from pathlib import Path -from rich.panel import Panel -from rich.table import Table - -from .models import ComparisonResult, RuleInfo from .differ import diff_rules +from .models import AuditSummary, ComparisonResult, RuleInfo from .parsers import parse_yaml_file -from .renderer import console, print_warnings - -GREEN_FILE_COUNT_THRESHOLD = 7 -YELLOW_FILE_COUNT_THRESHOLD = 4 - - -def file_count_color(file_count: int) -> str: - """Map number of translated YAML files to a display color.""" - if file_count >= GREEN_FILE_COUNT_THRESHOLD: - return "green" - if file_count >= YELLOW_FILE_COUNT_THRESHOLD: - return "yellow" - return "red" +from .renderer import console, print_audit_header, print_audit_summary, print_language_list, print_warnings def split_language_into_base_and_region(language: str) -> tuple[str, str | None]: @@ -184,10 +169,7 @@ def audit_language( # Get list of files to audit files = [specific_file] if specific_file else get_yaml_files(english_dir, english_region_dir) - # Print header - console.print(Panel(f"MathCAT Translation Audit: {language.upper()}", style="bold cyan")) - console.print("\n [dim]Comparing against English (en) reference files[/]") - console.print(f" [dim]Files to check: {len(files)}[/]") + print_audit_header(language, len(files)) total_issues = 0 total_missing = 0 @@ -228,48 +210,36 @@ def audit_language( total_extra += len(result.extra_rules) total_differences += len(result.rule_differences) - # Summary - table = Table(title="SUMMARY", title_style="bold", box=None, show_header=False, padding=(0, 2)) - table.add_column(width=30) - table.add_column() - for label, value, color in [ - ("Files checked", len(files), None), - ("Files with issues", files_with_issues, "yellow" if files_with_issues else "green"), - ("Files OK", files_ok, "green" if files_ok else None), - ("Missing rules", total_missing, "red" if total_missing else "green"), - ("Untranslated text", total_untranslated, "yellow" if total_untranslated else "green"), - ("Rule differences", total_differences, "magenta" if total_differences else "green"), - ("Extra rules", total_extra, "blue" if total_extra else None), - ]: - table.add_row(label, f"[{color}]{value}[/]" if color else str(value)) - console.print(Panel(table, style="cyan")) + print_audit_summary( + AuditSummary( + files_checked=len(files), + files_with_issues=files_with_issues, + files_ok=files_ok, + total_missing=total_missing, + total_untranslated=total_untranslated, + total_extra=total_extra, + total_differences=total_differences, + total_issues=total_issues, + ) + ) return total_issues def list_languages(rules_dir: str | None = None) -> None: """List available languages for auditing""" - console.print(Panel("Available Languages", style="bold cyan")) - - table = Table(show_header=True, header_style="dim") - table.add_column("Language", justify="center", style="cyan") - table.add_column("YAML files", justify="right") - rules_dir_path = get_rules_dir(rules_dir) + + languages: list[tuple[str, int]] = [] for lang_dir in sorted(rules_dir_path.iterdir()): if not lang_dir.is_dir() or lang_dir.name == "en": continue - base_count = len(get_yaml_files(lang_dir)) - color = file_count_color(base_count) - table.add_row(lang_dir.name, f"[{color}]{base_count}[/] files") + languages.append((lang_dir.name, len(get_yaml_files(lang_dir)))) for region_dir in sorted(lang_dir.iterdir()): if not region_dir.is_dir() or region_dir.name.lower() == "sharedrules": continue code = f"{lang_dir.name}-{region_dir.name}" - count = len(get_yaml_files(lang_dir, region_dir)) - region_color = file_count_color(count) - table.add_row(code, f"[{region_color}]{count}[/] files") + languages.append((code, len(get_yaml_files(lang_dir, region_dir)))) - console.print(table) - console.print("\n [dim]Reference: en (English) - base translation[/]\n") + print_language_list(languages) diff --git a/PythonScripts/audit_translations/models.py b/PythonScripts/audit_translations/models.py index e156073c..4df57174 100644 --- a/PythonScripts/audit_translations/models.py +++ b/PythonScripts/audit_translations/models.py @@ -117,3 +117,17 @@ class ComparisonResult: @property def has_issues(self) -> bool: return bool(self.missing_rules or self.untranslated_text or self.extra_rules or self.rule_differences) + + +@dataclass +class AuditSummary: + """Accumulated totals from a full language audit.""" + + files_checked: int + files_with_issues: int + files_ok: int + total_missing: int + total_untranslated: int + total_extra: int + total_differences: int + total_issues: int diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py index a8eb8c12..2d0f975d 100644 --- a/PythonScripts/audit_translations/renderer.py +++ b/PythonScripts/audit_translations/renderer.py @@ -9,9 +9,11 @@ from rich.console import Console from rich.markup import escape +from rich.panel import Panel +from rich.table import Table from .line_resolver import resolve_diff_lines -from .models import ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo +from .models import AuditSummary, ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo console = Console() @@ -138,3 +140,61 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any]) issues += len(entries) return issues + + +GREEN_FILE_COUNT_THRESHOLD = 7 +YELLOW_FILE_COUNT_THRESHOLD = 4 + + +def file_count_color(file_count: int) -> str: + """Map number of translated YAML files to a display color.""" + if file_count >= GREEN_FILE_COUNT_THRESHOLD: + return "green" + if file_count >= YELLOW_FILE_COUNT_THRESHOLD: + return "yellow" + return "red" + + +def print_audit_header(language: str, file_count: int) -> None: + """Print the audit header panel.""" + console.print(Panel(f"MathCAT Translation Audit: {language.upper()}", style="bold cyan")) + console.print("\n [dim]Comparing against English (en) reference files[/]") + console.print(f" [dim]Files to check: {file_count}[/]") + + +def print_audit_summary(summary: AuditSummary) -> None: + """Print the audit summary table.""" + table = Table(title="SUMMARY", title_style="bold", box=None, show_header=False, padding=(0, 2)) + table.add_column(width=30) + table.add_column() + for label, value, color in [ + ("Files checked", summary.files_checked, None), + ("Files with issues", summary.files_with_issues, "yellow" if summary.files_with_issues else "green"), + ("Files OK", summary.files_ok, "green" if summary.files_ok else None), + ("Missing rules", summary.total_missing, "red" if summary.total_missing else "green"), + ("Untranslated text", summary.total_untranslated, "yellow" if summary.total_untranslated else "green"), + ("Rule differences", summary.total_differences, "magenta" if summary.total_differences else "green"), + ("Extra rules", summary.total_extra, "blue" if summary.total_extra else None), + ]: + table.add_row(label, f"[{color}]{value}[/]" if color else str(value)) + console.print(Panel(table, style="cyan")) + + +def print_language_list(languages: list[tuple[str, int]]) -> None: + """Print the available languages table. + + Args: + languages: List of (language_code, yaml_file_count) tuples. + """ + console.print(Panel("Available Languages", style="bold cyan")) + + table = Table(show_header=True, header_style="dim") + table.add_column("Language", justify="center", style="cyan") + table.add_column("YAML files", justify="right") + + for code, count in languages: + color = file_count_color(count) + table.add_row(code, f"[{color}]{count}[/] files") + + console.print(table) + console.print("\n [dim]Reference: en (English) - base translation[/]\n") From 134c2627cb5ebfc252eaae9f951c1c82065404da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 5 Mar 2026 04:33:33 +0100 Subject: [PATCH 6/7] dont throw generic error. split up tests --- PythonScripts/audit_translations/auditor.py | 13 +- PythonScripts/audit_translations/cli.py | 19 +- PythonScripts/audit_translations/models.py | 6 +- PythonScripts/audit_translations/parsers.py | 8 - .../audit_translations/tests/test_auditor.py | 4 - .../audit_translations/tests/test_differ.py | 145 +++++++++++ .../tests/test_extractors.py | 66 +++++ .../audit_translations/tests/test_parsers.py | 225 +----------------- 8 files changed, 242 insertions(+), 244 deletions(-) create mode 100644 PythonScripts/audit_translations/tests/test_differ.py create mode 100644 PythonScripts/audit_translations/tests/test_extractors.py diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py index bfe6501d..3c333628 100644 --- a/PythonScripts/audit_translations/auditor.py +++ b/PythonScripts/audit_translations/auditor.py @@ -5,11 +5,10 @@ and for performing full language audits. """ -import sys from pathlib import Path from .differ import diff_rules -from .models import AuditSummary, ComparisonResult, RuleInfo +from .models import AuditError, AuditSummary, ComparisonResult, RuleInfo from .parsers import parse_yaml_file from .renderer import console, print_audit_header, print_audit_summary, print_language_list, print_warnings @@ -132,7 +131,6 @@ def merge_rules(base_rules: list[RuleInfo], region_rules: list[RuleInfo]) -> lis extra_rules=extra_rules, untranslated_text=untranslated_text, rule_differences=rule_differences, - file_path=translated_path, english_rule_count=len(english_rules), translated_rule_count=len(translated_rules), ) @@ -155,16 +153,13 @@ def audit_language( english_region_dir = english_dir / region if region else None if not english_dir.exists(): - console.print(f"\n[red]✗ Error:[/] English rules directory not found: {english_dir}") - sys.exit(1) + raise AuditError(f"English rules directory not found: {english_dir}") if not translated_dir.exists(): - console.print(f"\n[red]✗ Error:[/] Translation directory not found: {translated_dir}") - sys.exit(1) + raise AuditError(f"Translation directory not found: {translated_dir}") if region and not (translated_region_dir and translated_region_dir.exists()): - console.print(f"\n[red]✗ Error:[/] Region directory not found: {translated_region_dir}") - sys.exit(1) + raise AuditError(f"Region directory not found: {translated_region_dir}") # Get list of files to audit files = [specific_file] if specific_file else get_yaml_files(english_dir, english_region_dir) diff --git a/PythonScripts/audit_translations/cli.py b/PythonScripts/audit_translations/cli.py index ed04c402..711089ca 100644 --- a/PythonScripts/audit_translations/cli.py +++ b/PythonScripts/audit_translations/cli.py @@ -8,6 +8,7 @@ import sys from .auditor import audit_language, list_languages +from .models import AuditError from .renderer import console @@ -59,10 +60,14 @@ def main() -> None: sys.exit(1) issue_filter = set(tokens) - audit_language( - args.language, - args.specific_file, - args.rules_dir, - issue_filter, - args.verbose, - ) + try: + audit_language( + args.language, + args.specific_file, + args.rules_dir, + issue_filter, + args.verbose, + ) + except AuditError as exc: + console.print(f"\n[red]✗ Error:[/] {exc}") + sys.exit(1) diff --git a/PythonScripts/audit_translations/models.py b/PythonScripts/audit_translations/models.py index 4df57174..a54dadb7 100644 --- a/PythonScripts/audit_translations/models.py +++ b/PythonScripts/audit_translations/models.py @@ -6,10 +6,13 @@ from dataclasses import dataclass, field from enum import StrEnum -from pathlib import Path from typing import Any +class AuditError(Exception): + """Raised when the audit encounters a configuration or validation error.""" + + class IssueType(StrEnum): """Top-level issue categories used by the audit renderer.""" @@ -109,7 +112,6 @@ class ComparisonResult: missing_rules: list[RuleInfo] # Rules in English but not in translation extra_rules: list[RuleInfo] # Rules in translation but not in English untranslated_text: list[tuple[RuleInfo, list[UntranslatedEntry]]] - file_path: Path | str english_rule_count: int translated_rule_count: int rule_differences: list[RuleDifference] = field(default_factory=list) # Fine-grained diffs diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py index 590bc085..421ebb77 100644 --- a/PythonScripts/audit_translations/parsers.py +++ b/PythonScripts/audit_translations/parsers.py @@ -130,14 +130,6 @@ def has_audit_ignore(content: str) -> bool: return "# audit-ignore" in content -def find_untranslated_text_values(node: Any) -> list[str]: - """ - Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) that should be uppercase in translations. - Returns list of the untranslated text values found. - """ - return [entry.text for entry in find_untranslated_text_entries(node)] - - def find_untranslated_text_entries(node: Any) -> list[UntranslatedEntry]: """ Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) and their line numbers. diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py index 0214bf56..e1cd94b7 100644 --- a/PythonScripts/audit_translations/tests/test_auditor.py +++ b/PythonScripts/audit_translations/tests/test_auditor.py @@ -93,7 +93,6 @@ def test_comparison_result_object_fields() -> None: extra_rules=[extra], untranslated_text=[(untranslated, [UntranslatedEntry("t", "x", 31)])], rule_differences=[diff], - file_path="", english_rule_count=1, translated_rule_count=1, ) @@ -592,7 +591,6 @@ def test_print_warnings_groups_multiple_subgroups_for_single_rule(fixed_console_ extra_rules=[], untranslated_text=[(tr, [UntranslatedEntry("t", "first", 24), UntranslatedEntry("ct", "second", 25)])], rule_differences=diffs, - file_path="", english_rule_count=1, translated_rule_count=1, ) @@ -644,7 +642,6 @@ def test_print_warnings_groups_missing_and_extra_by_rule(fixed_console_width) -> extra_rules=[extra], untranslated_text=[], rule_differences=[diff], - file_path="", english_rule_count=2, translated_rule_count=2, ) @@ -689,7 +686,6 @@ def test_print_warnings_verbose_shows_snippets_only_for_differences(fixed_consol extra_rules=[], untranslated_text=[(tr_untranslated, [UntranslatedEntry("t", "leave me", 21)])], rule_differences=[diff], - file_path="", english_rule_count=2, translated_rule_count=2, ) diff --git a/PythonScripts/audit_translations/tests/test_differ.py b/PythonScripts/audit_translations/tests/test_differ.py new file mode 100644 index 00000000..eb7bc3a2 --- /dev/null +++ b/PythonScripts/audit_translations/tests/test_differ.py @@ -0,0 +1,145 @@ +""" +Tests for differ.py. +""" + +from ..differ import diff_rules +from ..models import RuleDifference, RuleInfo + + +def make_rule(name: str, tag: str, data) -> RuleInfo: + """Helper to create RuleInfo for testing""" + return RuleInfo( + name=name, + tag=tag, + key=f"{name}|{tag}", + line_number=1, + raw_content="", + data=data, + ) + + +class TestDiffRules: + def test_identical_rules_no_diff(self): + """Ensure identical rules no diff.""" + data = {"name": "test", "tag": "mo", "match": "self::m:mo", "replace": [{"T": "text"}]} + en = make_rule("test", "mo", data) + tr = make_rule("test", "mo", data) + assert diff_rules(en, tr) == [] + + def test_detects_match_pattern_difference(self): + """Ensure detects match pattern difference.""" + en = make_rule("test", "mo", {"match": "self::m:mo"}) + tr = make_rule("test", "mo", {"match": "self::m:mi"}) + diffs = diff_rules(en, tr) + assert len(diffs) == 1 + assert diffs[0].diff_type == "match" + assert "self::m:mo" in diffs[0].english_snippet + assert "self::m:mi" in diffs[0].translated_snippet + + def test_detects_condition_difference(self): + """Ensure detects condition difference.""" + en = make_rule("test", "mo", {"if": "condition1"}) + tr = make_rule("test", "mo", {"if": "condition2"}) + diffs = diff_rules(en, tr) + assert any(d.diff_type == "condition" for d in diffs) + + def test_condition_snippet_preserves_rule_order(self): + """ + Condition snippets should preserve the order seen in each rule. + Originally, alphabetical order was used, which is not very helpful. + """ + en = make_rule( + "test", + "mo", + { + "test": { + "if": "condition_b", + "then": [ + { + "test": { + "if": "condition_a", + "then": [{"T": "x"}], + } + } + ], + } + }, + ) + tr = make_rule("test", "mo", {"if": "condition_c"}) + diffs: list[RuleDifference] = diff_rules(en, tr) + cond_diff: RuleDifference = next(d for d in diffs if d.diff_type == "condition") + assert cond_diff.english_snippet == "condition_b, condition_a" + assert cond_diff.translated_snippet == "condition_c" + + def test_condition_snippet_deduplicates_repeated_conditions(self): + """ + Repeated conditions should be shown once, in first-seen order. + """ + en = make_rule( + "test", + "mo", + { + "test": { + "if": "condition_a", + "then": [ + { + "test": { + "if": "condition_a", + "then": [{"T": "x"}], + } + }, + { + "test": { + "if": "condition_b", + "then": [{"T": "y"}], + } + }, + ], + } + }, + ) + tr = make_rule("test", "mo", {"if": "condition_c"}) + diffs: list[RuleDifference] = diff_rules(en, tr) + cond_diff: RuleDifference = next(d for d in diffs if d.diff_type == "condition") + + # without deduplication, we'd have "condition_a" repeated. + assert cond_diff.english_snippet == "condition_a, condition_b" + assert cond_diff.translated_snippet == "condition_c" + + def test_detects_missing_condition(self): + """Ensure detects missing condition.""" + en = make_rule("test", "mo", {"if": "condition1"}) + tr = make_rule("test", "mo", {"replace": [{"T": "text"}]}) + diffs = diff_rules(en, tr) + assert any(d.diff_type == "condition" for d in diffs) + + def test_detects_variable_difference(self): + """Ensure detects variable difference.""" + en = make_rule("test", "mo", {"variables": [{"foo": "bar"}]}) + tr = make_rule("test", "mo", {"variables": [{"baz": "qux"}]}) + diffs = diff_rules(en, tr) + assert any(d.diff_type == "variables" for d in diffs) + + def test_detects_structure_difference(self): + """Ensure detects structure difference.""" + en = make_rule("test", "mo", {"test": {"if": "cond", "then": [{"T": "yes"}], "else": [{"T": "no"}]}}) + tr = make_rule("test", "mo", {"test": {"if": "cond", "then": [{"T": "ja"}]}}) + diffs = diff_rules(en, tr) + assert any(d.diff_type == "structure" for d in diffs) + + def test_multiple_differences(self): + """Ensure multiple differences.""" + en = make_rule("test", "mo", {"match": "self::m:mo", "if": "cond1"}) + tr = make_rule("test", "mo", {"match": "self::m:mi", "if": "cond2"}) + diffs = diff_rules(en, tr) + assert len(diffs) == 2 + types = {d.diff_type for d in diffs} + assert "match" in types + assert "condition" in types + + def test_ignores_text_content_differences(self): + """Ensure ignores text content differences.""" + en = make_rule("test", "mo", {"replace": [{"T": "hello"}]}) + tr = make_rule("test", "mo", {"replace": [{"T": "hallo"}]}) + diffs = diff_rules(en, tr) + assert diffs == [] # text differences are intentional translations diff --git a/PythonScripts/audit_translations/tests/test_extractors.py b/PythonScripts/audit_translations/tests/test_extractors.py new file mode 100644 index 00000000..0bc0079e --- /dev/null +++ b/PythonScripts/audit_translations/tests/test_extractors.py @@ -0,0 +1,66 @@ +""" +Tests for extractors.py. +""" + +from ..extractors import ( + extract_conditions, + extract_match_pattern, + extract_structure_elements, + extract_variables, +) + + +class TestExtractMatchPattern: + def test_extracts_inline_match(self): + """Ensure extracts inline match.""" + data = {"match": "self::m:mo"} + assert extract_match_pattern(data) == "self::m:mo" + + def test_extracts_array_match(self): + """Ensure extracts array match.""" + data = {"match": ["self::m:mo", "@intent"]} + assert extract_match_pattern(data) == "self::m:mo @intent" + + def test_returns_empty_for_no_match(self): + """Ensure returns empty for no match.""" + data = {"replace": [{"T": "text"}]} + assert extract_match_pattern(data) == "" + + +class TestExtractConditions: + def test_extracts_single_condition(self): + """Ensure extracts single condition.""" + data = {"if": "$Verbosity"} + assert extract_conditions(data) == ["$Verbosity"] + + def test_extracts_multiple_conditions(self): + """Ensure extracts multiple conditions.""" + data = {"if": "condition1", "then": "something", "else_test": {"if": "condition2"}} + conditions = extract_conditions(data) + assert "condition1" in conditions + assert "condition2" in conditions + + +class TestExtractVariables: + def test_extracts_variables(self): + """Ensure extracts variables.""" + data = {"variables": [{"name": "value"}, {"other": "val2"}]} + variables = extract_variables(data) + assert ("name", "value") in variables + assert ("other", "val2") in variables + + def test_returns_empty_for_no_variables(self): + """Ensure returns empty for no variables.""" + data = {"match": "."} + assert extract_variables(data) == [] + + +class TestExtractStructureElements: + def test_extracts_test_structure(self): + """Ensure extracts test structure.""" + data = {"test": {"if": "condition", "then": [{"T": "yes"}], "else": [{"T": "no"}]}} + elements = extract_structure_elements(data) + assert "test:" in elements + assert "if:" in elements + assert "then:" in elements + assert "else:" in elements diff --git a/PythonScripts/audit_translations/tests/test_parsers.py b/PythonScripts/audit_translations/tests/test_parsers.py index 9315ce5c..ed252cde 100644 --- a/PythonScripts/audit_translations/tests/test_parsers.py +++ b/PythonScripts/audit_translations/tests/test_parsers.py @@ -6,18 +6,10 @@ from ruamel.yaml import YAML from ruamel.yaml.scanner import ScannerError -from ..differ import diff_rules -from ..extractors import ( - extract_conditions, - extract_match_pattern, - extract_structure_elements, - extract_variables, -) -from ..models import RuleDifference, RuleInfo, UntranslatedEntry +from ..models import UntranslatedEntry from ..parsers import ( build_line_map, find_untranslated_text_entries, - find_untranslated_text_values, has_audit_ignore, parse_rules_file, parse_unicode_file, @@ -47,27 +39,27 @@ class TestFindUntranslatedTextKeys: def test_finds_lowercase_t(self): """Ensure finds lowercase t.""" content = {"t": "hello world"} - assert find_untranslated_text_values(content) == ["hello world"] + assert [e.text for e in find_untranslated_text_entries(content)] == ["hello world"] def test_finds_lowercase_ot(self): """Ensure finds lowercase ot.""" content = {"ot": "open paren"} - assert find_untranslated_text_values(content) == ["open paren"] + assert [e.text for e in find_untranslated_text_entries(content)] == ["open paren"] def test_finds_lowercase_ct(self): """Ensure finds lowercase ct.""" content = {"ct": "close paren"} - assert find_untranslated_text_values(content) == ["close paren"] + assert [e.text for e in find_untranslated_text_entries(content)] == ["close paren"] def test_finds_multiple(self): """Ensure finds multiple.""" content = {"t": "one", "ot": "two", "ct": "three"} - assert set(find_untranslated_text_values(content)) == {"one", "two", "three"} + assert {e.text for e in find_untranslated_text_entries(content)} == {"one", "two", "three"} def test_ignores_uppercase_T(self): """Ensure ignores uppercase T.""" content = {"T": "translated"} - assert find_untranslated_text_values(content) == [] + assert [e.text for e in find_untranslated_text_entries(content)] == [] def test_finds_spell_and_pronounce(self): """Detects lowercase spell and pronounce markers. @@ -75,7 +67,7 @@ def test_finds_spell_and_pronounce(self): Extends coverage beyond basic t/ot/ct fields. Flags auxiliary translation-bearing keys.""" content = {"spell": "alpha", "pronounce": "beta"} - assert set(find_untranslated_text_values(content)) == {"alpha", "beta"} + assert {e.text for e in find_untranslated_text_entries(content)} == {"alpha", "beta"} def test_ignores_uppercase_variants(self): """Ignores uppercase variants of extended markers. @@ -83,22 +75,22 @@ def test_ignores_uppercase_variants(self): Honors already-verified spell/pronounce/IfThenElse content. Avoids double-reporting translated data.""" content = {"PRONOUNCE": "gamma", "IFTHENELSE": "delta"} - assert find_untranslated_text_values(content) == [] + assert [e.text for e in find_untranslated_text_entries(content)] == [] def test_ignores_variable_references(self): """Ensure ignores variable references.""" content = {"t": "$variable"} - assert find_untranslated_text_values(content) == [] + assert [e.text for e in find_untranslated_text_entries(content)] == [] def test_ignores_xpath_expressions(self): """Ensure ignores xpath expressions.""" content = {"t": "@attr"} - assert find_untranslated_text_values(content) == [] + assert [e.text for e in find_untranslated_text_entries(content)] == [] def test_ignores_single_punctuation(self): """Ensure ignores single punctuation.""" content = {"t": "."} - assert find_untranslated_text_values(content) == [] + assert [e.text for e in find_untranslated_text_entries(content)] == [] def test_finds_entries_with_lines(self): """Ensure finds entries with line numbers.""" @@ -347,62 +339,6 @@ def test_mixed_valid_and_skipped_items(self): assert rules[1].line_number == 7 -class TestExtractMatchPattern: - def test_extracts_inline_match(self): - """Ensure extracts inline match.""" - data = {"match": "self::m:mo"} - assert extract_match_pattern(data) == "self::m:mo" - - def test_extracts_array_match(self): - """Ensure extracts array match.""" - data = {"match": ["self::m:mo", "@intent"]} - assert extract_match_pattern(data) == "self::m:mo @intent" - - def test_returns_empty_for_no_match(self): - """Ensure returns empty for no match.""" - data = {"replace": [{"T": "text"}]} - assert extract_match_pattern(data) == "" - - -class TestExtractConditions: - def test_extracts_single_condition(self): - """Ensure extracts single condition.""" - data = {"if": "$Verbosity"} - assert extract_conditions(data) == ["$Verbosity"] - - def test_extracts_multiple_conditions(self): - """Ensure extracts multiple conditions.""" - data = {"if": "condition1", "then": "something", "else_test": {"if": "condition2"}} - conditions = extract_conditions(data) - assert "condition1" in conditions - assert "condition2" in conditions - - -class TestExtractVariables: - def test_extracts_variables(self): - """Ensure extracts variables.""" - data = {"variables": [{"name": "value"}, {"other": "val2"}]} - variables = extract_variables(data) - assert ("name", "value") in variables - assert ("other", "val2") in variables - - def test_returns_empty_for_no_variables(self): - """Ensure returns empty for no variables.""" - data = {"match": "."} - assert extract_variables(data) == [] - - -class TestExtractStructureElements: - def test_extracts_test_structure(self): - """Ensure extracts test structure.""" - data = {"test": {"if": "condition", "then": [{"T": "yes"}], "else": [{"T": "no"}]}} - elements = extract_structure_elements(data) - assert "test:" in elements - assert "if:" in elements - assert "then:" in elements - assert "else:" in elements - - class TestBuildLineMap: def test_builds_line_map_for_rule_elements(self): """Ensure line map captures nested element lines.""" @@ -425,142 +361,3 @@ def test_builds_line_map_for_rule_elements(self): assert line_map["variables"] == [5] assert line_map["structure:test"] == [7] assert line_map["structure:if"] == [4, 8] - - -def make_rule(name: str, tag: str, data) -> RuleInfo: - """Helper to create RuleInfo for testing""" - return RuleInfo( - name=name, - tag=tag, - key=f"{name}|{tag}", - line_number=1, - raw_content="", - data=data, - ) - - -class TestDiffRules: - def test_identical_rules_no_diff(self): - """Ensure identical rules no diff.""" - data = {"name": "test", "tag": "mo", "match": "self::m:mo", "replace": [{"T": "text"}]} - en = make_rule("test", "mo", data) - tr = make_rule("test", "mo", data) - assert diff_rules(en, tr) == [] - - def test_detects_match_pattern_difference(self): - """Ensure detects match pattern difference.""" - en = make_rule("test", "mo", {"match": "self::m:mo"}) - tr = make_rule("test", "mo", {"match": "self::m:mi"}) - diffs = diff_rules(en, tr) - assert len(diffs) == 1 - assert diffs[0].diff_type == "match" - assert "self::m:mo" in diffs[0].english_snippet - assert "self::m:mi" in diffs[0].translated_snippet - - def test_detects_condition_difference(self): - """Ensure detects condition difference.""" - en = make_rule("test", "mo", {"if": "condition1"}) - tr = make_rule("test", "mo", {"if": "condition2"}) - diffs = diff_rules(en, tr) - assert any(d.diff_type == "condition" for d in diffs) - - def test_condition_snippet_preserves_rule_order(self): - """ - Condition snippets should preserve the order seen in each rule. - Originally, alphabetical order was used, which is not very helpful. - """ - en = make_rule( - "test", - "mo", - { - "test": { - "if": "condition_b", - "then": [ - { - "test": { - "if": "condition_a", - "then": [{"T": "x"}], - } - } - ], - } - }, - ) - tr = make_rule("test", "mo", {"if": "condition_c"}) - diffs: list[RuleDifference] = diff_rules(en, tr) - cond_diff: RuleDifference = next(d for d in diffs if d.diff_type == "condition") - assert cond_diff.english_snippet == "condition_b, condition_a" - assert cond_diff.translated_snippet == "condition_c" - - def test_condition_snippet_deduplicates_repeated_conditions(self): - """ - Repeated conditions should be shown once, in first-seen order. - """ - en = make_rule( - "test", - "mo", - { - "test": { - "if": "condition_a", - "then": [ - { - "test": { - "if": "condition_a", - "then": [{"T": "x"}], - } - }, - { - "test": { - "if": "condition_b", - "then": [{"T": "y"}], - } - }, - ], - } - }, - ) - tr = make_rule("test", "mo", {"if": "condition_c"}) - diffs: list[RuleDifference] = diff_rules(en, tr) - cond_diff: RuleDifference = next(d for d in diffs if d.diff_type == "condition") - - # without deduplication, we'd have "condition_a" repeated. - assert cond_diff.english_snippet == "condition_a, condition_b" - assert cond_diff.translated_snippet == "condition_c" - - def test_detects_missing_condition(self): - """Ensure detects missing condition.""" - en = make_rule("test", "mo", {"if": "condition1"}) - tr = make_rule("test", "mo", {"replace": [{"T": "text"}]}) - diffs = diff_rules(en, tr) - assert any(d.diff_type == "condition" for d in diffs) - - def test_detects_variable_difference(self): - """Ensure detects variable difference.""" - en = make_rule("test", "mo", {"variables": [{"foo": "bar"}]}) - tr = make_rule("test", "mo", {"variables": [{"baz": "qux"}]}) - diffs = diff_rules(en, tr) - assert any(d.diff_type == "variables" for d in diffs) - - def test_detects_structure_difference(self): - """Ensure detects structure difference.""" - en = make_rule("test", "mo", {"test": {"if": "cond", "then": [{"T": "yes"}], "else": [{"T": "no"}]}}) - tr = make_rule("test", "mo", {"test": {"if": "cond", "then": [{"T": "ja"}]}}) - diffs = diff_rules(en, tr) - assert any(d.diff_type == "structure" for d in diffs) - - def test_multiple_differences(self): - """Ensure multiple differences.""" - en = make_rule("test", "mo", {"match": "self::m:mo", "if": "cond1"}) - tr = make_rule("test", "mo", {"match": "self::m:mi", "if": "cond2"}) - diffs = diff_rules(en, tr) - assert len(diffs) == 2 - types = {d.diff_type for d in diffs} - assert "match" in types - assert "condition" in types - - def test_ignores_text_content_differences(self): - """Ensure ignores text content differences.""" - en = make_rule("test", "mo", {"replace": [{"T": "hello"}]}) - tr = make_rule("test", "mo", {"replace": [{"T": "hallo"}]}) - diffs = diff_rules(en, tr) - assert diffs == [] # text differences are intentional translations From 7e70bed74373ecc373bb1762dacf4ca6dfb6861a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Gro=C3=9F?= Date: Thu, 5 Mar 2026 04:55:46 +0100 Subject: [PATCH 7/7] use Enum instead of str --- PythonScripts/audit_translations/__init__.py | 5 +-- PythonScripts/audit_translations/cli.py | 1 + .../audit_translations/line_resolver.py | 32 +++++++++---------- .../audit_translations/tests/conftest.py | 4 +++ PythonScripts/pyproject.toml | 1 - PythonScripts/uv.lock | 11 ------- 6 files changed, 22 insertions(+), 32 deletions(-) create mode 100644 PythonScripts/audit_translations/tests/conftest.py diff --git a/PythonScripts/audit_translations/__init__.py b/PythonScripts/audit_translations/__init__.py index cd6dab62..1d076494 100644 --- a/PythonScripts/audit_translations/__init__.py +++ b/PythonScripts/audit_translations/__init__.py @@ -8,10 +8,7 @@ Read README.md for more details. """ -import sys - -sys.stdout.reconfigure(encoding="utf-8") -from .cli import main # noqa: E402 +from .cli import main __all__ = [ "main", diff --git a/PythonScripts/audit_translations/cli.py b/PythonScripts/audit_translations/cli.py index 711089ca..aaba1444 100644 --- a/PythonScripts/audit_translations/cli.py +++ b/PythonScripts/audit_translations/cli.py @@ -14,6 +14,7 @@ def main() -> None: """Main entry point for the audit tool""" + sys.stdout.reconfigure(encoding="utf-8") parser = argparse.ArgumentParser( description="Audit MathCAT translation files against English originals", diff --git a/PythonScripts/audit_translations/line_resolver.py b/PythonScripts/audit_translations/line_resolver.py index da25d200..ee22c59c 100644 --- a/PythonScripts/audit_translations/line_resolver.py +++ b/PythonScripts/audit_translations/line_resolver.py @@ -4,15 +4,15 @@ Maps rule diff types and structure tokens to precise YAML source line numbers. """ -from .models import DiffType, RuleDifference, RuleInfo from .extractors import extract_structure_elements +from .models import DiffType, RuleDifference, RuleInfo -def _get_line_map_lines(rule: RuleInfo, kind: DiffType | str, token: str | None = None) -> list[int]: +def _get_line_map_lines(rule: RuleInfo, kind: DiffType, token: str | None = None) -> list[int]: """Return the line-number list for a given element kind from the rule's line map.""" - if kind in ("match", "condition", "variables"): + if kind in (DiffType.MATCH, DiffType.CONDITION, DiffType.VARIABLES): return rule.line_map.get(kind, []) - if kind == "structure" and token: + if kind == DiffType.STRUCTURE and token: return rule.line_map.get(f"structure:{token.rstrip(':')}", []) return [] @@ -40,7 +40,7 @@ def first_structure_mismatch( def resolve_issue_line_at_position( rule: RuleInfo, - kind: DiffType | str, + kind: DiffType, token: str | None = None, position: int = 0, ) -> int | None: @@ -60,7 +60,7 @@ def resolve_issue_line_at_position( return lines[position] if position < len(lines) else None -def resolve_issue_line(rule: RuleInfo, kind: DiffType | str, token: str | None = None) -> int | None: +def resolve_issue_line(rule: RuleInfo, kind: DiffType, token: str | None = None) -> int | None: """ Resolve the line number for an issue within a rule. @@ -69,7 +69,7 @@ def resolve_issue_line(rule: RuleInfo, kind: DiffType | str, token: str | None = to rule.line_number to avoid misleading line numbers when elements are missing. """ lines = _get_line_map_lines(rule, kind, token) - if kind == "structure" and token: + if kind == DiffType.STRUCTURE and token: return lines[0] if lines else None return lines[0] if lines else rule.line_number @@ -115,27 +115,27 @@ def resolve_structure_issue_lines(diff: RuleDifference) -> tuple[int, int] | Non en_occ = structure_token_occurrence_index(en_tokens, anchor_pos) tr_occ = structure_token_occurrence_index(tr_tokens, anchor_pos) if en_occ is not None and tr_occ is not None: - line_en = resolve_issue_line_at_position(diff.english_rule, "structure", anchor_token, en_occ) - line_tr = resolve_issue_line_at_position(diff.translated_rule, "structure", anchor_token, tr_occ) + line_en = resolve_issue_line_at_position(diff.english_rule, DiffType.STRUCTURE, anchor_token, en_occ) + line_tr = resolve_issue_line_at_position(diff.translated_rule, DiffType.STRUCTURE, anchor_token, tr_occ) if line_en is not None and line_tr is not None: return line_en, line_tr # Fallback: anchor both sides to replace, which is the rule body entrypoint. - line_en = resolve_issue_line(diff.english_rule, "structure", "replace:") or diff.english_rule.line_number - line_tr = resolve_issue_line(diff.translated_rule, "structure", "replace:") or diff.translated_rule.line_number + line_en = resolve_issue_line(diff.english_rule, DiffType.STRUCTURE, "replace:") or diff.english_rule.line_number + line_tr = resolve_issue_line(diff.translated_rule, DiffType.STRUCTURE, "replace:") or diff.translated_rule.line_number return line_en, line_tr # Exact token available on both sides: resolve by occurrence index at mismatch. en_occ = structure_token_occurrence_index(en_tokens, mismatch_pos) tr_occ = structure_token_occurrence_index(tr_tokens, mismatch_pos) if en_occ is not None and tr_occ is not None: - line_en = resolve_issue_line_at_position(diff.english_rule, "structure", en_token, en_occ) - line_tr = resolve_issue_line_at_position(diff.translated_rule, "structure", tr_token, tr_occ) + line_en = resolve_issue_line_at_position(diff.english_rule, DiffType.STRUCTURE, en_token, en_occ) + line_tr = resolve_issue_line_at_position(diff.translated_rule, DiffType.STRUCTURE, tr_token, tr_occ) if line_en is not None and line_tr is not None: return line_en, line_tr - line_en = resolve_issue_line(diff.english_rule, "structure", en_token) - line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token) + line_en = resolve_issue_line(diff.english_rule, DiffType.STRUCTURE, en_token) + line_tr = resolve_issue_line(diff.translated_rule, DiffType.STRUCTURE, tr_token) if line_en is None or line_tr is None: return None return line_en, line_tr @@ -149,7 +149,7 @@ def resolve_diff_lines(diff: RuleDifference) -> tuple[int | None, int | None] | This is the single entry point used by the renderer to avoid duplicating the structure vs non-structure branching logic. """ - if diff.diff_type == "structure": + if diff.diff_type == DiffType.STRUCTURE: return resolve_structure_issue_lines(diff) return ( resolve_issue_line(diff.english_rule, diff.diff_type), diff --git a/PythonScripts/audit_translations/tests/conftest.py b/PythonScripts/audit_translations/tests/conftest.py new file mode 100644 index 00000000..c58afaad --- /dev/null +++ b/PythonScripts/audit_translations/tests/conftest.py @@ -0,0 +1,4 @@ +import sys + +# needed for running tests on Windows +sys.stdout.reconfigure(encoding="utf-8") diff --git a/PythonScripts/pyproject.toml b/PythonScripts/pyproject.toml index b4d422a3..6f919ee0 100644 --- a/PythonScripts/pyproject.toml +++ b/PythonScripts/pyproject.toml @@ -10,7 +10,6 @@ license = "MIT" readme = "README.md" requires-python = ">=3.14" dependencies = [ - "attrs", "beautifulsoup4", "deepl", "googletrans", diff --git a/PythonScripts/uv.lock b/PythonScripts/uv.lock index 9b09e2b8..993f3f44 100644 --- a/PythonScripts/uv.lock +++ b/PythonScripts/uv.lock @@ -14,15 +14,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, ] -[[package]] -name = "attrs" -version = "25.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" }, -] - [[package]] name = "beautifulsoup4" version = "4.14.3" @@ -283,7 +274,6 @@ name = "pythonscripts" version = "0.1.0" source = { editable = "." } dependencies = [ - { name = "attrs" }, { name = "beautifulsoup4" }, { name = "deepl" }, { name = "googletrans" }, @@ -302,7 +292,6 @@ dev = [ [package.metadata] requires-dist = [ - { name = "attrs" }, { name = "beautifulsoup4" }, { name = "deepl" }, { name = "googletrans" },