From 0c19972562364e4c0b1621b25a81ad842d421ee5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Thu, 5 Mar 2026 03:55:24 +0100
Subject: [PATCH 1/7] use Path instead of str. add UntranslatedEntry dataclass

---
 PythonScripts/audit_translations/auditor.py   | 27 ++++-----
 PythonScripts/audit_translations/cli.py       |  7 +--
 .../audit_translations/line_resolver.py       |  2 +-
 .../{dataclasses.py => models.py}             | 24 +++++---
 PythonScripts/audit_translations/parsers.py   | 19 +++----
 PythonScripts/audit_translations/renderer.py  |  6 +-
 .../audit_translations/tests/test_auditor.py  | 57 ++++++++++---------
 .../tests/test_cli_end_to_end.py              |  2 +-
 .../tests/test_line_resolver.py               |  2 +-
 .../tests/test_output_objects.py              |  8 +--
 .../audit_translations/tests/test_parsers.py  | 10 ++--
 11 files changed, 84 insertions(+), 80 deletions(-)
 rename PythonScripts/audit_translations/{dataclasses.py => models.py} (84%)

diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py
index d30126c5..223c4b75 100644
--- a/PythonScripts/audit_translations/auditor.py
+++ b/PythonScripts/audit_translations/auditor.py
@@ -11,13 +11,10 @@
 from rich.panel import Panel
 from rich.table import Table
 
-from .dataclasses import ComparisonResult, RuleInfo
+from .models import ComparisonResult, RuleInfo
 from .parsers import diff_rules, parse_yaml_file
 from .renderer import console, print_warnings
 
-# Re-export console so existing `from .auditor import console` callers keep working.
-__all__ = ["console"]
-
 GREEN_FILE_COUNT_THRESHOLD = 7
 YELLOW_FILE_COUNT_THRESHOLD = 4
 
@@ -72,16 +69,16 @@ def collect_from(directory: Path, root: Path) -> None:
 
 
 def compare_files(
-    english_path: str,
-    translated_path: str,
+    english_path: Path,
+    translated_path: Path,
     issue_filter: set[str] | None = None,
-    translated_region_path: str | None = None,
-    english_region_path: str | None = None,
+    translated_region_path: Path | None = None,
+    english_region_path: Path | None = None,
 ) -> ComparisonResult:
     """Compare English and translated YAML files"""
 
-    def load_rules(path: str | None) -> list[RuleInfo]:
-        if path and Path(path).exists():
+    def load_rules(path: Path | None) -> list[RuleInfo]:
+        if path and path.exists():
             rules, _ = parse_yaml_file(path)
             return rules
         return []
@@ -210,11 +207,11 @@ def audit_language(
             continue
 
         result = compare_files(
-            str(english_path),
-            str(translated_path),
+            english_path,
+            translated_path,
             issue_filter,
-            str(translated_region_path) if translated_region_path and translated_region_path.exists() else None,
-            str(english_region_path) if english_region_path and english_region_path.exists() else None,
+            translated_region_path if translated_region_path and translated_region_path.exists() else None,
+            english_region_path if english_region_path and english_region_path.exists() else None,
         )
 
         if result.has_issues:
@@ -226,7 +223,7 @@ def audit_language(
             files_ok += 1
 
         total_missing += len(result.missing_rules)
-        total_untranslated += sum(len(entries) for _, entries in result.untranslated_text)
+        total_untranslated += sum(len(entries) for _rule, entries in result.untranslated_text)
         total_extra += len(result.extra_rules)
         total_differences += len(result.rule_differences)
 
diff --git a/PythonScripts/audit_translations/cli.py b/PythonScripts/audit_translations/cli.py
index 1ae382a0..ed04c402 100644
--- a/PythonScripts/audit_translations/cli.py
+++ b/PythonScripts/audit_translations/cli.py
@@ -7,7 +7,8 @@
 import argparse
 import sys
 
-from .auditor import audit_language, console, list_languages
+from .auditor import audit_language, list_languages
+from .renderer import console
 
 
 def main() -> None:
@@ -65,7 +66,3 @@ def main() -> None:
             issue_filter,
             args.verbose,
         )
-
-
-if __name__ == "__main__":
-    main()
diff --git a/PythonScripts/audit_translations/line_resolver.py b/PythonScripts/audit_translations/line_resolver.py
index df867fd6..8950049d 100644
--- a/PythonScripts/audit_translations/line_resolver.py
+++ b/PythonScripts/audit_translations/line_resolver.py
@@ -4,7 +4,7 @@
 Maps rule diff types and structure tokens to precise YAML source line numbers.
 """
 
-from .dataclasses import DiffType, RuleDifference, RuleInfo
+from .models import DiffType, RuleDifference, RuleInfo
 from .parsers import extract_structure_elements
 
 
diff --git a/PythonScripts/audit_translations/dataclasses.py b/PythonScripts/audit_translations/models.py
similarity index 84%
rename from PythonScripts/audit_translations/dataclasses.py
rename to PythonScripts/audit_translations/models.py
index 03b27996..e156073c 100644
--- a/PythonScripts/audit_translations/dataclasses.py
+++ b/PythonScripts/audit_translations/models.py
@@ -6,6 +6,7 @@
 
 from dataclasses import dataclass, field
 from enum import StrEnum
+from pathlib import Path
 from typing import Any
 
 
@@ -27,6 +28,15 @@ class DiffType(StrEnum):
     STRUCTURE = "structure"  # Control-flow block shape/order differs (if/then/else/with/replace).
 
 
+@dataclass
+class UntranslatedEntry:
+    """A single untranslated text fragment found in a rule."""
+
+    key: str
+    text: str
+    line: int | None
+
+
 @dataclass
 class RuleInfo:
     """
@@ -46,9 +56,9 @@ class RuleInfo:
         Raw YAML block for this rule (used for reporting/snippets).
     data : Any | None
         Parsed YAML node for the rule; used for structural diffs.
-    untranslated_entries : list[tuple[str, str, int | None]]
-        List of (key, text, line) entries extracted from lowercase translation keys.
-        This preserves exact text fragments and YAML line numbers for diagnostics.
+    untranslated_entries : list[UntranslatedEntry]
+        Entries extracted from lowercase translation keys.
+        Preserves exact text fragments and YAML line numbers for diagnostics.
     line_map : dict[str, list[int]]
         Mapping of element type to line numbers for rule components like match,
         conditions, variables, and structural tokens. This is used to point
@@ -63,7 +73,7 @@ class RuleInfo:
     line_number: int
     raw_content: str
     data: Any | None = None
-    untranslated_entries: list[tuple[str, str, int | None]] = field(default_factory=list)
+    untranslated_entries: list[UntranslatedEntry] = field(default_factory=list)
     line_map: dict[str, list[int]] = field(default_factory=dict)
     audit_ignore: bool = False
 
@@ -73,7 +83,7 @@ def has_untranslated_text(self) -> bool:
 
     @property
     def untranslated_keys(self) -> list[str]:
-        return [entry[1] for entry in self.untranslated_entries]
+        return [entry.text for entry in self.untranslated_entries]
 
 
 @dataclass
@@ -98,8 +108,8 @@ class ComparisonResult:
 
     missing_rules: list[RuleInfo]  # Rules in English but not in translation
     extra_rules: list[RuleInfo]  # Rules in translation but not in English
-    untranslated_text: list[tuple[RuleInfo, list[tuple[str, str, int | None]]]]  # Rules with lowercase t/ot/ct
-    file_path: str
+    untranslated_text: list[tuple[RuleInfo, list[UntranslatedEntry]]]
+    file_path: Path | str
     english_rule_count: int
     translated_rule_count: int
     rule_differences: list[RuleDifference] = field(default_factory=list)  # Fine-grained diffs
diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py
index e1540300..f8142da3 100644
--- a/PythonScripts/audit_translations/parsers.py
+++ b/PythonScripts/audit_translations/parsers.py
@@ -13,7 +13,7 @@
 from ruamel.yaml import YAML
 from ruamel.yaml.scanner import ScannerError
 
-from .dataclasses import DiffType, RuleDifference, RuleInfo
+from .models import DiffType, RuleDifference, RuleInfo, UntranslatedEntry
 
 _yaml = YAML()
 _yaml.preserve_quotes = True
@@ -22,13 +22,12 @@
 _MATCH_EXPR = parse("$.match")
 
 
-def is_unicode_file(file_path: str) -> bool:
+def is_unicode_file(file_path: Path) -> bool:
     """Check if this is a unicode.yaml or unicode-full.yaml file"""
-    basename = Path(file_path).name
-    return basename in ("unicode.yaml", "unicode-full.yaml")
+    return file_path.name in ("unicode.yaml", "unicode-full.yaml")
 
 
-def parse_yaml_file(file_path: str, strict: bool = False) -> tuple[list[RuleInfo], str]:
+def parse_yaml_file(file_path: Path, strict: bool = False) -> tuple[list[RuleInfo], str]:
     """
     Parse a YAML file and extract rules.
     Returns list of RuleInfo and the raw file content.
@@ -165,15 +164,15 @@ def find_untranslated_text_values(node: Any) -> list[str]:
     Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) that should be uppercase in translations.
     Returns list of the untranslated text values found.
     """
-    return [entry[1] for entry in find_untranslated_text_entries(node)]
+    return [entry.text for entry in find_untranslated_text_entries(node)]
 
 
-def find_untranslated_text_entries(node: Any) -> list[tuple[str, str, int | None]]:
+def find_untranslated_text_entries(node: Any) -> list[UntranslatedEntry]:
     """
     Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) and their line numbers.
-    Returns list of (key, text, line_number) entries. Line number is 1-based when available.
+    Returns list of UntranslatedEntry. Line number is 1-based when available.
     """
-    entries: list[tuple[str, str, int | None]] = []
+    entries: list[UntranslatedEntry] = []
     translation_keys = {"t", "ot", "ct", "spell", "pronounce", "ifthenelse"}
 
     def should_add(text: str) -> bool:
@@ -185,7 +184,7 @@ def should_add(text: str) -> bool:
 
     for key, child, parent in iter_field_matches(node):
         if key.lower() in translation_keys and not key.isupper() and isinstance(child, str) and should_add(child):
-            entries.append((key, child, mapping_key_line(parent, key)))
+            entries.append(UntranslatedEntry(key, child, mapping_key_line(parent, key)))
     return entries
 
 
diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py
index aa4135c1..438347bc 100644
--- a/PythonScripts/audit_translations/renderer.py
+++ b/PythonScripts/audit_translations/renderer.py
@@ -10,7 +10,7 @@
 from rich.console import Console
 from rich.markup import escape
 
-from .dataclasses import ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo
+from .models import ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo, UntranslatedEntry
 from .line_resolver import resolve_diff_lines
 
 console = Console()
@@ -80,8 +80,8 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any])
         add_issue(rule, issue_group_key(IssueType.MISSING_RULE), {"line_en": rule.line_number})
 
     for rule, entries in result.untranslated_text:
-        for _, text, line in entries:
-            add_issue(rule, issue_group_key(IssueType.UNTRANSLATED_TEXT), {"line_tr": line or rule.line_number, "text": text})
+        for entry in entries:
+            add_issue(rule, issue_group_key(IssueType.UNTRANSLATED_TEXT), {"line_tr": entry.line or rule.line_number, "text": entry.text})
 
     for diff in result.rule_differences:
         lines = resolve_diff_lines(diff)
diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py
index 0909182d..921ddce5 100644
--- a/PythonScripts/audit_translations/tests/test_auditor.py
+++ b/PythonScripts/audit_translations/tests/test_auditor.py
@@ -6,8 +6,9 @@
 
 import pytest
 
-from ..auditor import compare_files, console, get_yaml_files, list_languages
-from ..dataclasses import ComparisonResult, DiffType, RuleDifference, RuleInfo
+from ..auditor import compare_files, get_yaml_files, list_languages
+from ..renderer import console
+from ..models import ComparisonResult, DiffType, RuleDifference, RuleInfo, UntranslatedEntry
 from ..line_resolver import resolve_diff_lines
 from ..renderer import print_warnings
 
@@ -57,12 +58,12 @@ def aggregate_issue_counts(
     missing = untranslated = extra = diffs = total = 0
     for file_name in files:
         result = compare_files(
-            str(english_dir / file_name),
-            str(translated_dir / file_name),
+            english_dir / file_name,
+            translated_dir / file_name,
             issue_filter,
         )
         missing += len(result.missing_rules)
-        untranslated += sum(len(entries) for _, entries in result.untranslated_text)
+        untranslated += sum(len(entries) for _rule, entries in result.untranslated_text)
         extra += len(result.extra_rules)
         diffs += len(result.rule_differences)
         total += len(result.missing_rules) + len(result.extra_rules) + len(result.rule_differences)
@@ -91,7 +92,7 @@ def test_comparison_result_object_fields() -> None:
     result = ComparisonResult(
         missing_rules=[missing],
         extra_rules=[extra],
-        untranslated_text=[(untranslated, [("t", "x", 31)])],
+        untranslated_text=[(untranslated, [UntranslatedEntry("t", "x", 31)])],
         rule_differences=[diff],
         file_path="",
         english_rule_count=1,
@@ -101,7 +102,7 @@ def test_comparison_result_object_fields() -> None:
     assert result.missing_rules[0].line_number == 10
     assert result.extra_rules[0].line_number == 20
     assert result.untranslated_text[0][0].line_number == 30
-    assert result.untranslated_text[0][1] == [("t", "x", 31)]
+    assert result.untranslated_text[0][1] == [UntranslatedEntry("t", "x", 31)]
     assert result.rule_differences[0].diff_type is DiffType.MATCH
     assert resolve_diff_lines(result.rule_differences[0]) == (40, 41)
 
@@ -187,10 +188,10 @@ def test_compare_files_merges_region_rules(tmp_path) -> None:
     )
 
     result = compare_files(
-        str(english_file),
-        str(translated_file),
+        english_file,
+        translated_file,
         None,
-        str(translated_region_file),
+        translated_region_file,
     )
 
     assert result.missing_rules == []
@@ -227,7 +228,7 @@ def test_compare_files_skips_untranslated_and_diffs_when_audit_ignored(tmp_path)
         encoding="utf-8",
     )
 
-    result = compare_files(str(english_file), str(translated_file))
+    result = compare_files(english_file, translated_file)
 
     assert result.missing_rules == []
     assert result.extra_rules == []
@@ -315,8 +316,8 @@ def test_print_warnings_omits_snippets_when_not_verbose(fixed_console_width) ->
     fixtures_dir = base_dir / "fixtures"
     golden_path = base_dir / "golden" / "rich" / "structure_diff_nonverbose.golden"
     result = compare_files(
-        str(fixtures_dir / "en" / "structure_diff.yaml"),
-        str(fixtures_dir / "de" / "structure_diff.yaml"),
+        fixtures_dir / "en" / "structure_diff.yaml",
+        fixtures_dir / "de" / "structure_diff.yaml",
     )
 
     with console.capture() as capture:
@@ -336,8 +337,8 @@ def test_print_warnings_includes_snippets_when_verbose(fixed_console_width) -> N
     fixtures_dir = base_dir / "fixtures"
     golden_path = base_dir / "golden" / "rich" / "structure_diff_verbose.golden"
     result = compare_files(
-        str(fixtures_dir / "en" / "structure_diff.yaml"),
-        str(fixtures_dir / "de" / "structure_diff.yaml"),
+        fixtures_dir / "en" / "structure_diff.yaml",
+        fixtures_dir / "de" / "structure_diff.yaml",
     )
 
     with console.capture() as capture:
@@ -359,8 +360,8 @@ def test_misaligned_structure_differences_are_reported() -> None:
     fixtures_dir = base_dir / "fixtures"
 
     result = compare_files(
-        str(fixtures_dir / "en" / "structure_misaligned.yaml"),
-        str(fixtures_dir / "de" / "structure_misaligned.yaml"),
+        fixtures_dir / "en" / "structure_misaligned.yaml",
+        fixtures_dir / "de" / "structure_misaligned.yaml",
     )
 
     # The result should detect that structures differ
@@ -384,8 +385,8 @@ def test_missing_else_block_is_still_reported() -> None:
     fixtures_dir = base_dir / "fixtures"
 
     result = compare_files(
-        str(fixtures_dir / "en" / "structure_missing_else.yaml"),
-        str(fixtures_dir / "de" / "structure_missing_else.yaml"),
+        fixtures_dir / "en" / "structure_missing_else.yaml",
+        fixtures_dir / "de" / "structure_missing_else.yaml",
     )
 
     # Should detect structure difference
@@ -434,7 +435,7 @@ def test_structure_diff_uses_position_aware_token_occurrence_for_missing_block(t
         encoding="utf-8",
     )
 
-    result = compare_files(str(english_file), str(translated_file))
+    result = compare_files(english_file, translated_file)
     lines_by_type = resolved_diff_lines_by_type(result)
     assert len(lines_by_type.get("structure", [])) == 1
     assert lines_by_type["structure"][0] == (7, 7)
@@ -469,7 +470,7 @@ def test_structure_substitution_diff_is_reported(tmp_path) -> None:
         encoding="utf-8",
     )
 
-    result = compare_files(str(english_file), str(translated_file))
+    result = compare_files(english_file, translated_file)
     assert any(diff.diff_type == "structure" for diff in result.rule_differences)
 
     lines_by_type = resolved_diff_lines_by_type(result)
@@ -487,7 +488,7 @@ def test_structure_per_fraction_should_anchor_to_replace_lines_expected_behavior
     """
     base_dir = Path(__file__).parent
     path = base_dir / "fixtures" / "repro"
-    result = compare_files(str(path / "en" / "per_fraction.yaml"), str(path / "nb" / "per_fraction.yaml"))
+    result = compare_files(path / "en" / "per_fraction.yaml", path / "nb" / "per_fraction.yaml")
 
     lines_by_type = resolved_diff_lines_by_type(result)
     assert len(lines_by_type.get("structure", [])) == 1
@@ -502,8 +503,8 @@ def test_print_warnings_shows_misaligned_structures() -> None:
     fixtures_dir = base_dir / "fixtures"
 
     result = compare_files(
-        str(fixtures_dir / "en" / "structure_misaligned.yaml"),
-        str(fixtures_dir / "de" / "structure_misaligned.yaml"),
+        fixtures_dir / "en" / "structure_misaligned.yaml",
+        fixtures_dir / "de" / "structure_misaligned.yaml",
     )
 
     # Raw result should have structure differences detected
@@ -533,8 +534,8 @@ def test_print_warnings_still_shows_missing_else() -> None:
     fixtures_dir = base_dir / "fixtures"
 
     result = compare_files(
-        str(fixtures_dir / "en" / "structure_missing_else.yaml"),
-        str(fixtures_dir / "de" / "structure_missing_else.yaml"),
+        fixtures_dir / "en" / "structure_missing_else.yaml",
+        fixtures_dir / "de" / "structure_missing_else.yaml",
     )
 
     with console.capture() as capture:
@@ -590,7 +591,7 @@ def test_print_warnings_groups_multiple_subgroups_for_single_rule(fixed_console_
     result = ComparisonResult(
         missing_rules=[],
         extra_rules=[],
-        untranslated_text=[(tr, [("t", "first", 24), ("ct", "second", 25)])],
+        untranslated_text=[(tr, [UntranslatedEntry("t", "first", 24), UntranslatedEntry("ct", "second", 25)])],
         rule_differences=diffs,
         file_path="",
         english_rule_count=1,
@@ -687,7 +688,7 @@ def test_print_warnings_verbose_shows_snippets_only_for_differences(fixed_consol
     result = ComparisonResult(
         missing_rules=[missing],
         extra_rules=[],
-        untranslated_text=[(tr_untranslated, [("t", "leave me", 21)])],
+        untranslated_text=[(tr_untranslated, [UntranslatedEntry("t", "leave me", 21)])],
         rule_differences=[diff],
         file_path="",
         english_rule_count=2,
diff --git a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py
index 3c9a10fc..2547b9ec 100644
--- a/PythonScripts/audit_translations/tests/test_cli_end_to_end.py
+++ b/PythonScripts/audit_translations/tests/test_cli_end_to_end.py
@@ -12,7 +12,7 @@
 import pytest
 
 from .. import cli as audit_cli
-from ..auditor import console
+from ..renderer import console
 
 
 def fixture_rules_dir() -> Path:
diff --git a/PythonScripts/audit_translations/tests/test_line_resolver.py b/PythonScripts/audit_translations/tests/test_line_resolver.py
index e5f1eb0a..7bdb3292 100644
--- a/PythonScripts/audit_translations/tests/test_line_resolver.py
+++ b/PythonScripts/audit_translations/tests/test_line_resolver.py
@@ -2,7 +2,7 @@
 Unit tests for line_resolver.py.
 """
 
-from ..dataclasses import RuleDifference, RuleInfo
+from ..models import RuleDifference, RuleInfo
 from ..line_resolver import first_structure_mismatch, resolve_diff_lines
 
 
diff --git a/PythonScripts/audit_translations/tests/test_output_objects.py b/PythonScripts/audit_translations/tests/test_output_objects.py
index 316bff06..5390a234 100644
--- a/PythonScripts/audit_translations/tests/test_output_objects.py
+++ b/PythonScripts/audit_translations/tests/test_output_objects.py
@@ -16,8 +16,8 @@ def collect_issue_tuples(language: str = "de", issue_filter: set[str] | None = N
     for english_path in sorted(english_dir.glob("*.yaml")):
         file_name = english_path.name
         result = compare_files(
-            str(english_path),
-            str(translated_dir / file_name),
+            english_path,
+            translated_dir / file_name,
             issue_filter,
         )
 
@@ -28,8 +28,8 @@ def collect_issue_tuples(language: str = "de", issue_filter: set[str] | None = N
             rows.append((file_name, "extra_rule", rule.key, "", None, rule.line_number, ""))
 
         for rule, entries in result.untranslated_text:
-            for _key, text, line in entries:
-                rows.append((file_name, "untranslated_text", rule.key, "", None, line or rule.line_number, text))
+            for entry in entries:
+                rows.append((file_name, "untranslated_text", rule.key, "", None, entry.line or rule.line_number, entry.text))
 
         for diff in result.rule_differences:
             lines = resolve_diff_lines(diff)
diff --git a/PythonScripts/audit_translations/tests/test_parsers.py b/PythonScripts/audit_translations/tests/test_parsers.py
index 2463209a..32dbb70e 100644
--- a/PythonScripts/audit_translations/tests/test_parsers.py
+++ b/PythonScripts/audit_translations/tests/test_parsers.py
@@ -6,7 +6,7 @@
 from ruamel.yaml import YAML
 from ruamel.yaml.scanner import ScannerError
 
-from ..dataclasses import RuleDifference, RuleInfo
+from ..models import RuleDifference, RuleInfo, UntranslatedEntry
 from ..parsers import (
     build_line_map,
     diff_rules,
@@ -108,7 +108,7 @@ def test_finds_entries_with_lines(self):
 """
         data = yaml.load(content)
         entries = find_untranslated_text_entries(data[0])
-        assert entries == [("t", "not translated", 4)]
+        assert entries == [UntranslatedEntry("t", "not translated", 4)]
 
 
 class TestParseRulesFile:
@@ -159,7 +159,7 @@ def test_detects_untranslated_text(self):
         rules = parse_rules_file(content, data)
         assert rules[0].has_untranslated_text
         assert "not translated" in rules[0].untranslated_keys
-        assert rules[0].untranslated_entries == [("t", "not translated", 4)]
+        assert rules[0].untranslated_entries == [UntranslatedEntry("t", "not translated", 4)]
 
     def test_detects_audit_ignore(self):
         """Ensure detects audit ignore."""
@@ -248,7 +248,7 @@ def test_parse_yaml_file_handles_tabs(self, tmp_path):
         file_path.write_text(content, encoding="utf-8")
         from ..parsers import parse_yaml_file
 
-        rules, _ = parse_yaml_file(str(file_path))
+        rules, _ = parse_yaml_file(file_path)
         assert len(rules) == 1
         assert rules[0].name == "tabbed"
 
@@ -265,7 +265,7 @@ def test_parse_yaml_file_strict_rejects_tabs(self, tmp_path):
         from ..parsers import parse_yaml_file
 
         with pytest.raises(ScannerError):
-            parse_yaml_file(str(file_path), strict=True)
+            parse_yaml_file(file_path, strict=True)
 
 
 class TestParseUnicodeFile:

From a17bc58610a40ad2c5f4e8d0b469053310861bb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Thu, 5 Mar 2026 03:58:34 +0100
Subject: [PATCH 2/7] run ruff

---
 PythonScripts/audit_translations/renderer.py             | 9 +++++++--
 PythonScripts/audit_translations/tests/test_auditor.py   | 5 ++---
 .../audit_translations/tests/test_line_resolver.py       | 2 +-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py
index 438347bc..da422ea1 100644
--- a/PythonScripts/audit_translations/renderer.py
+++ b/PythonScripts/audit_translations/renderer.py
@@ -10,8 +10,8 @@
 from rich.console import Console
 from rich.markup import escape
 
-from .models import ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo, UntranslatedEntry
 from .line_resolver import resolve_diff_lines
+from .models import ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo
 
 console = Console()
 
@@ -81,7 +81,12 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any])
 
     for rule, entries in result.untranslated_text:
         for entry in entries:
-            add_issue(rule, issue_group_key(IssueType.UNTRANSLATED_TEXT), {"line_tr": entry.line or rule.line_number, "text": entry.text})
+            add_issue(
+                rule,
+                issue_group_key(IssueType.UNTRANSLATED_TEXT),
+                {"line_tr": entry.line or rule.line_number,
+                "text": entry.text}
+            )
 
     for diff in result.rule_differences:
         lines = resolve_diff_lines(diff)
diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py
index 921ddce5..0214bf56 100644
--- a/PythonScripts/audit_translations/tests/test_auditor.py
+++ b/PythonScripts/audit_translations/tests/test_auditor.py
@@ -7,10 +7,9 @@
 import pytest
 
 from ..auditor import compare_files, get_yaml_files, list_languages
-from ..renderer import console
-from ..models import ComparisonResult, DiffType, RuleDifference, RuleInfo, UntranslatedEntry
 from ..line_resolver import resolve_diff_lines
-from ..renderer import print_warnings
+from ..models import ComparisonResult, DiffType, RuleDifference, RuleInfo, UntranslatedEntry
+from ..renderer import console, print_warnings
 
 
 @pytest.fixture()
diff --git a/PythonScripts/audit_translations/tests/test_line_resolver.py b/PythonScripts/audit_translations/tests/test_line_resolver.py
index 7bdb3292..569ce4ef 100644
--- a/PythonScripts/audit_translations/tests/test_line_resolver.py
+++ b/PythonScripts/audit_translations/tests/test_line_resolver.py
@@ -2,8 +2,8 @@
 Unit tests for line_resolver.py.
 """
 
-from ..models import RuleDifference, RuleInfo
 from ..line_resolver import first_structure_mismatch, resolve_diff_lines
+from ..models import RuleDifference, RuleInfo
 
 
 def _make_rule(name: str, line_map: dict, line_number: int = 1) -> RuleInfo:

From af46adc6bc2ca60447da2ce01b1570cc0f991cab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Thu, 5 Mar 2026 04:01:09 +0100
Subject: [PATCH 3/7] run ruff

---
 PythonScripts/audit_translations/renderer.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py
index da422ea1..a8eb8c12 100644
--- a/PythonScripts/audit_translations/renderer.py
+++ b/PythonScripts/audit_translations/renderer.py
@@ -84,8 +84,7 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any])
             add_issue(
                 rule,
                 issue_group_key(IssueType.UNTRANSLATED_TEXT),
-                {"line_tr": entry.line or rule.line_number,
-                "text": entry.text}
+                {"line_tr": entry.line or rule.line_number, "text": entry.text},
             )
 
     for diff in result.rule_differences:

From 1bac7595dcf62cc36c06c76b94ecb5c5cc0ae90e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Thu, 5 Mar 2026 04:07:29 +0100
Subject: [PATCH 4/7] modularize functionality

---
 PythonScripts/audit_translations/auditor.py   |   3 +-
 PythonScripts/audit_translations/differ.py    |  98 ++++++++++
 .../audit_translations/extractors.py          |  98 ++++++++++
 .../audit_translations/line_resolver.py       |   2 +-
 PythonScripts/audit_translations/parsers.py   | 176 +-----------------
 .../audit_translations/tests/test_parsers.py  |  10 +-
 6 files changed, 207 insertions(+), 180 deletions(-)
 create mode 100644 PythonScripts/audit_translations/differ.py
 create mode 100644 PythonScripts/audit_translations/extractors.py

diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py
index 223c4b75..0f61c080 100644
--- a/PythonScripts/audit_translations/auditor.py
+++ b/PythonScripts/audit_translations/auditor.py
@@ -12,7 +12,8 @@
 from rich.table import Table
 
 from .models import ComparisonResult, RuleInfo
-from .parsers import diff_rules, parse_yaml_file
+from .differ import diff_rules
+from .parsers import parse_yaml_file
 from .renderer import console, print_warnings
 
 GREEN_FILE_COUNT_THRESHOLD = 7
diff --git a/PythonScripts/audit_translations/differ.py b/PythonScripts/audit_translations/differ.py
new file mode 100644
index 00000000..574c4399
--- /dev/null
+++ b/PythonScripts/audit_translations/differ.py
@@ -0,0 +1,98 @@
+"""
+Rule diffing logic.
+
+Compares English and translated rules to find fine-grained structural differences.
+"""
+
+from .extractors import (
+    extract_conditions,
+    extract_match_pattern,
+    extract_structure_elements,
+    extract_variables,
+    normalize_xpath,
+)
+from .models import DiffType, RuleDifference, RuleInfo
+
+
+def dedup_list(values: list[str]) -> list[str]:
+    """
+    Return a list without duplicates while preserving first-seen order.
+    Rule differences used to be stored as sets, which discarded their original order.
+    This helper replaces that approach: duplicates are dropped but the order is kept.
+
+    Example:
+        >>> dedup_list(["if:a", "if:b", "if:a"])
+        ['if:a', 'if:b']
+    """
+    return list(dict.fromkeys(values))  # dict preserves insertion order (guaranteed in Python 3.7+)
+
+
+def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> list[RuleDifference]:
+    """
+    Compare two rules and return fine-grained differences.
+    Ignores text content differences (T/t values) but catches structural changes.
+    """
+    differences: list[RuleDifference] = []
+
+    def add_difference(diff_type: DiffType, description: str, english_snippet: str, translated_snippet: str) -> None:
+        differences.append(
+            RuleDifference(
+                english_rule,
+                translated_rule,
+                diff_type,
+                description,
+                english_snippet,
+                translated_snippet,
+            )
+        )
+
+    # Check match pattern differences
+    en_match_raw = extract_match_pattern(english_rule.data)
+    tr_match_raw = extract_match_pattern(translated_rule.data)
+    en_match = normalize_xpath(en_match_raw)
+    tr_match = normalize_xpath(tr_match_raw)
+    if en_match != tr_match and en_match and tr_match:
+        add_difference(DiffType.MATCH, "Match pattern differs", en_match, tr_match)
+
+    # Check condition differences
+    en_conditions_raw = extract_conditions(english_rule.data)
+    tr_conditions_raw = extract_conditions(translated_rule.data)
+    en_conditions = [normalize_xpath(c) for c in en_conditions_raw]
+    tr_conditions = [normalize_xpath(c) for c in tr_conditions_raw]
+    if en_conditions != tr_conditions:
+        # Find specific differences
+        en_set, tr_set = set(en_conditions), set(tr_conditions)
+        if en_set != tr_set:
+            add_difference(
+                DiffType.CONDITION,
+                "Conditions differ",
+                ", ".join(dedup_list(en_conditions)) or "(none)",
+                ", ".join(dedup_list(tr_conditions)) or "(none)",
+            )
+
+    # Check variable differences
+    en_vars = extract_variables(english_rule.data)
+    tr_vars = extract_variables(translated_rule.data)
+    if en_vars != tr_vars:
+        en_var_names = {v[0] for v in en_vars}
+        tr_var_names = {v[0] for v in tr_vars}
+        if en_var_names != tr_var_names:
+            add_difference(
+                DiffType.VARIABLES,
+                "Variable definitions differ",
+                ", ".join(sorted(en_var_names)) or "(none)",
+                ", ".join(sorted(tr_var_names)) or "(none)",
+            )
+
+    # Check structural differences (test/if/then/else blocks)
+    en_structure = extract_structure_elements(english_rule.data)
+    tr_structure = extract_structure_elements(translated_rule.data)
+    if en_structure != tr_structure:
+        add_difference(
+            DiffType.STRUCTURE,
+            "Rule structure differs (test/if/then/else blocks)",
+            " ".join(en_structure),
+            " ".join(tr_structure),
+        )
+
+    return differences
diff --git a/PythonScripts/audit_translations/extractors.py b/PythonScripts/audit_translations/extractors.py
new file mode 100644
index 00000000..82121a48
--- /dev/null
+++ b/PythonScripts/audit_translations/extractors.py
@@ -0,0 +1,98 @@
+"""
+Rule data extraction functions.
+
+Extracts structural elements, match patterns, conditions, and variables
+from parsed YAML rule data.
+"""
+
+from collections.abc import Iterator
+from typing import Any
+
+from jsonpath_ng.ext import parse
+from jsonpath_ng.jsonpath import Fields
+
+_ALL_FIELDS_EXPR = parse("$..*")  # '..' is recursive descent
+_MATCH_EXPR = parse("$.match")
+
+
+def mapping_key_line(mapping: Any, key: str) -> int | None:
+    """
+    Return the 1-based line of `key`, or None if `mapping` has no 'lc' data; 'lc' is line and column in YAML file: https://yaml.dev/doc/ruamel.yaml/detail/
+    """
+    if hasattr(mapping, "lc") and hasattr(mapping.lc, "data"):
+        line_info = mapping.lc.data.get(key)
+        return line_info[0] + 1
+    return None
+
+
+def iter_field_matches(node: Any) -> Iterator[tuple[str, Any, Any]]:
+    """
+    Iterate nested mapping fields using jsonpath.
+
+    Returns tuples of (key, child_value, parent_mapping) in traversal order.
+    """
+    for match in _ALL_FIELDS_EXPR.find(node):
+        path = match.path
+        if isinstance(path, Fields) and len(path.fields) == 1:
+            key = path.fields[0]
+            parent = match.context.value if match.context is not None else None
+            yield key, match.value, parent
+
+
+def normalize_match(value: Any) -> str:
+    if isinstance(value, list):
+        return " ".join(str(item) for item in value)
+    if isinstance(value, str):
+        return value
+    return ""
+
+
+def normalize_xpath(value: str) -> str:
+    return " ".join(value.split())
+
+
+def extract_match_pattern(rule_data: Any) -> str:
+    if isinstance(rule_data, dict):
+        matches = _MATCH_EXPR.find(rule_data)
+        if matches:
+            return normalize_match(matches[0].value)
+    return ""
+
+
+def extract_conditions(rule_data: Any) -> list[str]:
+    """Extract all `if`/`else_if` condition strings from a rule, in traversal order"""
+    conditions: list[str] = []
+    for key, child, _ in iter_field_matches(rule_data):
+        if key in ("if", "else_if") and isinstance(child, str):
+            conditions.append(child)
+    return conditions
+
+
+def extract_variables(rule_data: Any) -> list[tuple[str, str]]:
+    """Extract variable definitions from a rule"""
+    variables: list[tuple[str, str]] = []
+
+    def add_from_value(value: Any) -> None:
+        if isinstance(value, dict):
+            for name, expr in value.items():
+                variables.append((str(name), str(expr)))
+        elif isinstance(value, list):
+            for item in value:
+                if isinstance(item, dict):
+                    for name, expr in item.items():
+                        variables.append((str(name), str(expr)))
+
+    for key, child, _ in iter_field_matches(rule_data):
+        if key == "variables":
+            add_from_value(child)
+    return variables
+
+
+def extract_structure_elements(rule_data: Any) -> list[str]:
+    """Extract structural keys (test/if/then/else variants, with, replace, intent) as 'key:' tokens, ignoring text content"""
+    elements: list[str] = []
+    tokens = {"test", "if", "else_if", "then", "else", "then_test", "else_test", "with", "replace", "intent"}
+    for key, _, _ in iter_field_matches(rule_data):
+        if key in tokens:
+            elements.append(f"{key}:")
+    return elements
diff --git a/PythonScripts/audit_translations/line_resolver.py b/PythonScripts/audit_translations/line_resolver.py
index 8950049d..da25d200 100644
--- a/PythonScripts/audit_translations/line_resolver.py
+++ b/PythonScripts/audit_translations/line_resolver.py
@@ -5,7 +5,7 @@
 """
 
 from .models import DiffType, RuleDifference, RuleInfo
-from .parsers import extract_structure_elements
+from .extractors import extract_structure_elements
 
 
 def _get_line_map_lines(rule: RuleInfo, kind: DiffType | str, token: str | None = None) -> list[int]:
diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py
index f8142da3..590bc085 100644
--- a/PythonScripts/audit_translations/parsers.py
+++ b/PythonScripts/audit_translations/parsers.py
@@ -4,23 +4,18 @@
 Handles parsing of rule files and unicode files to extract rule information.
 """
 
-from collections.abc import Iterator
 from pathlib import Path
 from typing import Any
 
-from jsonpath_ng.ext import parse
-from jsonpath_ng.jsonpath import Fields
 from ruamel.yaml import YAML
 from ruamel.yaml.scanner import ScannerError
 
-from .models import DiffType, RuleDifference, RuleInfo, UntranslatedEntry
+from .extractors import iter_field_matches, mapping_key_line
+from .models import RuleInfo, UntranslatedEntry
 
 _yaml = YAML()
 _yaml.preserve_quotes = True
 
-_ALL_FIELDS_EXPR = parse("$..*")  # '..' is recursive descent
-_MATCH_EXPR = parse("$.match")
-
 
 def is_unicode_file(file_path: Path) -> bool:
     """Check if this is a unicode.yaml or unicode-full.yaml file"""
@@ -73,30 +68,6 @@ def build_raw_blocks(lines: list[str], starts: list[int]) -> list[str]:
     return blocks
 
 
-def mapping_key_line(mapping: Any, key: str) -> int | None:
-    """
-    - 'lc' is line and column in YAML file: https://yaml.dev/doc/ruamel.yaml/detail/
-    """
-    if hasattr(mapping, "lc") and hasattr(mapping.lc, "data"):
-        line_info = mapping.lc.data.get(key)
-        return line_info[0] + 1
-    return None
-
-
-def iter_field_matches(node: Any) -> Iterator[tuple[str, Any, Any]]:
-    """
-    Iterate nested mapping fields using jsonpath.
-
-    Returns tuples of (key, child_value, parent_mapping) in traversal order.
-    """
-    for match in _ALL_FIELDS_EXPR.find(node):
-        path = match.path
-        if isinstance(path, Fields) and len(path.fields) == 1:
-            key = path.fields[0]
-            parent = match.context.value if match.context is not None else None
-            yield key, match.value, parent
-
-
 def _extract_item_fields(item: Any, is_unicode: bool) -> tuple[str, str | None, str | None, Any] | None:
     if is_unicode:
         if isinstance(item, dict) and len(item) == 1:
@@ -222,146 +193,3 @@ def add_line(kind: str, line: int | None) -> None:
         if key in structure_tokens:
             add_line(f"structure:{key}", mapping_key_line(parent, key))
     return line_map
-
-
-def normalize_match(value: Any) -> str:
-    if isinstance(value, list):
-        return " ".join(str(item) for item in value)
-    if isinstance(value, str):
-        return value
-    return ""
-
-
-def normalize_xpath(value: str) -> str:
-    return " ".join(value.split())
-
-
-def dedup_list(values: list[str]) -> list[str]:
-    """
-    Return a list without duplicates while preserving first-seen order.
-    Originally, rule differences were stored as sets, losing their original order,
-    which is not helpful and why it changed with the help of this function.
-
-    Example:
-        >>> dedup_list(["if:a", "if:b", "if:a"])
-        ['if:a', 'if:b']
-    """
-    return list(dict.fromkeys(values))  # dict preserves insertion order (guaranteed in Python 3.7+)
-
-
-def extract_match_pattern(rule_data: Any) -> str:
-    if isinstance(rule_data, dict):
-        matches = _MATCH_EXPR.find(rule_data)
-        if matches:
-            return normalize_match(matches[0].value)
-    return ""
-
-
-def extract_conditions(rule_data: Any) -> list[str]:
-    """Extract all if/else conditions from a rule"""
-    conditions: list[str] = []
-    for key, child, _ in iter_field_matches(rule_data):
-        if key in ("if", "else_if") and isinstance(child, str):
-            conditions.append(child)
-    return conditions
-
-
-def extract_variables(rule_data: Any) -> list[tuple[str, str]]:
-    """Extract variable definitions from a rule"""
-    variables: list[tuple[str, str]] = []
-
-    def add_from_value(value: Any) -> None:
-        if isinstance(value, dict):
-            for name, expr in value.items():
-                variables.append((str(name), str(expr)))
-        elif isinstance(value, list):
-            for item in value:
-                if isinstance(item, dict):
-                    for name, expr in item.items():
-                        variables.append((str(name), str(expr)))
-
-    for key, child, _ in iter_field_matches(rule_data):
-        if key == "variables":
-            add_from_value(child)
-    return variables
-
-
-def extract_structure_elements(rule_data: Any) -> list[str]:
-    """Extract structural elements (test, with, replace blocks) ignoring text content"""
-    elements: list[str] = []
-    tokens = {"test", "if", "else_if", "then", "else", "then_test", "else_test", "with", "replace", "intent"}
-    for key, _, _ in iter_field_matches(rule_data):
-        if key in tokens:
-            elements.append(f"{key}:")
-    return elements
-
-
-def diff_rules(english_rule: RuleInfo, translated_rule: RuleInfo) -> list[RuleDifference]:
-    """
-    Compare two rules and return fine-grained differences.
-    Ignores text content differences (T/t values) but catches structural changes.
-    """
-    differences: list[RuleDifference] = []
-
-    def add_difference(diff_type: DiffType, description: str, english_snippet: str, translated_snippet: str) -> None:
-        differences.append(
-            RuleDifference(
-                english_rule,
-                translated_rule,
-                diff_type,
-                description,
-                english_snippet,
-                translated_snippet,
-            )
-        )
-
-    # Check match pattern differences
-    en_match_raw = extract_match_pattern(english_rule.data)
-    tr_match_raw = extract_match_pattern(translated_rule.data)
-    en_match = normalize_xpath(en_match_raw)
-    tr_match = normalize_xpath(tr_match_raw)
-    if en_match != tr_match and en_match and tr_match:
-        add_difference(DiffType.MATCH, "Match pattern differs", en_match, tr_match)
-
-    # Check condition differences
-    en_conditions_raw = extract_conditions(english_rule.data)
-    tr_conditions_raw = extract_conditions(translated_rule.data)
-    en_conditions = [normalize_xpath(c) for c in en_conditions_raw]
-    tr_conditions = [normalize_xpath(c) for c in tr_conditions_raw]
-    if en_conditions != tr_conditions:
-        # Find specific differences
-        en_set, tr_set = set(en_conditions), set(tr_conditions)
-        if en_set != tr_set:
-            add_difference(
-                DiffType.CONDITION,
-                "Conditions differ",
-                ", ".join(dedup_list(en_conditions)) or "(none)",
-                ", ".join(dedup_list(tr_conditions)) or "(none)",
-            )
-
-    # Check variable differences
-    en_vars = extract_variables(english_rule.data)
-    tr_vars = extract_variables(translated_rule.data)
-    if en_vars != tr_vars:
-        en_var_names = {v[0] for v in en_vars}
-        tr_var_names = {v[0] for v in tr_vars}
-        if en_var_names != tr_var_names:
-            add_difference(
-                DiffType.VARIABLES,
-                "Variable definitions differ",
-                ", ".join(sorted(en_var_names)) or "(none)",
-                ", ".join(sorted(tr_var_names)) or "(none)",
-            )
-
-    # Check structural differences (test/if/then/else blocks)
-    en_structure = extract_structure_elements(english_rule.data)
-    tr_structure = extract_structure_elements(translated_rule.data)
-    if en_structure != tr_structure:
-        add_difference(
-            DiffType.STRUCTURE,
-            "Rule structure differs (test/if/then/else blocks)",
-            " ".join(en_structure),
-            " ".join(tr_structure),
-        )
-
-    return differences
diff --git a/PythonScripts/audit_translations/tests/test_parsers.py b/PythonScripts/audit_translations/tests/test_parsers.py
index 32dbb70e..9315ce5c 100644
--- a/PythonScripts/audit_translations/tests/test_parsers.py
+++ b/PythonScripts/audit_translations/tests/test_parsers.py
@@ -6,14 +6,16 @@
 from ruamel.yaml import YAML
 from ruamel.yaml.scanner import ScannerError
 
-from ..models import RuleDifference, RuleInfo, UntranslatedEntry
-from ..parsers import (
-    build_line_map,
-    diff_rules,
+from ..differ import diff_rules
+from ..extractors import (
     extract_conditions,
     extract_match_pattern,
     extract_structure_elements,
     extract_variables,
+)
+from ..models import RuleDifference, RuleInfo, UntranslatedEntry
+from ..parsers import (
+    build_line_map,
     find_untranslated_text_entries,
     find_untranslated_text_values,
     has_audit_ignore,

From d88e1c2a6da3f9b20e6587d695ba197ec0bc0ed9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Thu, 5 Mar 2026 04:24:21 +0100
Subject: [PATCH 5/7] move stuff to renderer.py

---
 PythonScripts/audit_translations/auditor.py  | 70 ++++++--------------
 PythonScripts/audit_translations/models.py   | 14 ++++
 PythonScripts/audit_translations/renderer.py | 62 ++++++++++++++++-
 3 files changed, 95 insertions(+), 51 deletions(-)

diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py
index 0f61c080..bfe6501d 100644
--- a/PythonScripts/audit_translations/auditor.py
+++ b/PythonScripts/audit_translations/auditor.py
@@ -8,25 +8,10 @@
 import sys
 from pathlib import Path
 
-from rich.panel import Panel
-from rich.table import Table
-
-from .models import ComparisonResult, RuleInfo
 from .differ import diff_rules
+from .models import AuditSummary, ComparisonResult, RuleInfo
 from .parsers import parse_yaml_file
-from .renderer import console, print_warnings
-
-GREEN_FILE_COUNT_THRESHOLD = 7
-YELLOW_FILE_COUNT_THRESHOLD = 4
-
-
-def file_count_color(file_count: int) -> str:
-    """Map number of translated YAML files to a display color."""
-    if file_count >= GREEN_FILE_COUNT_THRESHOLD:
-        return "green"
-    if file_count >= YELLOW_FILE_COUNT_THRESHOLD:
-        return "yellow"
-    return "red"
+from .renderer import console, print_audit_header, print_audit_summary, print_language_list, print_warnings
 
 
 def split_language_into_base_and_region(language: str) -> tuple[str, str | None]:
@@ -184,10 +169,7 @@ def audit_language(
     # Get list of files to audit
     files = [specific_file] if specific_file else get_yaml_files(english_dir, english_region_dir)
 
-    # Print header
-    console.print(Panel(f"MathCAT Translation Audit: {language.upper()}", style="bold cyan"))
-    console.print("\n  [dim]Comparing against English (en) reference files[/]")
-    console.print(f"  [dim]Files to check: {len(files)}[/]")
+    print_audit_header(language, len(files))
 
     total_issues = 0
     total_missing = 0
@@ -228,48 +210,36 @@ def audit_language(
         total_extra += len(result.extra_rules)
         total_differences += len(result.rule_differences)
 
-    # Summary
-    table = Table(title="SUMMARY", title_style="bold", box=None, show_header=False, padding=(0, 2))
-    table.add_column(width=30)
-    table.add_column()
-    for label, value, color in [
-        ("Files checked", len(files), None),
-        ("Files with issues", files_with_issues, "yellow" if files_with_issues else "green"),
-        ("Files OK", files_ok, "green" if files_ok else None),
-        ("Missing rules", total_missing, "red" if total_missing else "green"),
-        ("Untranslated text", total_untranslated, "yellow" if total_untranslated else "green"),
-        ("Rule differences", total_differences, "magenta" if total_differences else "green"),
-        ("Extra rules", total_extra, "blue" if total_extra else None),
-    ]:
-        table.add_row(label, f"[{color}]{value}[/]" if color else str(value))
-    console.print(Panel(table, style="cyan"))
+    print_audit_summary(
+        AuditSummary(
+            files_checked=len(files),
+            files_with_issues=files_with_issues,
+            files_ok=files_ok,
+            total_missing=total_missing,
+            total_untranslated=total_untranslated,
+            total_extra=total_extra,
+            total_differences=total_differences,
+            total_issues=total_issues,
+        )
+    )
 
     return total_issues
 
 
 def list_languages(rules_dir: str | None = None) -> None:
     """List available languages for auditing"""
-    console.print(Panel("Available Languages", style="bold cyan"))
-
-    table = Table(show_header=True, header_style="dim")
-    table.add_column("Language", justify="center", style="cyan")
-    table.add_column("YAML files", justify="right")
-
     rules_dir_path = get_rules_dir(rules_dir)
+
+    languages: list[tuple[str, int]] = []
     for lang_dir in sorted(rules_dir_path.iterdir()):
         if not lang_dir.is_dir() or lang_dir.name == "en":
             continue
-        base_count = len(get_yaml_files(lang_dir))
-        color = file_count_color(base_count)
-        table.add_row(lang_dir.name, f"[{color}]{base_count}[/] files")
+        languages.append((lang_dir.name, len(get_yaml_files(lang_dir))))
 
         for region_dir in sorted(lang_dir.iterdir()):
             if not region_dir.is_dir() or region_dir.name.lower() == "sharedrules":
                 continue
             code = f"{lang_dir.name}-{region_dir.name}"
-            count = len(get_yaml_files(lang_dir, region_dir))
-            region_color = file_count_color(count)
-            table.add_row(code, f"[{region_color}]{count}[/] files")
+            languages.append((code, len(get_yaml_files(lang_dir, region_dir))))
 
-    console.print(table)
-    console.print("\n  [dim]Reference: en (English) - base translation[/]\n")
+    print_language_list(languages)
diff --git a/PythonScripts/audit_translations/models.py b/PythonScripts/audit_translations/models.py
index e156073c..4df57174 100644
--- a/PythonScripts/audit_translations/models.py
+++ b/PythonScripts/audit_translations/models.py
@@ -117,3 +117,17 @@ class ComparisonResult:
     @property
     def has_issues(self) -> bool:
         return bool(self.missing_rules or self.untranslated_text or self.extra_rules or self.rule_differences)
+
+
+@dataclass
+class AuditSummary:
+    """Accumulated totals from a full language audit."""
+
+    files_checked: int
+    files_with_issues: int
+    files_ok: int
+    total_missing: int
+    total_untranslated: int
+    total_extra: int
+    total_differences: int
+    total_issues: int
diff --git a/PythonScripts/audit_translations/renderer.py b/PythonScripts/audit_translations/renderer.py
index a8eb8c12..2d0f975d 100644
--- a/PythonScripts/audit_translations/renderer.py
+++ b/PythonScripts/audit_translations/renderer.py
@@ -9,9 +9,11 @@
 
 from rich.console import Console
 from rich.markup import escape
+from rich.panel import Panel
+from rich.table import Table
 
 from .line_resolver import resolve_diff_lines
-from .models import ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo
+from .models import AuditSummary, ComparisonResult, DiffType, IssueType, RuleDifference, RuleInfo
 
 console = Console()
 
@@ -138,3 +140,61 @@ def add_issue(rule: RuleInfo, group_key: IssueGroupKey, payload: dict[str, Any])
                 issues += len(entries)
 
     return issues
+
+
+GREEN_FILE_COUNT_THRESHOLD = 7
+YELLOW_FILE_COUNT_THRESHOLD = 4
+
+
+def file_count_color(file_count: int) -> str:
+    """Map number of translated YAML files to a display color."""
+    if file_count >= GREEN_FILE_COUNT_THRESHOLD:
+        return "green"
+    if file_count >= YELLOW_FILE_COUNT_THRESHOLD:
+        return "yellow"
+    return "red"
+
+
+def print_audit_header(language: str, file_count: int) -> None:
+    """Print the audit header panel."""
+    console.print(Panel(f"MathCAT Translation Audit: {language.upper()}", style="bold cyan"))
+    console.print("\n  [dim]Comparing against English (en) reference files[/]")
+    console.print(f"  [dim]Files to check: {file_count}[/]")
+
+
+def print_audit_summary(summary: AuditSummary) -> None:
+    """Print the audit summary table."""
+    table = Table(title="SUMMARY", title_style="bold", box=None, show_header=False, padding=(0, 2))
+    table.add_column(width=30)
+    table.add_column()
+    for label, value, color in [
+        ("Files checked", summary.files_checked, None),
+        ("Files with issues", summary.files_with_issues, "yellow" if summary.files_with_issues else "green"),
+        ("Files OK", summary.files_ok, "green" if summary.files_ok else None),
+        ("Missing rules", summary.total_missing, "red" if summary.total_missing else "green"),
+        ("Untranslated text", summary.total_untranslated, "yellow" if summary.total_untranslated else "green"),
+        ("Rule differences", summary.total_differences, "magenta" if summary.total_differences else "green"),
+        ("Extra rules", summary.total_extra, "blue" if summary.total_extra else None),
+    ]:
+        table.add_row(label, f"[{color}]{value}[/]" if color else str(value))
+    console.print(Panel(table, style="cyan"))
+
+
+def print_language_list(languages: list[tuple[str, int]]) -> None:
+    """Print the available languages table.
+
+    Args:
+        languages: List of (language_code, yaml_file_count) tuples.
+    """
+    console.print(Panel("Available Languages", style="bold cyan"))
+
+    table = Table(show_header=True, header_style="dim")
+    table.add_column("Language", justify="center", style="cyan")
+    table.add_column("YAML files", justify="right")
+
+    for code, count in languages:
+        color = file_count_color(count)
+        table.add_row(code, f"[{color}]{count}[/] files")
+
+    console.print(table)
+    console.print("\n  [dim]Reference: en (English) - base translation[/]\n")

From 134c2627cb5ebfc252eaae9f951c1c82065404da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Thu, 5 Mar 2026 04:33:33 +0100
Subject: [PATCH 6/7] dont throw generic error. split up tests

---
 PythonScripts/audit_translations/auditor.py   |  13 +-
 PythonScripts/audit_translations/cli.py       |  19 +-
 PythonScripts/audit_translations/models.py    |   6 +-
 PythonScripts/audit_translations/parsers.py   |   8 -
 .../audit_translations/tests/test_auditor.py  |   4 -
 .../audit_translations/tests/test_differ.py   | 145 +++++++++++
 .../tests/test_extractors.py                  |  66 +++++
 .../audit_translations/tests/test_parsers.py  | 225 +-----------------
 8 files changed, 242 insertions(+), 244 deletions(-)
 create mode 100644 PythonScripts/audit_translations/tests/test_differ.py
 create mode 100644 PythonScripts/audit_translations/tests/test_extractors.py

diff --git a/PythonScripts/audit_translations/auditor.py b/PythonScripts/audit_translations/auditor.py
index bfe6501d..3c333628 100644
--- a/PythonScripts/audit_translations/auditor.py
+++ b/PythonScripts/audit_translations/auditor.py
@@ -5,11 +5,10 @@
 and for performing full language audits.
 """
 
-import sys
 from pathlib import Path
 
 from .differ import diff_rules
-from .models import AuditSummary, ComparisonResult, RuleInfo
+from .models import AuditError, AuditSummary, ComparisonResult, RuleInfo
 from .parsers import parse_yaml_file
 from .renderer import console, print_audit_header, print_audit_summary, print_language_list, print_warnings
 
@@ -132,7 +131,6 @@ def merge_rules(base_rules: list[RuleInfo], region_rules: list[RuleInfo]) -> lis
         extra_rules=extra_rules,
         untranslated_text=untranslated_text,
         rule_differences=rule_differences,
-        file_path=translated_path,
         english_rule_count=len(english_rules),
         translated_rule_count=len(translated_rules),
     )
@@ -155,16 +153,13 @@ def audit_language(
     english_region_dir = english_dir / region if region else None
 
     if not english_dir.exists():
-        console.print(f"\n[red]✗ Error:[/] English rules directory not found: {english_dir}")
-        sys.exit(1)
+        raise AuditError(f"English rules directory not found: {english_dir}")
 
     if not translated_dir.exists():
-        console.print(f"\n[red]✗ Error:[/] Translation directory not found: {translated_dir}")
-        sys.exit(1)
+        raise AuditError(f"Translation directory not found: {translated_dir}")
 
     if region and not (translated_region_dir and translated_region_dir.exists()):
-        console.print(f"\n[red]✗ Error:[/] Region directory not found: {translated_region_dir}")
-        sys.exit(1)
+        raise AuditError(f"Region directory not found: {translated_region_dir}")
 
     # Get list of files to audit
     files = [specific_file] if specific_file else get_yaml_files(english_dir, english_region_dir)
diff --git a/PythonScripts/audit_translations/cli.py b/PythonScripts/audit_translations/cli.py
index ed04c402..711089ca 100644
--- a/PythonScripts/audit_translations/cli.py
+++ b/PythonScripts/audit_translations/cli.py
@@ -8,6 +8,7 @@
 import sys
 
 from .auditor import audit_language, list_languages
+from .models import AuditError
 from .renderer import console
 
 
@@ -59,10 +60,14 @@ def main() -> None:
                     sys.exit(1)
                 issue_filter = set(tokens)
 
-        audit_language(
-            args.language,
-            args.specific_file,
-            args.rules_dir,
-            issue_filter,
-            args.verbose,
-        )
+        try:
+            audit_language(
+                args.language,
+                args.specific_file,
+                args.rules_dir,
+                issue_filter,
+                args.verbose,
+            )
+        except AuditError as exc:
+            console.print(f"\n[red]✗ Error:[/] {exc}")
+            sys.exit(1)
diff --git a/PythonScripts/audit_translations/models.py b/PythonScripts/audit_translations/models.py
index 4df57174..a54dadb7 100644
--- a/PythonScripts/audit_translations/models.py
+++ b/PythonScripts/audit_translations/models.py
@@ -6,10 +6,13 @@
 
 from dataclasses import dataclass, field
 from enum import StrEnum
-from pathlib import Path
 from typing import Any
 
 
+class AuditError(Exception):
+    """Raised when the audit encounters a configuration or validation error."""
+
+
 class IssueType(StrEnum):
     """Top-level issue categories used by the audit renderer."""
 
@@ -109,7 +112,6 @@ class ComparisonResult:
     missing_rules: list[RuleInfo]  # Rules in English but not in translation
     extra_rules: list[RuleInfo]  # Rules in translation but not in English
     untranslated_text: list[tuple[RuleInfo, list[UntranslatedEntry]]]
-    file_path: Path | str
     english_rule_count: int
     translated_rule_count: int
     rule_differences: list[RuleDifference] = field(default_factory=list)  # Fine-grained diffs
diff --git a/PythonScripts/audit_translations/parsers.py b/PythonScripts/audit_translations/parsers.py
index 590bc085..421ebb77 100644
--- a/PythonScripts/audit_translations/parsers.py
+++ b/PythonScripts/audit_translations/parsers.py
@@ -130,14 +130,6 @@ def has_audit_ignore(content: str) -> bool:
     return "# audit-ignore" in content
 
 
-def find_untranslated_text_values(node: Any) -> list[str]:
-    """
-    Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) that should be uppercase in translations.
-    Returns list of the untranslated text values found.
-    """
-    return [entry.text for entry in find_untranslated_text_entries(node)]
-
-
 def find_untranslated_text_entries(node: Any) -> list[UntranslatedEntry]:
     """
     Find lowercase text keys (t, ot, ct, spell, pronounce, ifthenelse) and their line numbers.
diff --git a/PythonScripts/audit_translations/tests/test_auditor.py b/PythonScripts/audit_translations/tests/test_auditor.py
index 0214bf56..e1cd94b7 100644
--- a/PythonScripts/audit_translations/tests/test_auditor.py
+++ b/PythonScripts/audit_translations/tests/test_auditor.py
@@ -93,7 +93,6 @@ def test_comparison_result_object_fields() -> None:
         extra_rules=[extra],
         untranslated_text=[(untranslated, [UntranslatedEntry("t", "x", 31)])],
         rule_differences=[diff],
-        file_path="",
         english_rule_count=1,
         translated_rule_count=1,
     )
@@ -592,7 +591,6 @@ def test_print_warnings_groups_multiple_subgroups_for_single_rule(fixed_console_
         extra_rules=[],
         untranslated_text=[(tr, [UntranslatedEntry("t", "first", 24), UntranslatedEntry("ct", "second", 25)])],
         rule_differences=diffs,
-        file_path="",
         english_rule_count=1,
         translated_rule_count=1,
     )
@@ -644,7 +642,6 @@ def test_print_warnings_groups_missing_and_extra_by_rule(fixed_console_width) ->
         extra_rules=[extra],
         untranslated_text=[],
         rule_differences=[diff],
-        file_path="",
         english_rule_count=2,
         translated_rule_count=2,
     )
@@ -689,7 +686,6 @@ def test_print_warnings_verbose_shows_snippets_only_for_differences(fixed_consol
         extra_rules=[],
         untranslated_text=[(tr_untranslated, [UntranslatedEntry("t", "leave me", 21)])],
         rule_differences=[diff],
-        file_path="",
         english_rule_count=2,
         translated_rule_count=2,
     )
diff --git a/PythonScripts/audit_translations/tests/test_differ.py b/PythonScripts/audit_translations/tests/test_differ.py
new file mode 100644
index 00000000..eb7bc3a2
--- /dev/null
+++ b/PythonScripts/audit_translations/tests/test_differ.py
@@ -0,0 +1,145 @@
+"""
+Tests for differ.py.
+"""
+
+from ..differ import diff_rules
+from ..models import RuleDifference, RuleInfo
+
+
+def make_rule(name: str, tag: str, data) -> RuleInfo:
+    """Helper to create RuleInfo for testing"""
+    return RuleInfo(
+        name=name,
+        tag=tag,
+        key=f"{name}|{tag}",
+        line_number=1,
+        raw_content="",
+        data=data,
+    )
+
+
+class TestDiffRules:
+    def test_identical_rules_no_diff(self):
+        """Ensure identical rules produce no differences."""
+        data = {"name": "test", "tag": "mo", "match": "self::m:mo", "replace": [{"T": "text"}]}
+        en = make_rule("test", "mo", data)
+        tr = make_rule("test", "mo", data)
+        assert diff_rules(en, tr) == []
+
+    def test_detects_match_pattern_difference(self):
+        """Ensure a match pattern difference is detected."""
+        en = make_rule("test", "mo", {"match": "self::m:mo"})
+        tr = make_rule("test", "mo", {"match": "self::m:mi"})
+        diffs = diff_rules(en, tr)
+        assert len(diffs) == 1
+        assert diffs[0].diff_type == "match"
+        assert "self::m:mo" in diffs[0].english_snippet
+        assert "self::m:mi" in diffs[0].translated_snippet
+
+    def test_detects_condition_difference(self):
+        """Ensure a condition difference is detected."""
+        en = make_rule("test", "mo", {"if": "condition1"})
+        tr = make_rule("test", "mo", {"if": "condition2"})
+        diffs = diff_rules(en, tr)
+        assert any(d.diff_type == "condition" for d in diffs)
+
+    def test_condition_snippet_preserves_rule_order(self):
+        """
+        Condition snippets should preserve the order seen in each rule.
+        Originally, alphabetical order was used, which is not very helpful.
+        """
+        en = make_rule(
+            "test",
+            "mo",
+            {
+                "test": {
+                    "if": "condition_b",
+                    "then": [
+                        {
+                            "test": {
+                                "if": "condition_a",
+                                "then": [{"T": "x"}],
+                            }
+                        }
+                    ],
+                }
+            },
+        )
+        tr = make_rule("test", "mo", {"if": "condition_c"})
+        diffs: list[RuleDifference] = diff_rules(en, tr)
+        cond_diff: RuleDifference = next(d for d in diffs if d.diff_type == "condition")
+        assert cond_diff.english_snippet == "condition_b, condition_a"
+        assert cond_diff.translated_snippet == "condition_c"
+
+    def test_condition_snippet_deduplicates_repeated_conditions(self):
+        """
+        Repeated conditions should be shown once, in first-seen order.
+        """
+        en = make_rule(
+            "test",
+            "mo",
+            {
+                "test": {
+                    "if": "condition_a",
+                    "then": [
+                        {
+                            "test": {
+                                "if": "condition_a",
+                                "then": [{"T": "x"}],
+                            }
+                        },
+                        {
+                            "test": {
+                                "if": "condition_b",
+                                "then": [{"T": "y"}],
+                            }
+                        },
+                    ],
+                }
+            },
+        )
+        tr = make_rule("test", "mo", {"if": "condition_c"})
+        diffs: list[RuleDifference] = diff_rules(en, tr)
+        cond_diff: RuleDifference = next(d for d in diffs if d.diff_type == "condition")
+
+        # without deduplication, we'd have "condition_a" repeated.
+        assert cond_diff.english_snippet == "condition_a, condition_b"
+        assert cond_diff.translated_snippet == "condition_c"
+
+    def test_detects_missing_condition(self):
+        """Ensure a missing condition is detected."""
+        en = make_rule("test", "mo", {"if": "condition1"})
+        tr = make_rule("test", "mo", {"replace": [{"T": "text"}]})
+        diffs = diff_rules(en, tr)
+        assert any(d.diff_type == "condition" for d in diffs)
+
+    def test_detects_variable_difference(self):
+        """Ensure a variable difference is detected."""
+        en = make_rule("test", "mo", {"variables": [{"foo": "bar"}]})
+        tr = make_rule("test", "mo", {"variables": [{"baz": "qux"}]})
+        diffs = diff_rules(en, tr)
+        assert any(d.diff_type == "variables" for d in diffs)
+
+    def test_detects_structure_difference(self):
+        """Ensure a structure difference is detected."""
+        en = make_rule("test", "mo", {"test": {"if": "cond", "then": [{"T": "yes"}], "else": [{"T": "no"}]}})
+        tr = make_rule("test", "mo", {"test": {"if": "cond", "then": [{"T": "ja"}]}})
+        diffs = diff_rules(en, tr)
+        assert any(d.diff_type == "structure" for d in diffs)
+
+    def test_multiple_differences(self):
+        """Ensure multiple differences are all reported."""
+        en = make_rule("test", "mo", {"match": "self::m:mo", "if": "cond1"})
+        tr = make_rule("test", "mo", {"match": "self::m:mi", "if": "cond2"})
+        diffs = diff_rules(en, tr)
+        assert len(diffs) == 2
+        types = {d.diff_type for d in diffs}
+        assert "match" in types
+        assert "condition" in types
+
+    def test_ignores_text_content_differences(self):
+        """Ensure text content differences are ignored."""
+        en = make_rule("test", "mo", {"replace": [{"T": "hello"}]})
+        tr = make_rule("test", "mo", {"replace": [{"T": "hallo"}]})
+        diffs = diff_rules(en, tr)
+        assert diffs == []  # text differences are intentional translations
diff --git a/PythonScripts/audit_translations/tests/test_extractors.py b/PythonScripts/audit_translations/tests/test_extractors.py
new file mode 100644
index 00000000..0bc0079e
--- /dev/null
+++ b/PythonScripts/audit_translations/tests/test_extractors.py
@@ -0,0 +1,66 @@
+"""
+Tests for extractors.py.
+"""
+
+from ..extractors import (
+    extract_conditions,
+    extract_match_pattern,
+    extract_structure_elements,
+    extract_variables,
+)
+
+
+class TestExtractMatchPattern:
+    def test_extracts_inline_match(self):
+        """Ensure extracts inline match."""
+        data = {"match": "self::m:mo"}
+        assert extract_match_pattern(data) == "self::m:mo"
+
+    def test_extracts_array_match(self):
+        """Ensure extracts array match."""
+        data = {"match": ["self::m:mo", "@intent"]}
+        assert extract_match_pattern(data) == "self::m:mo @intent"
+
+    def test_returns_empty_for_no_match(self):
+        """Ensure an empty string is returned when there is no match."""
+        data = {"replace": [{"T": "text"}]}
+        assert extract_match_pattern(data) == ""
+
+
+class TestExtractConditions:
+    def test_extracts_single_condition(self):
+        """Ensure extracts single condition."""
+        data = {"if": "$Verbosity"}
+        assert extract_conditions(data) == ["$Verbosity"]
+
+    def test_extracts_multiple_conditions(self):
+        """Ensure extracts multiple conditions."""
+        data = {"if": "condition1", "then": "something", "else_test": {"if": "condition2"}}
+        conditions = extract_conditions(data)
+        assert "condition1" in conditions
+        assert "condition2" in conditions
+
+
+class TestExtractVariables:
+    def test_extracts_variables(self):
+        """Ensure extracts variables."""
+        data = {"variables": [{"name": "value"}, {"other": "val2"}]}
+        variables = extract_variables(data)
+        assert ("name", "value") in variables
+        assert ("other", "val2") in variables
+
+    def test_returns_empty_for_no_variables(self):
+        """Ensure an empty list is returned when there are no variables."""
+        data = {"match": "."}
+        assert extract_variables(data) == []
+
+
+class TestExtractStructureElements:
+    def test_extracts_test_structure(self):
+        """Ensure extracts test structure."""
+        data = {"test": {"if": "condition", "then": [{"T": "yes"}], "else": [{"T": "no"}]}}
+        elements = extract_structure_elements(data)
+        assert "test:" in elements
+        assert "if:" in elements
+        assert "then:" in elements
+        assert "else:" in elements
diff --git a/PythonScripts/audit_translations/tests/test_parsers.py b/PythonScripts/audit_translations/tests/test_parsers.py
index 9315ce5c..ed252cde 100644
--- a/PythonScripts/audit_translations/tests/test_parsers.py
+++ b/PythonScripts/audit_translations/tests/test_parsers.py
@@ -6,18 +6,10 @@
 from ruamel.yaml import YAML
 from ruamel.yaml.scanner import ScannerError
 
-from ..differ import diff_rules
-from ..extractors import (
-    extract_conditions,
-    extract_match_pattern,
-    extract_structure_elements,
-    extract_variables,
-)
-from ..models import RuleDifference, RuleInfo, UntranslatedEntry
+from ..models import UntranslatedEntry
 from ..parsers import (
     build_line_map,
     find_untranslated_text_entries,
-    find_untranslated_text_values,
     has_audit_ignore,
     parse_rules_file,
     parse_unicode_file,
@@ -47,27 +39,27 @@ class TestFindUntranslatedTextKeys:
     def test_finds_lowercase_t(self):
         """Ensure finds lowercase t."""
         content = {"t": "hello world"}
-        assert find_untranslated_text_values(content) == ["hello world"]
+        assert [e.text for e in find_untranslated_text_entries(content)] == ["hello world"]
 
     def test_finds_lowercase_ot(self):
         """Ensure finds lowercase ot."""
         content = {"ot": "open paren"}
-        assert find_untranslated_text_values(content) == ["open paren"]
+        assert [e.text for e in find_untranslated_text_entries(content)] == ["open paren"]
 
     def test_finds_lowercase_ct(self):
         """Ensure finds lowercase ct."""
         content = {"ct": "close paren"}
-        assert find_untranslated_text_values(content) == ["close paren"]
+        assert [e.text for e in find_untranslated_text_entries(content)] == ["close paren"]
 
     def test_finds_multiple(self):
         """Ensure finds multiple."""
         content = {"t": "one", "ot": "two", "ct": "three"}
-        assert set(find_untranslated_text_values(content)) == {"one", "two", "three"}
+        assert {e.text for e in find_untranslated_text_entries(content)} == {"one", "two", "three"}
 
     def test_ignores_uppercase_T(self):
         """Ensure ignores uppercase T."""
         content = {"T": "translated"}
-        assert find_untranslated_text_values(content) == []
+        assert [e.text for e in find_untranslated_text_entries(content)] == []
 
     def test_finds_spell_and_pronounce(self):
         """Detects lowercase spell and pronounce markers.
@@ -75,7 +67,7 @@ def test_finds_spell_and_pronounce(self):
         Extends coverage beyond basic t/ot/ct fields.
         Flags auxiliary translation-bearing keys."""
         content = {"spell": "alpha", "pronounce": "beta"}
-        assert set(find_untranslated_text_values(content)) == {"alpha", "beta"}
+        assert {e.text for e in find_untranslated_text_entries(content)} == {"alpha", "beta"}
 
     def test_ignores_uppercase_variants(self):
         """Ignores uppercase variants of extended markers.
@@ -83,22 +75,22 @@ def test_ignores_uppercase_variants(self):
         Honors already-verified spell/pronounce/IfThenElse content.
         Avoids double-reporting translated data."""
         content = {"PRONOUNCE": "gamma", "IFTHENELSE": "delta"}
-        assert find_untranslated_text_values(content) == []
+        assert [e.text for e in find_untranslated_text_entries(content)] == []
 
     def test_ignores_variable_references(self):
         """Ensure ignores variable references."""
         content = {"t": "$variable"}
-        assert find_untranslated_text_values(content) == []
+        assert [e.text for e in find_untranslated_text_entries(content)] == []
 
     def test_ignores_xpath_expressions(self):
         """Ensure ignores xpath expressions."""
         content = {"t": "@attr"}
-        assert find_untranslated_text_values(content) == []
+        assert [e.text for e in find_untranslated_text_entries(content)] == []
 
     def test_ignores_single_punctuation(self):
         """Ensure ignores single punctuation."""
         content = {"t": "."}
-        assert find_untranslated_text_values(content) == []
+        assert [e.text for e in find_untranslated_text_entries(content)] == []
 
     def test_finds_entries_with_lines(self):
         """Ensure finds entries with line numbers."""
@@ -347,62 +339,6 @@ def test_mixed_valid_and_skipped_items(self):
         assert rules[1].line_number == 7
 
 
-class TestExtractMatchPattern:
-    def test_extracts_inline_match(self):
-        """Ensure extracts inline match."""
-        data = {"match": "self::m:mo"}
-        assert extract_match_pattern(data) == "self::m:mo"
-
-    def test_extracts_array_match(self):
-        """Ensure extracts array match."""
-        data = {"match": ["self::m:mo", "@intent"]}
-        assert extract_match_pattern(data) == "self::m:mo @intent"
-
-    def test_returns_empty_for_no_match(self):
-        """Ensure returns empty for no match."""
-        data = {"replace": [{"T": "text"}]}
-        assert extract_match_pattern(data) == ""
-
-
-class TestExtractConditions:
-    def test_extracts_single_condition(self):
-        """Ensure extracts single condition."""
-        data = {"if": "$Verbosity"}
-        assert extract_conditions(data) == ["$Verbosity"]
-
-    def test_extracts_multiple_conditions(self):
-        """Ensure extracts multiple conditions."""
-        data = {"if": "condition1", "then": "something", "else_test": {"if": "condition2"}}
-        conditions = extract_conditions(data)
-        assert "condition1" in conditions
-        assert "condition2" in conditions
-
-
-class TestExtractVariables:
-    def test_extracts_variables(self):
-        """Ensure extracts variables."""
-        data = {"variables": [{"name": "value"}, {"other": "val2"}]}
-        variables = extract_variables(data)
-        assert ("name", "value") in variables
-        assert ("other", "val2") in variables
-
-    def test_returns_empty_for_no_variables(self):
-        """Ensure returns empty for no variables."""
-        data = {"match": "."}
-        assert extract_variables(data) == []
-
-
-class TestExtractStructureElements:
-    def test_extracts_test_structure(self):
-        """Ensure extracts test structure."""
-        data = {"test": {"if": "condition", "then": [{"T": "yes"}], "else": [{"T": "no"}]}}
-        elements = extract_structure_elements(data)
-        assert "test:" in elements
-        assert "if:" in elements
-        assert "then:" in elements
-        assert "else:" in elements
-
-
 class TestBuildLineMap:
     def test_builds_line_map_for_rule_elements(self):
         """Ensure line map captures nested element lines."""
@@ -425,142 +361,3 @@ def test_builds_line_map_for_rule_elements(self):
         assert line_map["variables"] == [5]
         assert line_map["structure:test"] == [7]
         assert line_map["structure:if"] == [4, 8]
-
-
-def make_rule(name: str, tag: str, data) -> RuleInfo:
-    """Helper to create RuleInfo for testing"""
-    return RuleInfo(
-        name=name,
-        tag=tag,
-        key=f"{name}|{tag}",
-        line_number=1,
-        raw_content="",
-        data=data,
-    )
-
-
-class TestDiffRules:
-    def test_identical_rules_no_diff(self):
-        """Ensure identical rules no diff."""
-        data = {"name": "test", "tag": "mo", "match": "self::m:mo", "replace": [{"T": "text"}]}
-        en = make_rule("test", "mo", data)
-        tr = make_rule("test", "mo", data)
-        assert diff_rules(en, tr) == []
-
-    def test_detects_match_pattern_difference(self):
-        """Ensure detects match pattern difference."""
-        en = make_rule("test", "mo", {"match": "self::m:mo"})
-        tr = make_rule("test", "mo", {"match": "self::m:mi"})
-        diffs = diff_rules(en, tr)
-        assert len(diffs) == 1
-        assert diffs[0].diff_type == "match"
-        assert "self::m:mo" in diffs[0].english_snippet
-        assert "self::m:mi" in diffs[0].translated_snippet
-
-    def test_detects_condition_difference(self):
-        """Ensure detects condition difference."""
-        en = make_rule("test", "mo", {"if": "condition1"})
-        tr = make_rule("test", "mo", {"if": "condition2"})
-        diffs = diff_rules(en, tr)
-        assert any(d.diff_type == "condition" for d in diffs)
-
-    def test_condition_snippet_preserves_rule_order(self):
-        """
-        Condition snippets should preserve the order seen in each rule.
-        Originally, alphabetical order was used, which is not very helpful.
-        """
-        en = make_rule(
-            "test",
-            "mo",
-            {
-                "test": {
-                    "if": "condition_b",
-                    "then": [
-                        {
-                            "test": {
-                                "if": "condition_a",
-                                "then": [{"T": "x"}],
-                            }
-                        }
-                    ],
-                }
-            },
-        )
-        tr = make_rule("test", "mo", {"if": "condition_c"})
-        diffs: list[RuleDifference] = diff_rules(en, tr)
-        cond_diff: RuleDifference = next(d for d in diffs if d.diff_type == "condition")
-        assert cond_diff.english_snippet == "condition_b, condition_a"
-        assert cond_diff.translated_snippet == "condition_c"
-
-    def test_condition_snippet_deduplicates_repeated_conditions(self):
-        """
-        Repeated conditions should be shown once, in first-seen order.
-        """
-        en = make_rule(
-            "test",
-            "mo",
-            {
-                "test": {
-                    "if": "condition_a",
-                    "then": [
-                        {
-                            "test": {
-                                "if": "condition_a",
-                                "then": [{"T": "x"}],
-                            }
-                        },
-                        {
-                            "test": {
-                                "if": "condition_b",
-                                "then": [{"T": "y"}],
-                            }
-                        },
-                    ],
-                }
-            },
-        )
-        tr = make_rule("test", "mo", {"if": "condition_c"})
-        diffs: list[RuleDifference] = diff_rules(en, tr)
-        cond_diff: RuleDifference = next(d for d in diffs if d.diff_type == "condition")
-
-        # without deduplication, we'd have "condition_a" repeated.
-        assert cond_diff.english_snippet == "condition_a, condition_b"
-        assert cond_diff.translated_snippet == "condition_c"
-
-    def test_detects_missing_condition(self):
-        """Ensure detects missing condition."""
-        en = make_rule("test", "mo", {"if": "condition1"})
-        tr = make_rule("test", "mo", {"replace": [{"T": "text"}]})
-        diffs = diff_rules(en, tr)
-        assert any(d.diff_type == "condition" for d in diffs)
-
-    def test_detects_variable_difference(self):
-        """Ensure detects variable difference."""
-        en = make_rule("test", "mo", {"variables": [{"foo": "bar"}]})
-        tr = make_rule("test", "mo", {"variables": [{"baz": "qux"}]})
-        diffs = diff_rules(en, tr)
-        assert any(d.diff_type == "variables" for d in diffs)
-
-    def test_detects_structure_difference(self):
-        """Ensure detects structure difference."""
-        en = make_rule("test", "mo", {"test": {"if": "cond", "then": [{"T": "yes"}], "else": [{"T": "no"}]}})
-        tr = make_rule("test", "mo", {"test": {"if": "cond", "then": [{"T": "ja"}]}})
-        diffs = diff_rules(en, tr)
-        assert any(d.diff_type == "structure" for d in diffs)
-
-    def test_multiple_differences(self):
-        """Ensure multiple differences."""
-        en = make_rule("test", "mo", {"match": "self::m:mo", "if": "cond1"})
-        tr = make_rule("test", "mo", {"match": "self::m:mi", "if": "cond2"})
-        diffs = diff_rules(en, tr)
-        assert len(diffs) == 2
-        types = {d.diff_type for d in diffs}
-        assert "match" in types
-        assert "condition" in types
-
-    def test_ignores_text_content_differences(self):
-        """Ensure ignores text content differences."""
-        en = make_rule("test", "mo", {"replace": [{"T": "hello"}]})
-        tr = make_rule("test", "mo", {"replace": [{"T": "hallo"}]})
-        diffs = diff_rules(en, tr)
-        assert diffs == []  # text differences are intentional translations

From 7e70bed74373ecc373bb1762dacf4ca6dfb6861a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Moritz=20Gro=C3=9F?= <hi@mgross.dev>
Date: Thu, 5 Mar 2026 04:55:46 +0100
Subject: [PATCH 7/7] use Enum instead of str

---
 PythonScripts/audit_translations/__init__.py  |  5 +--
 PythonScripts/audit_translations/cli.py       |  1 +
 .../audit_translations/line_resolver.py       | 32 +++++++++----------
 .../audit_translations/tests/conftest.py      |  4 +++
 PythonScripts/pyproject.toml                  |  1 -
 PythonScripts/uv.lock                         | 11 -------
 6 files changed, 22 insertions(+), 32 deletions(-)
 create mode 100644 PythonScripts/audit_translations/tests/conftest.py

diff --git a/PythonScripts/audit_translations/__init__.py b/PythonScripts/audit_translations/__init__.py
index cd6dab62..1d076494 100644
--- a/PythonScripts/audit_translations/__init__.py
+++ b/PythonScripts/audit_translations/__init__.py
@@ -8,10 +8,7 @@
 Read README.md for more details.
 """
 
-import sys
-
-sys.stdout.reconfigure(encoding="utf-8")
-from .cli import main  # noqa: E402
+from .cli import main
 
 __all__ = [
     "main",
diff --git a/PythonScripts/audit_translations/cli.py b/PythonScripts/audit_translations/cli.py
index 711089ca..aaba1444 100644
--- a/PythonScripts/audit_translations/cli.py
+++ b/PythonScripts/audit_translations/cli.py
@@ -14,6 +14,7 @@
 
 def main() -> None:
     """Main entry point for the audit tool"""
+    sys.stdout.reconfigure(encoding="utf-8")
 
     parser = argparse.ArgumentParser(
         description="Audit MathCAT translation files against English originals",
diff --git a/PythonScripts/audit_translations/line_resolver.py b/PythonScripts/audit_translations/line_resolver.py
index da25d200..ee22c59c 100644
--- a/PythonScripts/audit_translations/line_resolver.py
+++ b/PythonScripts/audit_translations/line_resolver.py
@@ -4,15 +4,15 @@
 Maps rule diff types and structure tokens to precise YAML source line numbers.
 """
 
-from .models import DiffType, RuleDifference, RuleInfo
 from .extractors import extract_structure_elements
+from .models import DiffType, RuleDifference, RuleInfo
 
 
-def _get_line_map_lines(rule: RuleInfo, kind: DiffType | str, token: str | None = None) -> list[int]:
+def _get_line_map_lines(rule: RuleInfo, kind: DiffType, token: str | None = None) -> list[int]:
     """Return the line-number list for a given element kind from the rule's line map."""
-    if kind in ("match", "condition", "variables"):
+    if kind in (DiffType.MATCH, DiffType.CONDITION, DiffType.VARIABLES):
         return rule.line_map.get(kind, [])
-    if kind == "structure" and token:
+    if kind == DiffType.STRUCTURE and token:
         return rule.line_map.get(f"structure:{token.rstrip(':')}", [])
     return []
 
@@ -40,7 +40,7 @@ def first_structure_mismatch(
 
 def resolve_issue_line_at_position(
     rule: RuleInfo,
-    kind: DiffType | str,
+    kind: DiffType,
     token: str | None = None,
     position: int = 0,
 ) -> int | None:
@@ -60,7 +60,7 @@ def resolve_issue_line_at_position(
     return lines[position] if position < len(lines) else None
 
 
-def resolve_issue_line(rule: RuleInfo, kind: DiffType | str, token: str | None = None) -> int | None:
+def resolve_issue_line(rule: RuleInfo, kind: DiffType, token: str | None = None) -> int | None:
     """
     Resolve the line number for an issue within a rule.
 
@@ -69,7 +69,7 @@ def resolve_issue_line(rule: RuleInfo, kind: DiffType | str, token: str | None =
     to rule.line_number to avoid misleading line numbers when elements are missing.
     """
     lines = _get_line_map_lines(rule, kind, token)
-    if kind == "structure" and token:
+    if kind == DiffType.STRUCTURE and token:
         return lines[0] if lines else None
     return lines[0] if lines else rule.line_number
 
@@ -115,27 +115,27 @@ def resolve_structure_issue_lines(diff: RuleDifference) -> tuple[int, int] | Non
             en_occ = structure_token_occurrence_index(en_tokens, anchor_pos)
             tr_occ = structure_token_occurrence_index(tr_tokens, anchor_pos)
             if en_occ is not None and tr_occ is not None:
-                line_en = resolve_issue_line_at_position(diff.english_rule, "structure", anchor_token, en_occ)
-                line_tr = resolve_issue_line_at_position(diff.translated_rule, "structure", anchor_token, tr_occ)
+                line_en = resolve_issue_line_at_position(diff.english_rule, DiffType.STRUCTURE, anchor_token, en_occ)
+                line_tr = resolve_issue_line_at_position(diff.translated_rule, DiffType.STRUCTURE, anchor_token, tr_occ)
                 if line_en is not None and line_tr is not None:
                     return line_en, line_tr
 
         # Fallback: anchor both sides to replace, which is the rule body entrypoint.
-        line_en = resolve_issue_line(diff.english_rule, "structure", "replace:") or diff.english_rule.line_number
-        line_tr = resolve_issue_line(diff.translated_rule, "structure", "replace:") or diff.translated_rule.line_number
+        line_en = resolve_issue_line(diff.english_rule, DiffType.STRUCTURE, "replace:") or diff.english_rule.line_number
+        line_tr = resolve_issue_line(diff.translated_rule, DiffType.STRUCTURE, "replace:") or diff.translated_rule.line_number
         return line_en, line_tr
 
     # Exact token available on both sides: resolve by occurrence index at mismatch.
     en_occ = structure_token_occurrence_index(en_tokens, mismatch_pos)
     tr_occ = structure_token_occurrence_index(tr_tokens, mismatch_pos)
     if en_occ is not None and tr_occ is not None:
-        line_en = resolve_issue_line_at_position(diff.english_rule, "structure", en_token, en_occ)
-        line_tr = resolve_issue_line_at_position(diff.translated_rule, "structure", tr_token, tr_occ)
+        line_en = resolve_issue_line_at_position(diff.english_rule, DiffType.STRUCTURE, en_token, en_occ)
+        line_tr = resolve_issue_line_at_position(diff.translated_rule, DiffType.STRUCTURE, tr_token, tr_occ)
         if line_en is not None and line_tr is not None:
             return line_en, line_tr
 
-    line_en = resolve_issue_line(diff.english_rule, "structure", en_token)
-    line_tr = resolve_issue_line(diff.translated_rule, "structure", tr_token)
+    line_en = resolve_issue_line(diff.english_rule, DiffType.STRUCTURE, en_token)
+    line_tr = resolve_issue_line(diff.translated_rule, DiffType.STRUCTURE, tr_token)
     if line_en is None or line_tr is None:
         return None
     return line_en, line_tr
@@ -149,7 +149,7 @@ def resolve_diff_lines(diff: RuleDifference) -> tuple[int | None, int | None] |
     This is the single entry point used by the renderer to avoid duplicating
     the structure vs non-structure branching logic.
     """
-    if diff.diff_type == "structure":
+    if diff.diff_type == DiffType.STRUCTURE:
         return resolve_structure_issue_lines(diff)
     return (
         resolve_issue_line(diff.english_rule, diff.diff_type),
diff --git a/PythonScripts/audit_translations/tests/conftest.py b/PythonScripts/audit_translations/tests/conftest.py
new file mode 100644
index 00000000..c58afaad
--- /dev/null
+++ b/PythonScripts/audit_translations/tests/conftest.py
@@ -0,0 +1,4 @@
+import sys
+
+# Force UTF-8 on stdout; needed for running tests on Windows.
+sys.stdout.reconfigure(encoding="utf-8")
diff --git a/PythonScripts/pyproject.toml b/PythonScripts/pyproject.toml
index b4d422a3..6f919ee0 100644
--- a/PythonScripts/pyproject.toml
+++ b/PythonScripts/pyproject.toml
@@ -10,7 +10,6 @@ license = "MIT"
 readme = "README.md"
 requires-python = ">=3.14"
 dependencies = [
-    "attrs",
     "beautifulsoup4",
     "deepl",
     "googletrans",
diff --git a/PythonScripts/uv.lock b/PythonScripts/uv.lock
index 9b09e2b8..993f3f44 100644
--- a/PythonScripts/uv.lock
+++ b/PythonScripts/uv.lock
@@ -14,15 +14,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
 ]
 
-[[package]]
-name = "attrs"
-version = "25.4.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/6b/5c/685e6633917e101e5dcb62b9dd76946cbb57c26e133bae9e0cd36033c0a9/attrs-25.4.0.tar.gz", hash = "sha256:16d5969b87f0859ef33a48b35d55ac1be6e42ae49d5e853b597db70c35c57e11", size = 934251, upload-time = "2025-10-06T13:54:44.725Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/3a/2a/7cc015f5b9f5db42b7d48157e23356022889fc354a2813c15934b7cb5c0e/attrs-25.4.0-py3-none-any.whl", hash = "sha256:adcf7e2a1fb3b36ac48d97835bb6d8ade15b8dcce26aba8bf1d14847b57a3373", size = 67615, upload-time = "2025-10-06T13:54:43.17Z" },
-]
-
 [[package]]
 name = "beautifulsoup4"
 version = "4.14.3"
@@ -283,7 +274,6 @@ name = "pythonscripts"
 version = "0.1.0"
 source = { editable = "." }
 dependencies = [
-    { name = "attrs" },
     { name = "beautifulsoup4" },
     { name = "deepl" },
     { name = "googletrans" },
@@ -302,7 +292,6 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "attrs" },
     { name = "beautifulsoup4" },
     { name = "deepl" },
     { name = "googletrans" },