diff --git a/scripts/latex_preprocessor.py b/scripts/latex_preprocessor.py
index 731465a75..b41d26fd5 100644
--- a/scripts/latex_preprocessor.py
+++ b/scripts/latex_preprocessor.py
@@ -113,12 +113,74 @@ def replace_si_unit(m: re.Match) -> str:
return text
+def _find_brace_content(text: str, start: int) -> tuple[str, int] | None:
+    r"""Return the content of the brace group that opens at *start*.
+
+    Nesting is tracked with a depth counter, so groups may be nested to
+    any depth.  A backslash escapes the character that follows it: the
+    LaTeX literals ``\{`` and ``\}`` neither open nor close a group, and
+    ``\\`` is consumed as a pair so that a brace right after it still
+    counts.
+
+    Args:
+        text: The string to scan.
+        start: Index of the opening ``{`` in *text*.
+
+    Returns:
+        ``(content, end)`` where *content* excludes the outer braces and
+        *end* is the index just past the closing ``}``.  ``None`` if
+        *start* is past the end of *text*, does not point at ``{``, or
+        the group is never closed.
+    """
+    if start >= len(text) or text[start] != "{":
+        return None
+    depth = 1
+    i = start + 1
+    length = len(text)
+    while i < length:
+        ch = text[i]
+        if ch == "\\":
+            # Skip the escaped character so \{ and \} are not counted
+            # as grouping braces.
+            i += 2
+            continue
+        if ch == "{":
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0:
+                # content excludes the outer braces
+                return text[start + 1 : i], i + 1
+        i += 1
+    # Ran off the end of the text without closing the group.
+    return None
+
+
+# Maps each bracket macro name to the (left, right) delimiter pair that is
+# placed around the macro's brace-group content when the macro is expanded.
+_BRACKET_MACROS = {
+ "\\PB": ("\\left(", "\\right)"),
+ "\\RB": ("\\left[", "\\right]"),
+ "\\CB": ("\\left\\{", "\\right\\}"),
+}
+
+
+def _expand_all_bracket_macros(text: str) -> str:
+    r"""Expand every ``\PB``, ``\RB`` and ``\CB`` macro in *text*, inside-out.
+
+    Each ``\XX{content}`` is replaced by ``<left> content <right>`` using
+    the delimiter pair registered for the macro in ``_BRACKET_MACROS``.
+    The content of a group is expanded recursively before it is wrapped,
+    so nested input such as ``\PB{a \PB{b}}`` is expanded at every depth.
+    A macro whose brace group cannot be matched by
+    ``_find_brace_content`` is copied to the output unchanged.
+
+    Args:
+        text: LaTeX source to process.
+
+    Returns:
+        *text* with every bracket macro that has a matched brace group
+        expanded.
+    """
+    result: list[str] = []
+    i = 0
+    length = len(text)
+    while i < length:
+        expanded = False
+        for macro, (left, right) in _BRACKET_MACROS.items():
+            # Match in place: slicing ``text[i:]`` here would copy the rest
+            # of the string at every position and make the scan quadratic.
+            if not text.startswith(macro + "{", i):
+                continue
+            found = _find_brace_content(text, i + len(macro))
+            if found is not None:
+                content, end = found
+                # Recursively expand any bracket macros inside the content
+                inner = _expand_all_bracket_macros(content)
+                result.append(f"{left} {inner} {right}")
+                i = end
+                expanded = True
+            # The first macro that matches at this position wins.
+            break
+        if not expanded:
+            result.append(text[i])
+            i += 1
+    return "".join(result)
+
+
def expand_bracket_macros(text: str) -> str:
- r"""Expand \PB{}, \RB{}, \CB{} bracket macros to standard LaTeX."""
- text = re.sub(r"\\PB" + _BRACE_RE, r"\\left( \1 \\right)", text)
- text = re.sub(r"\\RB" + _BRACE_RE, r"\\left[ \1 \\right]", text)
- text = re.sub(r"\\CB" + _BRACE_RE, r"\\left\\{ \1 \\right\\}", text)
- return text
+ r"""Expand \PB{}, \RB{}, \CB{} bracket macros to standard LaTeX.
+
+ Delegates to ``_expand_all_bracket_macros``, which matches braces by
+ counting nesting depth and recurses into each group, so arbitrary
+ nesting depth is handled (e.g. ``\PB{\frac{\dot{m}_{a}}{\dot{m}_{b}}}``).
+ """
+ return _expand_all_bracket_macros(text)
def convert_callout_env(text: str) -> str:
diff --git a/scripts/markdown_postprocessor.py b/scripts/markdown_postprocessor.py
index 5b76d46ac..4db6411c5 100644
--- a/scripts/markdown_postprocessor.py
+++ b/scripts/markdown_postprocessor.py
@@ -156,23 +156,31 @@ def resolve(m: re.Match) -> str:
def clean_equation_labels(text: str) -> str:
- r"""Convert equation labels to MathJax \tag{} format."""
-
- # Pattern: equation with \label inside $$ blocks
- def add_tag(m: re.Match) -> str:
- eq_body = m.group(1)
- label_match = re.search(r'', eq_body)
- if label_match:
- label = label_match.group(1)
- # Remove the anchor from inside the equation
- eq_body = re.sub(r'\s*', "", eq_body)
- # Add \tag at the end of the equation
- eq_body = eq_body.rstrip()
- if not eq_body.endswith(r"\tag"):
- eq_body += f" \\tag{{{label}}}"
- return f"$$\n{eq_body}\n$$"
-
- return re.sub(r"\$\$\n(.*?)\n\$\$", add_tag, text, flags=re.DOTALL)
+ r"""Remove equation labels from display math blocks.
+
+ Labels appear in two forms depending on how Pandoc processes them:
+ 1. an anchor element — from the Lua filter's RawInline/RawBlock handler
+ 2. ``\label{label}`` — when Pandoc passes display math content verbatim
+
+ In both cases we strip the label. The label is already emitted as an
+ anchor *before* the ``$$`` block (by the Lua filter's
+ RawBlock handler) or indexed in the label index, so keeping it inside
+ the math would only confuse MathJax.
+
+ Rather than trying to match full ``$$…$$`` blocks (which is fragile due
+ to adjacent inline-math creating false ``$$`` boundaries), we target
+ the specific patterns where labels appear:
+ - ``\label{…}$$`` — label immediately before a closing ``$$``
+ - ``$$\label{…}`` — label immediately after an opening ``$$``
+ - an anchor followed by a blank line and ``$$``
+ """
+ # Strip \label{...} that sits directly before a closing $$; any whitespace
+ # in front of the label is removed with it.
+ text = re.sub(r"\s*\\label\{[^}]+\}(\$\$)", r"\1", text)
+ # Strip \label{...} that directly follows a $$ (on the same line), together
+ # with any whitespace after the label.
+ text = re.sub(r"(\$\$)\\label\{[^}]+\}\s*", r"\1", text)
+ # Strip anchors that sit in front of an equation block.
+ # NOTE(review): as written the capture group is empty, so this collapses
+ # optional whitespace plus a blank line in front of ``$$`` into a single
+ # newline; the anchor pattern looks like it was lost — confirm.
+ text = re.sub(r'()\s*\n\n\$\$', "\n$$", text)
+ return text
def fix_heading_dashes(text: str) -> str:
@@ -192,12 +200,31 @@ def replace_dashes(m: re.Match) -> str:
return re.sub(r"^(#{1,6})\s+(.+)$", replace_dashes, text, flags=re.MULTILINE)
+def clean_pandoc_ref_attributes(text: str) -> str:
+    r"""Clean Pandoc's ``\ref{}`` output artifacts.
+
+    Pandoc converts ``\ref{label}`` to
+    ``[\[label\]](#label){reference-type="ref" reference="label"}``.
+    This function:
+    1. Strips the ``{reference-type=... reference=...}`` attribute span.
+    2. Unescapes the bracket notation in link text, ``[\[label\]]`` →
+       ``[label]``, but only when the bracket pair is directly followed
+       by ``(``, i.e. when it is the text of an inline link.
+
+    Args:
+        text: Markdown produced by Pandoc.
+
+    Returns:
+        *text* with the attribute spans removed and the link text of
+        inline links unescaped.  Escaped brackets that are not followed
+        by a link target are left untouched.
+    """
+    # Strip {reference-type="..." reference="..."} attribute spans
+    text = re.sub(r'\{reference-type="[^"]*"\s+reference="[^"]*"\}', "", text)
+    # Clean escaped brackets in link text: [\[label\]] → [label]
+    # The (?=\() lookahead restricts this to inline links, so the same
+    # bracket sequence elsewhere in the document is not rewritten.
+    text = re.sub(r"\[\\\[([^\]]*?)\\\]\](?=\()", r"[\1]", text)
+    return text
+
+
def clean_pandoc_artifacts(text: str) -> str:
"""Remove Pandoc artifacts from the converted markdown."""
# Remove {.unnumbered} from headings
text = re.sub(r"\s*\{\.unnumbered\}", "", text)
# Remove {#sec:...} attributes that Pandoc adds
text = re.sub(r"\s*\{#[^}]+\}", "", text)
+ # Clean Pandoc \ref{} output (attribute spans and escaped brackets)
+ text = clean_pandoc_ref_attributes(text)
# Fix escaped underscores in non-math contexts
# (be careful not to break underscores in math mode)
# Only fix double-escaped underscores
@@ -213,6 +240,9 @@ def clean_empty_links(text: str) -> str:
"""Remove empty links and fix malformed link syntax."""
# Remove [](empty) links
text = re.sub(r"\[\]\([^)]*\)", "", text)
+ # Clean empty bracket artifacts in image alt text: ![caption []]( → ![caption](
+ text = re.sub(r"(!\[.*?)\s*\[\](\]\()", r"\1\2", text)
return text