From 22f300de02a9a04d465395d2081f1c78d979679d Mon Sep 17 00:00:00 2001 From: Eric Lee Date: Sat, 16 May 2026 10:57:48 -0700 Subject: [PATCH] fix(image-handling): close 4 audit follow-ups from PR #155 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the four pre-existing gaps the PR #155 audit surfaced: 1. validate_images_for_api ran only on the Anthropic-direct call_model path; all provider.chat_stream_response paths skipped it. Promoted into BaseProvider._prepare_messages so every provider validates client-side. query._call_model_sync catches ImageSizeError and surfaces a media_size assistant error (matches the existing server-side classification). agent_loop._call_provider_for_turn re-raises ImageSizeError instead of falling back to chat() (which would re-trigger the same validation). Validator also now walks images nested inside tool_result.content — post-PR #154 the Read tool returns image blocks there and the old walker missed them. 2. OpenAI tool->user split symmetric correlation. Tool message body now always carries [multimodal content for tool_use_id=X ...] when the synthetic role=user message follows; the user message leads with a [content for tool_use_id=X] text block so the link is visible from both directions. Empty content sentinel ([empty tool result]) guards the "non-empty content required" invariant for content=[] cases. 3. @file.pdf and other binary @-mentions no longer hit the text-mode open() that produced mojibake-in-system-reminder. Known binary extensions (pdf/zip/docx/...) plus a NUL-byte content sniff route to a new "binary" attachment kind with a Read-tool hint (PDF specifically mentions the pages parameter). BOM-aware decoder (_read_text_with_encoding) handles Windows-emitted UTF-16/UTF-32 files via the BOM-aware codecs (utf-16, utf-32, utf-8-sig) so the BOM is consumed rather than preserved as a literal U+FEFF char. 
Post-decode garbled-text detector (NUL zero-tolerance + 3% U+FFFD threshold) falls back to a binary attachment when an adversarial spoofed-BOM blob would otherwise leak. 4. _anthropic_image_block_to_openai now has a sibling _anthropic_document_block_to_openai that translates Anthropic DocumentBlock (PDF) to OpenAI's {type:"file"} shape. Defensive: no production path currently produces DocumentBlock for OpenAI-compat, but future ones won't silently pass through. Test plan: - 94 new tests across test_at_mention_binary_files.py, test_base_provider_image_validation.py, test_agent_loop_image_size_propagation.py, test_openai_compat_document_translation.py, plus extensions to test_image_validation.py, test_openai_compat_image_translation.py, test_query_loop.py. - Wider suite: 5015 pass / 10 pre-existing failures unchanged. - Critic review loop: APPROVE after three rounds (round-2 caught a UTF-16 BOM regression I introduced; round-3 fixed it and added the post-decode garble detector). Co-Authored-By: Claude Opus 4.7 --- src/command_system/input_processing.py | 251 +++++++++++++- src/providers/base.py | 24 +- src/providers/openai_compatible.py | 180 ++++++++-- src/query/query.py | 12 + src/repl/core.py | 8 + src/services/api/claude.py | 9 +- src/tool_system/agent_loop.py | 7 + src/utils/image_validation.py | 49 ++- .../test_agent_loop_image_size_propagation.py | 96 ++++++ tests/test_at_mention_binary_files.py | 313 ++++++++++++++++++ tests/test_base_provider_image_validation.py | 141 ++++++++ tests/test_image_validation.py | 81 +++++ ...test_openai_compat_document_translation.py | 211 ++++++++++++ tests/test_openai_compat_image_translation.py | 51 ++- tests/test_query_loop.py | 61 ++++ 15 files changed, 1450 insertions(+), 44 deletions(-) create mode 100644 tests/test_agent_loop_image_size_propagation.py create mode 100644 tests/test_at_mention_binary_files.py create mode 100644 tests/test_base_provider_image_validation.py create mode 100644 
tests/test_openai_compat_document_translation.py diff --git a/src/command_system/input_processing.py b/src/command_system/input_processing.py index 3dcd024..83b0c98 100644 --- a/src/command_system/input_processing.py +++ b/src/command_system/input_processing.py @@ -171,6 +171,206 @@ def _extract_urls(text: str) -> list[str]: # @-mention pipeline and the Read tool agree on what counts as an image. _AT_MENTION_IMAGE_EXTENSIONS = frozenset({"png", "jpg", "jpeg", "gif", "webp"}) +# Extensions known to be binary. Opening one in text mode with +# ``errors="replace"`` would emit a wall of utf-8 replacement chars that +# the model latches onto and hallucinates from (the exact failure mode the +# image branch was added to prevent). We skip the read and emit a small +# system-reminder pointing the model at the Read tool instead. PDFs are +# the user-visible case (Read tool supports them via ``pages``); the rest +# are defense in depth so a stray @archive.zip doesn't pollute the prompt +# either. +_AT_MENTION_BINARY_EXTENSIONS = frozenset({ + "pdf", + "zip", "tar", "gz", "tgz", "bz2", "xz", "7z", "rar", + "exe", "dll", "so", "dylib", "a", "o", + "bin", "iso", "dmg", + "docx", "xlsx", "pptx", "doc", "xls", "ppt", + "mp3", "wav", "flac", "ogg", "m4a", + "mp4", "mov", "avi", "mkv", "webm", + "ttf", "otf", "woff", "woff2", + "class", "jar", "war", + "pyc", "pyo", + "sqlite", "db", +}) + +# Heuristic chunk used by ``_looks_like_binary``: enough bytes to spot a +# NUL byte in plausible binary headers without paying the full read cost +# for huge files. +_BINARY_SNIFF_BYTES = 8192 + + +# BOM signatures we know how to decode as text. Order matters: UTF-32 +# BOMs must be checked BEFORE the matching UTF-16 BOMs (UTF-32-LE BOM +# `\xff\xfe\x00\x00` starts with the UTF-16-LE BOM `\xff\xfe`). +# +# Codec choice: use the BOM-aware codecs (``utf-16``, ``utf-32``) rather +# than the explicit-byte-order forms (``utf-16-le``, ``utf-16-be``). 
The +# latter PRESERVE the BOM as a literal ``U+FEFF`` codepoint at the start +# of the string; the BOM-aware codecs consume it. We picked the encoding +# by reading the BOM, so feeding the file to a codec that re-reads it is +# both correct and strips the otherwise-leaking zero-width-no-break-space. +_BOM_ENCODINGS: tuple[tuple[bytes, str], ...] = ( + (b"\xff\xfe\x00\x00", "utf-32"), + (b"\x00\x00\xfe\xff", "utf-32"), + (b"\xff\xfe", "utf-16"), + (b"\xfe\xff", "utf-16"), + (b"\xef\xbb\xbf", "utf-8-sig"), +) + + +def _detect_bom_encoding(head: bytes) -> str | None: + """Return the Python codec name implied by a BOM prefix, or None.""" + for sig, enc in _BOM_ENCODINGS: + if head.startswith(sig): + return enc + return None + + +def _looks_like_binary(path: str) -> bool: + """Return True if the first ``_BINARY_SNIFF_BYTES`` bytes contain a NUL + AND the file is not BOM-prefixed text. + + Defense-in-depth check for files whose extension isn't in + ``_AT_MENTION_BINARY_EXTENSIONS`` — e.g. a misnamed ``.txt`` that's + actually a tarball, or any extension we haven't enumerated. NUL bytes + do not appear in well-formed utf-8 text, so their presence in a sniff + window is a high-precision signal. + + Exception: UTF-16 / UTF-32 text files contain NUL bytes for every + ASCII char and would trip the naive NUL sniffer. A BOM prefix + (``\\xff\\xfe`` LE / ``\\xfe\\xff`` BE / UTF-32 variants) tells us + the file is real text that just needs a different decoder; we return + False so the text-read branch can decode it properly via + ``_read_text_with_encoding``. Without that paired fix this BOM + short-circuit would let UTF-16 mojibake flow into a system-reminder + (the exact failure mode the binary branch was added to prevent). + + Conservative on read errors (treats unreadable files as "not binary" + so the existing text-read path can surface the OSError its caller + already handles). 
+ """ + try: + with open(path, "rb") as fh: + chunk = fh.read(_BINARY_SNIFF_BYTES) + except OSError: + return False + if _detect_bom_encoding(chunk) is not None: + # Encoded text — handled by the text branch with the right codec. + return False + return b"\x00" in chunk + + +# If a decoded file has more than this fraction of U+FFFD replacement +# chars, treat it as garbage rather than text. A real UTF-16 / UTF-8 +# text file produces well under 1% replacements even with `errors=replace`; +# a binary blob with a fake BOM that doesn't pair to valid UTF-16 +# codepoints produces 10-50%+. Threshold picked to be generously high +# so a UTF-16 file with a few corrupt bytes still ships its text, but +# an adversarial binary blob with a spoofed BOM doesn't leak mojibake +# into the prompt. +_MAX_REPLACEMENT_CHAR_FRACTION = 0.03 + + +def _decoded_text_looks_garbled(decoded: str) -> bool: + """Return True if the decoded string looks like binary garbage rather + than legitimate text. Two heuristics: + + 1. **NUL char presence (zero-tolerance).** A real text file has zero + U+0000 characters. If we decoded any, the source bytes contained + NULs that the codec mapped to U+0000 — i.e. binary garbage that + happened to align with a valid UTF-16 NUL pair. Letting these + through would re-introduce Bug A's NUL-in-system-reminder + failure: when the str round-trips through utf-8 encoding for the + API, those U+0000 chars become literal NUL bytes in the prompt. + + 2. **U+FFFD fraction (high-confidence garbage signal).** Above + ``_MAX_REPLACEMENT_CHAR_FRACTION`` the file isn't the encoding + the BOM claimed. + + Residual cases the heuristic does NOT catch: a binary blob with a + spoofed BOM whose every 2-byte pair decodes to a valid non-NUL + Unicode codepoint (e.g. all bytes in a printable plane). These + appear as walls of CJK or symbol characters — not Bug A's failure + family (no NULs, no replacement chars), and the model lacks + recognizable ASCII fragments to hallucinate from. 
Accepting this + as residual risk; a more aggressive detector would false-positive + on legitimate non-Latin-script text files. + """ + if not decoded: + return False + if "\x00" in decoded: + return True + replacement_count = decoded.count("�") + if replacement_count == 0: + return False + return (replacement_count / len(decoded)) > _MAX_REPLACEMENT_CHAR_FRACTION + + +def _read_text_with_encoding(path: str) -> str | None: + """Read ``path`` as text, picking a decoder from any leading BOM. + + Falls back to utf-8 with ``errors="replace"`` for the common case + (no BOM). When a BOM is present we use the codec it implies (with + ``errors="replace"`` so a malformed trailing byte doesn't bring + down the whole read). + + Post-decode garbage check: if the decoded text is mostly U+FFFD + replacement characters, returns ``None`` so the caller drops the + attachment instead of inlining mojibake. This closes the adversarial + case where a binary blob starts with `\\xff\\xfe` (fake UTF-16 BOM) + and an extension we haven't enumerated — without the check, the + BOM short-circuit would route the blob to the text branch and the + `errors="replace"` decode would land replacement-char mojibake in + the prompt (same failure family as Bug A even though no NULs leak). + + Returns ``None`` on OSError or on garbled-decode so the caller + matches the existing "drop this attachment silently" behaviour for + unreadable files. Callers should still emit a binary-style reminder + via the @-mention `binary` attachment kind when this returns None + for a non-OSError reason — see ``expand_at_mentions``. 
+ """ + try: + with open(path, "rb") as fh: + head = fh.read(4) + except OSError: + return None + encoding = _detect_bom_encoding(head) or "utf-8" + try: + with open(path, "r", encoding=encoding, errors="replace") as fh: + data = fh.read() + except OSError: + return None + except (LookupError, UnicodeError): + # Pathological codec name from a future BOM check, or a UTF-32 + # decode failure with errors=replace (rare but theoretically + # possible). Fall back to a raw utf-8 replacement read so the + # path still produces *some* text rather than a silent drop. + try: + with open(path, "r", encoding="utf-8", errors="replace") as fh: + data = fh.read() + except OSError: + return None + if _decoded_text_looks_garbled(data): + return None + return data + + +def _binary_hint_for_ext(ext: str) -> str: + """Return a per-extension hint the model can act on, or a generic + fallback. PDF specifically points at the Read tool's ``pages`` + parameter, mirroring TS's behaviour where PDFs are surfaced through + Read rather than auto-inlined via @-mention.""" + if ext == "pdf": + return ( + "PDFs cannot be inlined as @-mention text. Use the Read tool " + "with the ``pages`` parameter (e.g. ``pages=\"1-5\"``) to view " + "specific page ranges." + ) + return ( + "This file is binary and cannot be inlined as text. Use the Read " + "tool to view it if it is a supported format." + ) + def _try_build_image_attachment( expanded: str, @@ -359,10 +559,43 @@ def expand_at_mentions( # contains the path, so the model can call ``Read`` # explicitly to see a real error. continue - try: - with open(expanded, "r", encoding="utf-8", errors="replace") as fh: - data = fh.read() - except OSError: + # Binary branch: known binary extensions (PDF, archives, ...) + # OR a content sniff that finds a NUL byte. Same Bug A + # pattern as the image branch -- opening a binary file in + # text mode produces mojibake the model hallucinates from. + # We emit a small reminder pointing at the Read tool + # instead. 
PDF gets a more specific hint because Read + # supports it via the ``pages`` parameter; everything else + # gets a generic message. + if ext in _AT_MENTION_BINARY_EXTENSIONS or _looks_like_binary(expanded): + attachments.append( + { + "kind": "binary", + "path": expanded, + "display_path": display_path, + "ext": ext, + "hint": _binary_hint_for_ext(ext), + } + ) + continue + data = _read_text_with_encoding(expanded) + if data is None: + # ``_read_text_with_encoding`` returns None on either + # OSError OR when the decoded text was mostly U+FFFD + # replacement chars (a binary blob with a spoofed + # BOM, or a file in an encoding we can't auto-detect). + # Emit a binary attachment instead of dropping + # silently so the user sees what happened and the + # model gets a Read-tool nudge. + attachments.append( + { + "kind": "binary", + "path": expanded, + "display_path": display_path, + "ext": ext, + "hint": _binary_hint_for_ext(ext), + } + ) continue attachments.append( { @@ -412,6 +645,16 @@ def format_at_mention_attachments(attachments: list[dict[str, Any]]) -> str: elif kind == "image": # See docstring -- handled separately as a content block. continue + elif kind == "binary": + # PDFs / archives / docx / ... — emit a short reminder pointing + # the model at the Read tool so the @-mention is acknowledged + # without flooding the prompt with mojibake. + blocks.append( + f"<system-reminder>\n" + f"@-mentioned file {att['display_path']} is a binary file " + f"and was not inlined. 
{att.get('hint', '')}\n" + f"</system-reminder>" + ) + elif kind == "agent_mention": + # Mirrors ``typescript/src/utils/messages.ts`` ``agent_mention`` + # case: the reminder nudges the model to delegate to the named diff --git a/src/providers/base.py b/src/providers/base.py index 8e40cf9..163cef6 100644 --- a/src/providers/base.py +++ b/src/providers/base.py @@ -144,5 +144,25 @@ def _get_model(self, **kwargs) -> str: return strip_1m_context_suffix(raw) if raw else raw def _prepare_messages(self, messages: list[MessageInput]) -> list[dict[str, Any]]: - """Convert provider messages to API dictionary format.""" - return [msg if isinstance(msg, dict) else msg.to_dict() for msg in messages] + """Convert provider messages to API dictionary format. + + Also runs ``validate_images_for_api`` so every provider — not just + the Anthropic-direct ``services/api/claude.py:call_model`` path — + rejects oversize base64 images before the network round trip. + Anthropic's 5 MB hard limit applies to its own provider; for + OpenAI-compatible providers the check runs on the still-Anthropic + shape (subclasses call ``super()._prepare_messages`` before + translating to ``image_url``), so the same client-side guard + applies. Providers without image support are unaffected: the + walker only inspects ``type=image`` blocks. Raises + ``ImageSizeError``; callers (``query._call_model_sync``, + ``tool_system.agent_loop._call_provider_for_turn``) translate it + into a media-size error message rather than letting it surface as + an opaque API failure. + """ + prepared = [msg if isinstance(msg, dict) else msg.to_dict() for msg in messages] + # Local import to avoid a top-level dependency from base.py into + # the utils package, matching the style of ``_get_model``. 
+ from src.utils.image_validation import validate_images_for_api + validate_images_for_api(prepared) + return prepared diff --git a/src/providers/openai_compatible.py b/src/providers/openai_compatible.py index 002803d..16d7b38 100644 --- a/src/providers/openai_compatible.py +++ b/src/providers/openai_compatible.py @@ -47,12 +47,79 @@ def _anthropic_image_block_to_openai(block: dict[str, Any]) -> dict[str, Any] | } +def _anthropic_document_block_to_openai(block: dict[str, Any]) -> dict[str, Any] | None: + """Translate an Anthropic document content block (PDF) to OpenAI's + ``file`` content shape. + + Anthropic: ``{"type": "document", "source": {"type": "base64", + "media_type": "application/pdf", "data": "..."}}``. + OpenAI: ``{"type": "file", "file": {"filename": "document.pdf", + "file_data": "data:application/pdf;base64,..."}}``. + + No production path currently produces ``DocumentBlock`` for an + OpenAI-compatible provider (PDFs flow through Read tool's + ``_read_map_result_to_api`` as text). This translator exists so that + if ``DocumentBlock`` ever shows up — e.g. a future @-mention path or + a third-party tool returning a PDF — it's converted to the closest + OpenAI shape instead of silently passing through as an unrecognised + Anthropic-shape block (which OpenAI-compat providers either reject + or drop). Mirrors the ``image`` translator's defensive contract. + + Returns ``None`` when the block isn't a recognisable Anthropic + document block (caller should keep the original). 
+ """ + if not isinstance(block, dict) or block.get("type") != "document": + return None + source = block.get("source") + if not isinstance(source, dict): + return None + if source.get("type") != "base64": + return None + media_type = source.get("media_type") or "application/pdf" + data = source.get("data") + if not data or not isinstance(data, str): + # Same producer-bug guard as the image translator: an empty + # ``data`` field would produce ``data:application/pdf;base64,`` + # which the server rejects. Return ``None`` so the caller keeps + # the original block and the upstream serializer surfaces the + # malformed shape instead of producing a request the server will + # fail confusingly. + return None + # OpenAI's ``file`` block accepts an optional ``filename``; many + # provider impls require it. Use a stable default since Anthropic's + # document source carries no filename hint. + return { + "type": "file", + "file": { + "filename": "document.pdf", + "file_data": f"data:{media_type};base64,{data}", + }, + } + + +def _translate_anthropic_multimodal_block(block: Any) -> dict[str, Any] | None: + """Try every Anthropic-shape multimodal translator on ``block``. + + Returns the OpenAI-shape replacement, or ``None`` when the block isn't + a translatable multimodal type (text / unknown blocks pass through + untouched at the call site). Centralised so the converter doesn't + grow a long chain of ``if isinstance/elif`` branches as new + multimodal shapes appear. + """ + if not isinstance(block, dict): + return None + translated = _anthropic_image_block_to_openai(block) + if translated is not None: + return translated + return _anthropic_document_block_to_openai(block) + + def _convert_anthropic_messages_to_openai( messages: list[dict[str, Any]], ) -> list[dict[str, Any]]: """Convert Anthropic-format messages to OpenAI chat-completion format. - Handles three transformations: + Handles four transformations: 1. 
Assistant messages with tool_use content blocks → assistant + tool_calls 2. User messages with tool_result content blocks → role=tool messages 3. Anthropic image content blocks (in user messages or tool_result @@ -60,6 +127,11 @@ def _convert_anthropic_messages_to_openai( both @image.png @-mentions and Read-tool image returns ship Anthropic-shape blocks; without translation, OpenAI-compatible providers either reject the request or silently drop the image. + 4. Anthropic document content blocks → OpenAI ``file`` blocks. + Defensive translation: no production path currently produces + ``DocumentBlock`` for an OpenAI-compatible provider, but if one + ever appears it lands in the closest OpenAI shape rather than + passing through as an unrecognised Anthropic block. """ result: list[dict[str, Any]] = [] for msg in messages: @@ -131,9 +203,10 @@ def _convert_anthropic_messages_to_openai( if isinstance(block, dict) and block.get("type") == "tool_result": tool_results.append(block) else: - # Translate Anthropic image blocks to OpenAI ``image_url`` - # shape; pass everything else through unchanged. - translated = _anthropic_image_block_to_openai(block) if isinstance(block, dict) else None + # Translate Anthropic multimodal blocks (image / document) + # to their OpenAI shapes; pass everything else through + # unchanged. + translated = _translate_anthropic_multimodal_block(block) non_tool_blocks.append(translated if translated is not None else block) # Emit non-tool content first as user message @@ -151,17 +224,17 @@ def _convert_anthropic_messages_to_openai( # blocks via ``image_url``. The model sees the image # alongside the tool result, which is the closest semantic # match to Anthropic's native multimodal tool_result. 
- image_blocks_from_tool: list[dict[str, Any]] = [] + multimodal_blocks_from_tool: list[dict[str, Any]] = [] if isinstance(raw_content, list): # Flatten nested content blocks to text parts = [] for item in raw_content: if isinstance(item, dict) and item.get("type") == "text": parts.append(item.get("text", "")) - elif isinstance(item, dict) and item.get("type") == "image": - translated_img = _anthropic_image_block_to_openai(item) - if translated_img is not None: - image_blocks_from_tool.append(translated_img) + elif isinstance(item, dict) and item.get("type") in ("image", "document"): + translated_multimodal = _translate_anthropic_multimodal_block(item) + if translated_multimodal is not None: + multimodal_blocks_from_tool.append(translated_multimodal) elif isinstance(item, str): parts.append(item) else: @@ -172,30 +245,85 @@ def _convert_anthropic_messages_to_openai( else: flat_content = str(raw_content) - # OpenAI requires ``content`` to be a non-empty string on - # tool messages; if the original was image-only, leave a - # short pointer so the tool_call is paired with something. - # The follow-up ``role=user`` message carrying the - # ``image_url`` block is NOT linked to ``tool_call_id`` -- - # OpenAI's wire format has no way to attach a tool_call_id - # to a user message. The model sees ``tool(text result)`` - # followed by ``user(image)`` and may briefly treat the - # trailing user message as a new prompt. The placeholder - # text reduces that risk, but it's a known OpenAI-API - # limitation we cannot fully paper over; on Anthropic the - # equivalent stays as a single multimodal tool_result. 
- if not flat_content and image_blocks_from_tool: - flat_content = "[image content delivered in the following message]" + # KNOWN OPENAI-API LIMITATION — tool→user split: + # ---------------------------------------------------- + # OpenAI's wire format requires ``content`` on a tool + # message to be a non-empty string AND does not allow + # image_url / file blocks in a tool message. So when + # an Anthropic tool_result carries multimodal content, + # we must split it: emit ``role=tool`` with the text + # body (or a placeholder if none), then a synthetic + # ``role=user`` carrying the translated multimodal + # blocks. The synthetic user message CANNOT be linked + # back to its parent tool_call_id — OpenAI provides no + # wire-level mechanism for that. The model sees + # ``tool(text)`` followed by ``user(multimodal)`` and + # could briefly treat the trailing user message as a + # new prompt rather than the tool's payload. + # + # Mitigations applied here: + # 1. Tool message carries an explicit "see following + # message" suffix (image-only) or a "see also next + # message" line (text+image) referencing the + # tool_use_id. This is symmetric across both the + # image-only and text+image branches so the + # correlation hint is always present when a split + # happened. + # 2. Synthetic user message starts with a tiny text + # block that names the tool_use_id. The image / + # file blocks follow. The model now has the link + # visible from BOTH directions even though + # ``tool_call_id`` only exists on the tool message. + # On Anthropic the equivalent stays a single + # multimodal tool_result with no split; there is no + # equivalent Anthropic-side limitation. 
+ tool_use_id = tr.get("tool_use_id", "") + if multimodal_blocks_from_tool: + correlation = ( + f"[multimodal content for tool_use_id={tool_use_id} " + "delivered in the following message]" + ) + if flat_content: + # Text + multimodal: append a one-line pointer so + # the tool message carries the same correlation + # marker the image-only branch produces. + flat_content = f"{flat_content}\n\n{correlation}" + else: + # Image-only tool_result: the pointer is the + # whole tool-message body. OpenAI rejects empty + # tool-message content, so this also doubles as + # the non-empty guard. + flat_content = correlation + if not flat_content: + # Defensive: if there were no multimodal blocks AND + # no text body (e.g. ``content: []`` or ``content: ""`` + # — the converter's fallthrough JSON-dumps unknown + # block types into ``flat_content`` so those don't + # land here), emit a literal sentinel so OpenAI's + # "tool message content must be non-empty" + # requirement is honoured. + flat_content = "[empty tool result]" result.append({ "role": "tool", - "tool_call_id": tr.get("tool_use_id", ""), + "tool_call_id": tool_use_id, "content": flat_content, }) - if image_blocks_from_tool: + if multimodal_blocks_from_tool: + # Lead with a tiny text block naming the parent + # tool_use_id so the model can correlate this + # synthetic user message back to the prior + # ``role=tool`` message even though OpenAI's wire + # format gives no tool_call_id on user messages. 
+ correlation_text = { + "type": "text", + "text": ( + f"[content for tool_use_id={tool_use_id}]" + ), + } result.append({ "role": "user", - "content": image_blocks_from_tool, + "content": [correlation_text, *multimodal_blocks_from_tool], }) continue diff --git a/src/query/query.py b/src/query/query.py index 68b2175..dc87f46 100644 --- a/src/query/query.py +++ b/src/query/query.py @@ -22,6 +22,7 @@ from ..tool_system.protocol import ToolCall, ToolResult from ..tool_system.registry import ToolRegistry from ..utils.abort_controller import AbortController, AbortError +from ..utils.image_validation import ImageSizeError from ..providers.base import BaseProvider, ChatResponse from .config import QueryConfig, build_query_config @@ -405,6 +406,17 @@ async def _call_model_sync( # to those substring checks could accidentally match an abort # reason and convert the cancel into a model-error reply. raise + except ImageSizeError as e: + # Client-side pre-API validation tripped (BaseProvider._prepare_messages). + # Surface as a media_size error with the same classification the + # server-side guard uses, so the reactive-compact recovery path + # (Ch5/B.2) treats them identically. + err_msg = _create_assistant_api_error_message( + f"Media too large: {e}", + error="media_size", + ) + err_msg._api_error = "media_size" # type: ignore[attr-defined] + return [err_msg], [] except Exception as e: if _diag: logger.warning("[DIAG] _call_model_sync: EXCEPTION after %.1fs: %s", time.monotonic() - _t0, e) diff --git a/src/repl/core.py b/src/repl/core.py index c658598..bef11ea 100644 --- a/src/repl/core.py +++ b/src/repl/core.py @@ -2316,6 +2316,14 @@ def chat(self, user_input: str, max_turns: int | None = None): self.console.print( f"[dim] ⎿ Read image {att['display_path']}[/dim]" ) + elif kind == "binary": + # Binary file (PDF, archive, ...) — show what happened + # so the user isn't surprised that no content was + # inlined. 
The reminder text already nudges the model + # toward the Read tool. + self.console.print( + f"[dim] ⎿ Skipped binary file {att['display_path']}[/dim]" + ) else: label = "directory" if kind == "directory" else "file" self.console.print( diff --git a/src/services/api/claude.py b/src/services/api/claude.py index 19592a4..67a60c9 100644 --- a/src/services/api/claude.py +++ b/src/services/api/claude.py @@ -293,10 +293,11 @@ async def call_model( # instead of a generic API failure. Mirrors TS # validateImagesForAPI invocation at utils/imageValidation.ts. # - # TODO: only the Anthropic provider path runs this check. - # src/providers/{openai_compatible,anthropic_provider,minimax_provider}.py - # all bypass it. Promote validation into BaseProvider._prepare_messages - # if/when those providers grow image-content-block support. + # This path uses the anthropic SDK directly (not BaseProvider), + # so we still validate here. Provider-based paths + # (chat_stream_response / chat) validate inside + # BaseProvider._prepare_messages and surface ImageSizeError via + # query._call_model_sync's media_size handler. try: from src.utils.image_validation import ImageSizeError, validate_images_for_api validate_images_for_api(api_messages) diff --git a/src/tool_system/agent_loop.py b/src/tool_system/agent_loop.py index c2cc1c5..7844ca5 100644 --- a/src/tool_system/agent_loop.py +++ b/src/tool_system/agent_loop.py @@ -16,6 +16,7 @@ from ..providers.anthropic_provider import AnthropicProvider from ..providers.minimax_provider import MinimaxProvider from ..utils.abort_controller import AbortError, AbortSignal +from ..utils.image_validation import ImageSizeError def _is_anthropic_provider(provider: BaseProvider) -> bool: @@ -176,6 +177,12 @@ def _call_provider_for_turn( # User-initiated cancel must propagate; do not fall through # to the non-streaming code path. 
raise + except ImageSizeError: + # Pre-API client-side validation tripped — falling back to + # the non-streaming path would re-run ``_prepare_messages`` + # and raise the same error. Propagate so the outer loop can + # surface a media_size error message instead of double-call. + raise except Exception: # Preserve existing stable behavior if streaming is unsupported or fails. pass diff --git a/src/utils/image_validation.py b/src/utils/image_validation.py index 7b0a5ed..d76c708 100644 --- a/src/utils/image_validation.py +++ b/src/utils/image_validation.py @@ -76,11 +76,56 @@ def _get_image_source_data(block: Any) -> str | None: return data if isinstance(data, str) else None +# Cap on nested tool_result depth the walker will descend into. The +# Anthropic API doesn't accept arbitrarily deep nesting in production +# (tool_result content is normally one level deep); the limit guards a +# pathologically constructed message from blowing Python's recursion +# limit. Pick a value comfortably above any realistic depth. +_MAX_TOOL_RESULT_DEPTH = 32 + + +def _iter_image_blocks(content: Any, _depth: int = 0): + """Yield every image block reachable from ``content``. + + Walks top-level blocks AND descends into ``tool_result`` block content + lists. Required because the Read tool now returns images inside a + tool_result's ``content`` (post-#154/#155 image-handling parity); a + walker that only looks at the outer user message would miss them and + let an oversized base64 reach the API. + + Recursion is bounded by ``_MAX_TOOL_RESULT_DEPTH`` so an adversarial + or accidentally over-nested message cannot hit Python's default + recursion limit (typically ~1000). Production tool_result nesting is + single-digit; the cap is generous defense-in-depth. Depth-first + yield order matches a reader's mental model of the message: for + ``[ImgA, ToolResult(ImgB), ImgC]`` the walker yields A, B, C, which + matches the index numbering used in ``ImageSizeError``. 
+ """ + if _depth > _MAX_TOOL_RESULT_DEPTH: + return + for block in _iter_content_blocks(content): + btype = _get_block_type(block) + if btype == "image": + yield block + elif btype == "tool_result": + # tool_result.content is either a string (no images) or a + # list of blocks; recurse so we descend in document order. + if isinstance(block, dict): + inner = block.get("content") + else: + inner = getattr(block, "content", None) + if isinstance(inner, list): + yield from _iter_image_blocks(inner, _depth + 1) + + def validate_images_for_api(messages: list[Any]) -> None: """Walk ``messages`` and raise ImageSizeError if any image is too large. Each image block's base64 string length is compared to ``API_IMAGE_MAX_BASE64_SIZE`` (5 MB). Mirrors TS imageValidation.ts:52-105. + + Recursion covers tool_result-nested images so the Read tool's image + return path is also guarded; see ``_iter_image_blocks``. """ oversized: list[tuple[int, int]] = [] for msg in messages: @@ -89,9 +134,7 @@ def validate_images_for_api(messages: list[Any]) -> None: content = msg.get("content") else: content = getattr(msg, "content", None) - for block in _iter_content_blocks(content): - if _get_block_type(block) != "image": - continue + for block in _iter_image_blocks(content): data = _get_image_source_data(block) if data is None: continue diff --git a/tests/test_agent_loop_image_size_propagation.py b/tests/test_agent_loop_image_size_propagation.py new file mode 100644 index 0000000..c181faa --- /dev/null +++ b/tests/test_agent_loop_image_size_propagation.py @@ -0,0 +1,96 @@ +"""Pin that ``_call_provider_for_turn`` re-raises ``ImageSizeError`` +instead of swallowing it via the generic ``except Exception`` fallback. + +If the streaming path's ``_prepare_messages`` raises ``ImageSizeError``, +falling back to ``provider.chat()`` is pointless: ``chat()`` calls the +same ``_prepare_messages`` and will raise again. 
Propagating the +streaming-path exception lets the outer query loop translate it into a +clean media_size error message immediately. +""" + +from __future__ import annotations + +import unittest +from typing import Any +from unittest.mock import MagicMock + +from src.providers.base import ChatResponse +from src.tool_system.agent_loop import _call_provider_for_turn +from src.utils.image_validation import ImageSizeError + + +class TestImageSizeErrorPropagation(unittest.TestCase): + def _make_provider(self, stream_exc: Exception, chat_response: Any = None) -> MagicMock: + provider = MagicMock() + provider.chat_stream_response.side_effect = stream_exc + if chat_response is None: + # Should not be reached in the propagation test; trip if so. + provider.chat.side_effect = AssertionError( + "chat() should not be invoked after streaming ImageSizeError" + ) + else: + provider.chat.return_value = chat_response + return provider + + def test_streaming_imagesize_propagates_without_chat_fallback(self) -> None: + oversize = ImageSizeError([(6 * 1024 * 1024, 5 * 1024 * 1024)]) + provider = self._make_provider(oversize) + with self.assertRaises(ImageSizeError): + _call_provider_for_turn( + provider=provider, + api_messages=[{"role": "user", "content": "x"}], + call_kwargs={}, + stream=True, + on_text_chunk=None, + ) + # chat() must NOT have been called because we shouldn't double-attempt + provider.chat.assert_not_called() + + def test_streaming_not_implemented_still_falls_back_to_chat(self) -> None: + """Regression: only ImageSizeError is the new re-raise. 
Existing + fallback behavior for NotImplementedError must stay intact.""" + ok = ChatResponse( + content="hi", + model="test", + usage={"input_tokens": 1, "output_tokens": 1}, + finish_reason="end_turn", + tool_uses=None, + ) + provider = self._make_provider(NotImplementedError(), chat_response=ok) + response, streamed = _call_provider_for_turn( + provider=provider, + api_messages=[{"role": "user", "content": "x"}], + call_kwargs={}, + stream=True, + on_text_chunk=None, + ) + self.assertEqual(response.content, "hi") + self.assertFalse(streamed) + provider.chat.assert_called_once() + + def test_streaming_generic_exception_still_falls_back_to_chat(self) -> None: + """Regression: pre-existing ``except Exception: pass`` fallback must + keep working for unrelated stream-time errors so user behavior is + unchanged for transient SDK glitches.""" + ok = ChatResponse( + content="hi", + model="test", + usage={"input_tokens": 1, "output_tokens": 1}, + finish_reason="end_turn", + tool_uses=None, + ) + provider = self._make_provider(RuntimeError("stream blew up"), chat_response=ok) + response, streamed = _call_provider_for_turn( + provider=provider, + api_messages=[{"role": "user", "content": "x"}], + call_kwargs={}, + stream=True, + on_text_chunk=None, + ) + self.assertEqual(response.content, "hi") + self.assertFalse(streamed) + provider.chat.assert_called_once() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_at_mention_binary_files.py b/tests/test_at_mention_binary_files.py new file mode 100644 index 0000000..4dfa1a8 --- /dev/null +++ b/tests/test_at_mention_binary_files.py @@ -0,0 +1,313 @@ +"""Tests that ``@file.pdf`` and other binary @-mentions are routed to a +"binary" attachment instead of being opened in text mode (the Bug A +pattern that produced mojibake the model hallucinated from). + +PDF in TS is surfaced through the Read tool's ``pages`` parameter, not +auto-inlined via @-mention. 
Python now matches: we emit a small system +reminder pointing the model at Read, rather than dumping replacement +chars into the prompt. + +Coverage: +- Known binary extensions (.pdf, .zip, .docx, ...) skip the text read. +- PDF reminder mentions the Read tool's ``pages`` parameter specifically. +- Files with NUL bytes are caught by the content sniffer even when their + extension isn't enumerated (defense in depth). +- Normal text files still flow through the unchanged ``kind="file"`` path. +- ``format_at_mention_attachments`` renders the new "binary" kind without + leaking content bytes. +""" + +from __future__ import annotations + +import os +import tempfile +import unittest +from pathlib import Path + +from src.command_system.input_processing import ( + expand_at_mentions, + format_at_mention_attachments, +) + + +def _render(atts: list[dict]) -> bytes: + """Helper: render attachments and return the bytes that would be + embedded in the prompt. Tests assert on the bytes (not the str) so + NUL chars and utf-8 replacement chars are detectable. + """ + return format_at_mention_attachments(atts).encode("utf-8", errors="surrogatepass") + + +class TestPdfAtMention(unittest.TestCase): + def setUp(self) -> None: + self.tmp = tempfile.TemporaryDirectory() + self.cwd = self.tmp.name + + def tearDown(self) -> None: + self.tmp.cleanup() + + def _write(self, name: str, data: bytes) -> str: + path = os.path.join(self.cwd, name) + Path(path).write_bytes(data) + return path + + def test_pdf_extension_routes_to_binary_attachment(self) -> None: + # Synthetic PDF magic header + opaque bytes — never opened as text. 
+ self._write("doc.pdf", b"%PDF-1.7\n\x00\x01\x02\x03 lorem") + _, atts = expand_at_mentions("look at @doc.pdf", cwd=self.cwd) + self.assertEqual(len(atts), 1) + att = atts[0] + self.assertEqual(att["kind"], "binary") + self.assertEqual(att["ext"], "pdf") + self.assertEqual(att["display_path"], "doc.pdf") + # PDF hint must mention Read tool + pages parameter so the model + # has a concrete next step. + self.assertIn("Read tool", att["hint"]) + self.assertIn("pages", att["hint"]) + + def test_pdf_attachment_does_not_leak_bytes(self) -> None: + """Critical: the binary attachment must NOT carry the file content. + Bug A's failure mode was shipping mojibake into the prompt; the + whole point of the binary branch is to avoid that.""" + self._write("doc.pdf", b"%PDF-1.7\n\x00binarygarbagehere") + _, atts = expand_at_mentions("@doc.pdf", cwd=self.cwd) + att = atts[0] + self.assertNotIn("content", att) + rendered = format_at_mention_attachments(atts) + self.assertNotIn("binarygarbagehere", rendered) + # The reminder structure should be intact and reference the file. + self.assertIn("", rendered) + self.assertIn("doc.pdf", rendered) + self.assertIn("binary", rendered.lower()) + + def test_archive_extension_routes_to_binary(self) -> None: + self._write("payload.zip", b"PK\x03\x04 archive bytes here") + _, atts = expand_at_mentions("@payload.zip", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "binary") + # Generic hint for non-PDF binaries. + self.assertNotIn("pages", atts[0]["hint"]) + + def test_docx_routes_to_binary(self) -> None: + # docx is just a zip; same extension-based detection should fire. 
+ self._write("report.docx", b"PK\x03\x04 docx-ish bytes") + _, atts = expand_at_mentions("@report.docx", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "binary") + + def test_text_file_unaffected(self) -> None: + self._write("notes.txt", b"hello world\nline two\n") + _, atts = expand_at_mentions("@notes.txt", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "file") + self.assertIn("hello world", atts[0]["content"]) + + def test_unknown_extension_with_nul_bytes_caught_by_sniffer(self) -> None: + """Defense in depth: a misnamed binary (e.g. .dat / .txt) whose + extension we haven't enumerated should still be classified as + binary based on a NUL byte in the head of the file. Prevents the + mojibake regression from sneaking in via any extension we missed.""" + self._write("blob.dat", b"\x00\x01\x02 actual binary content") + _, atts = expand_at_mentions("@blob.dat", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "binary") + + def test_text_without_nul_bytes_still_text(self) -> None: + """Regression: the sniffer must only fire on actual NULs, not on + legitimate non-ASCII text. UTF-8 source code with snowmen and + accents must still land as ``kind=file``.""" + self._write("notes.md", "héllo ☃ wörld\n".encode("utf-8")) + _, atts = expand_at_mentions("@notes.md", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "file") + self.assertIn("héllo", atts[0]["content"]) + + def test_utf16_le_bom_decoded_properly_no_mojibake(self) -> None: + """Windows-emitted UTF-16-LE files have NUL bytes for every ASCII + char and would trip the NUL sniffer. A BOM prefix tells us the + file is real text that just needs a different decoder; the text + branch picks the codec from the BOM and produces clean text — NOT + the mojibake-with-embedded-NULs that an unconditional utf-8 read + would produce (that would re-introduce the exact failure mode + this whole work was designed to prevent). 
+ + The test asserts BOTH on the attachment content AND on the + rendered system-reminder bytes, because the prompt-time failure + mode is mojibake-in-system-reminder. Asserting only the routing + decision (kind=file) would pin a passing bug. + """ + self._write("win.log", b"\xff\xfe" + "hello world\nsecond line\n".encode("utf-16-le")) + _, atts = expand_at_mentions("@win.log", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "file") + self.assertEqual(atts[0]["content"], "hello world\nsecond line\n") + # CRITICAL: rendered prompt bytes must NOT contain NULs or utf-8 + # replacement chars. A regression here is exactly Bug A. + rendered = _render(atts) + self.assertNotIn(b"\x00", rendered) + self.assertNotIn(b"\xef\xbf\xbd", rendered) # utf-8 of U+FFFD replacement char + self.assertIn(b"hello world", rendered) + + def test_utf16_be_bom_decoded_properly(self) -> None: + self._write("be.log", b"\xfe\xff" + "hello\n".encode("utf-16-be")) + _, atts = expand_at_mentions("@be.log", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "file") + self.assertEqual(atts[0]["content"], "hello\n") + rendered = _render(atts) + self.assertNotIn(b"\x00", rendered) + self.assertNotIn(b"\xef\xbf\xbd", rendered) + self.assertIn(b"hello", rendered) + + def test_utf8_bom_stripped(self) -> None: + """UTF-8 BOM (rare, but Windows-emitted) — must be transparently + decoded so the BOM character itself doesn't end up in the prompt + content. ``utf-8-sig`` codec handles this.""" + self._write("bom.md", b"\xef\xbb\xbfhello\n") + _, atts = expand_at_mentions("@bom.md", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "file") + self.assertEqual(atts[0]["content"], "hello\n") + # BOM character (U+FEFF) must NOT appear in either content or render. 
+ self.assertNotIn("", atts[0]["content"]) + rendered = _render(atts) + self.assertNotIn(b"\xef\xbb\xbf", rendered) + + def test_utf32_le_bom_decoded(self) -> None: + self._write("u32.log", b"\xff\xfe\x00\x00" + "hi\n".encode("utf-32-le")) + _, atts = expand_at_mentions("@u32.log", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "file") + self.assertEqual(atts[0]["content"], "hi\n") + rendered = _render(atts) + self.assertNotIn(b"\x00", rendered) + + def test_no_bom_utf16_le_still_classified_binary(self) -> None: + """A UTF-16 LE file WITHOUT a BOM still has NULs and we have no + signal to tell it from a binary blob. Falls through to the + binary branch — strictly safer than shipping NUL-laden mojibake.""" + self._write("nobom.log", "hello world\n".encode("utf-16-le")) + _, atts = expand_at_mentions("@nobom.log", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "binary") + + def test_spoofed_bom_binary_blob_does_not_leak_bug_a_signatures(self) -> None: + """Adversarial case: a binary blob whose extension is NOT in + ``_AT_MENTION_BINARY_EXTENSIONS`` and whose head bytes start + with a valid-looking UTF-16 BOM. UTF-16 decodes every 2-byte + pair to *some* Unicode codepoint, so the BOM short-circuit + produces a wall of nonsensical CJK/symbol characters rather + than NUL- or U+FFFD-laden mojibake. + + The Bug A failure family is specifically about NUL bytes and + utf-8 replacement chars being smuggled into a system-reminder + and the model latching onto recognisable ASCII fragments. This + test pins THAT invariant — the rendered prompt bytes must not + contain NUL or U+FFFD — and accepts that an adversarial blob + may still get inlined as text. A stronger garble detector that + flagged this case would false-positive on legitimate + non-Latin-script UTF-16 files (Chinese/Japanese logs, etc.). + + If a future failure mode hinges on recognisable English-ASCII + fragments surviving the UTF-16 decode, that's the trigger to + revisit — not this case. 
+ """ + # Bytes after the BOM decode to high-plane codepoints (no + # English text in the output), so even if the wall-of-glyphs + # ships to the model there's no ASCII to latch onto. + payload = b"\xff\xfe" + bytes(range(1, 200)) * 4 + self._write("blob.dat", payload) + _, atts = expand_at_mentions("@blob.dat", cwd=self.cwd) + self.assertEqual(len(atts), 1) + # CRITICAL: Bug A invariants — no NULs, no U+FFFD replacement chars + # in the rendered prompt bytes. The walk-of-glyphs is acceptable; + # NUL- or U+FFFD-laden mojibake is not. + rendered = _render(atts) + self.assertNotIn(b"\x00", rendered) + self.assertNotIn(b"\xef\xbf\xbd", rendered) + + def test_nul_heavy_decode_falls_back_to_binary(self) -> None: + """Adversarial case found mid-review: a binary blob with a + spoofed UTF-16 BOM and NUL-heavy body. The UTF-16 decode maps + each NUL pair to U+0000 — Python str chars that round-trip + BACK to NUL bytes when encoded to utf-8 for the API. Without + the NUL-char check in ``_decoded_text_looks_garbled`` the + prompt bytes would carry literal NULs — exactly Bug A's + failure mode.""" + payload = b"\xff\xfe" + b"\x00" * 400 + self._write("nuly.dat", payload) + _, atts = expand_at_mentions("@nuly.dat", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "binary", + "NUL-heavy decode must NOT inline as kind=file") + rendered = _render(atts) + self.assertNotIn(b"\x00", rendered) + + def test_high_replacement_char_decode_falls_back_to_binary(self) -> None: + """A file whose decoded form is *mostly* U+FFFD replacement + characters is rejected from the text branch — that pattern is + a high-confidence signal that the file isn't the encoding the + BOM (or default utf-8) implied. Specifically pins the + ``_decoded_text_looks_garbled`` threshold so a regression that + opens the floodgates again gets caught. + + Constructed without a BOM so it falls through to utf-8 decode, + where deliberately-invalid utf-8 byte sequences produce U+FFFD. 
+ """ + # Mostly 0x80-0xBF bytes (utf-8 continuation bytes with no + # leading byte → each one becomes U+FFFD). Mixed with some + # printable ASCII to keep len > 0. + payload = b"!" + b"\x80\x81\x82\x83" * 200 + self._write("noisy.bad", payload) + _, atts = expand_at_mentions("@noisy.bad", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "binary", + "decoded text with high U+FFFD fraction must NOT inline as kind=file") + rendered = _render(atts) + self.assertNotIn(b"\xef\xbf\xbd", rendered) + + def test_image_still_routes_to_image_branch(self) -> None: + """Regression: image extensions must keep going through the image + pipeline -- binary branch must come AFTER image branch in the + dispatch order, so PNG/JPEG never lose their inline behaviour.""" + # Build a real 1x1 PNG via Pillow so the image pipeline actually + # produces an attachment; otherwise the test couldn't tell apart + # "image branch dropped silently" from "binary branch fired". + try: + from PIL import Image + except ImportError: # pragma: no cover + self.skipTest("Pillow required") + import io + buf = io.BytesIO() + Image.new("RGB", (1, 1), (255, 0, 0)).save(buf, format="PNG") + self._write("pixel.png", buf.getvalue()) + _, atts = expand_at_mentions("@pixel.png", cwd=self.cwd) + self.assertEqual(len(atts), 1) + # Image attachment, NOT binary. + self.assertEqual(atts[0]["kind"], "image") + + +class TestBinaryAttachmentRendering(unittest.TestCase): + def test_rendered_reminder_for_pdf(self) -> None: + att = { + "kind": "binary", + "path": "/x/y/report.pdf", + "display_path": "report.pdf", + "ext": "pdf", + "hint": "PDFs cannot be inlined as @-mention text. 
Use the Read tool with the ``pages`` parameter.", + } + rendered = format_at_mention_attachments([att]) + self.assertIn("", rendered) + self.assertIn("report.pdf", rendered) + self.assertIn("binary", rendered.lower()) + self.assertIn("Read tool", rendered) + + def test_rendered_reminder_for_generic_binary(self) -> None: + att = { + "kind": "binary", + "path": "/x/y/blob.bin", + "display_path": "blob.bin", + "ext": "bin", + "hint": "This file is binary and cannot be inlined as text.", + } + rendered = format_at_mention_attachments([att]) + self.assertIn("blob.bin", rendered) + self.assertIn("binary", rendered.lower()) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_base_provider_image_validation.py b/tests/test_base_provider_image_validation.py new file mode 100644 index 0000000..caaa945 --- /dev/null +++ b/tests/test_base_provider_image_validation.py @@ -0,0 +1,141 @@ +"""Tests that ``BaseProvider._prepare_messages`` runs the same +``validate_images_for_api`` guard that the Anthropic-direct ``call_model`` +path runs, so every provider — anthropic, openai-compatible, glm, +minimax, deepseek, openrouter — rejects oversized base64 images +client-side instead of letting the API return an opaque error. + +Pre-fix gap: the validator only ran on ``services/api/claude.py:call_model``, +which is the parity-streaming path. Production paths go through +``provider.chat_stream_response`` and skipped the check entirely. 
+""" + +from __future__ import annotations + +import unittest +from typing import Any + +from src.providers.base import BaseProvider, ChatMessage +from src.providers.openai_compatible import OpenAICompatibleProvider +from src.utils.image_processor import API_IMAGE_MAX_BASE64_SIZE +from src.utils.image_validation import ImageSizeError + + +class _StubProvider(BaseProvider): + """Concrete subclass that exposes ``_prepare_messages`` for direct test + invocation without standing up a real chat client.""" + + def chat(self, messages, tools=None, **kwargs): # pragma: no cover - unused + raise NotImplementedError + + def chat_stream(self, messages, tools=None, **kwargs): # pragma: no cover - unused + raise NotImplementedError + + def get_available_models(self): # pragma: no cover - unused + return [] + + +class _StubOpenAICompat(OpenAICompatibleProvider): + """Concrete OpenAI-compatible subclass for exercising the override path.""" + + def _create_client(self): # pragma: no cover - unused + raise NotImplementedError + + def get_available_models(self): # pragma: no cover - unused + return [] + + +def _img_block(b64_len: int) -> dict[str, Any]: + return { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "A" * b64_len}, + } + + +def _msg(content) -> dict[str, Any]: + return {"role": "user", "content": content} + + +class TestBaseProviderImageValidation(unittest.TestCase): + def test_oversize_image_raises_imagesizeerror(self) -> None: + provider = _StubProvider(api_key="test") + with self.assertRaises(ImageSizeError): + provider._prepare_messages([_msg([_img_block(API_IMAGE_MAX_BASE64_SIZE + 1)])]) + + def test_image_at_limit_passes(self) -> None: + provider = _StubProvider(api_key="test") + out = provider._prepare_messages([_msg([_img_block(API_IMAGE_MAX_BASE64_SIZE)])]) + self.assertEqual(len(out), 1) + self.assertEqual(out[0]["role"], "user") + + def test_no_images_passes(self) -> None: + provider = _StubProvider(api_key="test") + out = 
provider._prepare_messages([_msg([{"type": "text", "text": "hi"}])]) + self.assertEqual(out, [_msg([{"type": "text", "text": "hi"}])]) + + def test_text_only_string_content_passes(self) -> None: + """Regression: providers without image support should be unaffected.""" + provider = _StubProvider(api_key="test") + out = provider._prepare_messages([_msg("just a string")]) + self.assertEqual(out, [_msg("just a string")]) + + def test_chat_message_input_converted_and_validated(self) -> None: + """ChatMessage dataclass input still flows through to_dict() and is + validated. Defensive — most callers already pass dicts.""" + provider = _StubProvider(api_key="test") + out = provider._prepare_messages([ChatMessage(role="user", content="hi")]) + self.assertEqual(out, [{"role": "user", "content": "hi"}]) + + def test_oversize_aggregated_with_other_messages(self) -> None: + provider = _StubProvider(api_key="test") + with self.assertRaises(ImageSizeError) as cm: + provider._prepare_messages([ + _msg([{"type": "text", "text": "intro"}]), + _msg([_img_block(API_IMAGE_MAX_BASE64_SIZE + 100)]), + _msg([_img_block(API_IMAGE_MAX_BASE64_SIZE + 200)]), + ]) + self.assertEqual(len(cm.exception.oversized), 2) + + +class TestOpenAICompatPrepareValidatesBeforeTranslation(unittest.TestCase): + """The OpenAI-compatible subclass MUST validate the Anthropic-shape + payload (via ``super()._prepare_messages``) BEFORE translation. 
After + translation the block carries a ``data:image/png;base64,...`` URL and + the base64 walker can no longer find it.""" + + def test_oversize_image_raises_before_translation(self) -> None: + provider = _StubOpenAICompat(api_key="test") + with self.assertRaises(ImageSizeError): + provider._prepare_messages([_msg([_img_block(API_IMAGE_MAX_BASE64_SIZE + 1)])]) + + def test_in_limit_image_translates_to_image_url(self) -> None: + provider = _StubOpenAICompat(api_key="test") + out = provider._prepare_messages([_msg([_img_block(1000)])]) + # Single user message; image block translated to OpenAI ``image_url`` shape. + self.assertEqual(len(out), 1) + blocks = out[0]["content"] + self.assertEqual(blocks[0]["type"], "image_url") + self.assertTrue(blocks[0]["image_url"]["url"].startswith("data:image/png;base64,")) + + def test_tool_result_with_oversize_image_raises(self) -> None: + """Images embedded inside tool_result content (Read tool image path) + must also be validated before the Anthropic→OpenAI split.""" + provider = _StubOpenAICompat(api_key="test") + tool_result_with_image = { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "tu_1", + "content": [ + {"type": "text", "text": "image content:"}, + _img_block(API_IMAGE_MAX_BASE64_SIZE + 1), + ], + } + ], + } + with self.assertRaises(ImageSizeError): + provider._prepare_messages([tool_result_with_image]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_image_validation.py b/tests/test_image_validation.py index 9d0bc6c..f01ecf8 100644 --- a/tests/test_image_validation.py +++ b/tests/test_image_validation.py @@ -2,6 +2,7 @@ from __future__ import annotations import unittest +from typing import Any from src.utils.image_processor import API_IMAGE_MAX_BASE64_SIZE from src.utils.image_validation import ImageSizeError, validate_images_for_api @@ -62,6 +63,86 @@ def test_handles_nested_tool_result_with_image(self) -> None: with self.assertRaises(ImageSizeError): 
validate_images_for_api(msgs) + def test_oversize_image_inside_tool_result_content_raises(self) -> None: + """Read tool returns image blocks inside ``tool_result.content`` + (post-#154/#155). Validator must recurse so it sees them, otherwise + an oversized base64 reaches the API and surfaces as an opaque error. + """ + nested = { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "tu_1", + "content": [ + {"type": "text", "text": "image content:"}, + _img_block(API_IMAGE_MAX_BASE64_SIZE + 1), + ], + } + ], + } + with self.assertRaises(ImageSizeError) as cm: + validate_images_for_api([nested]) + self.assertEqual(len(cm.exception.oversized), 1) + + def test_in_limit_image_inside_tool_result_passes(self) -> None: + nested = { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "tu_1", + "content": [_img_block(1024)], + } + ], + } + validate_images_for_api([nested]) # should not raise + + def test_tool_result_with_string_content_does_not_crash(self) -> None: + """Defensive: a tool_result whose ``content`` is a plain string + (no images) must not trip the new recursion.""" + msgs = [{ + "role": "user", + "content": [{"type": "tool_result", "tool_use_id": "x", "content": "ok"}], + }] + validate_images_for_api(msgs) # should not raise + + def test_deeply_nested_tool_results_hit_depth_limit_not_recursion_error(self) -> None: + """Pathological nesting must NOT raise ``RecursionError``. The + walker has a soft depth cap (``_MAX_TOOL_RESULT_DEPTH``) and + stops descending past it. Beyond-the-cap images are skipped + silently (a wider validator could log a warning, but raising + would convert a defensive guard into a crash for any + adversarial input). + """ + # Build a chain 5000-deep — well above Python's default + # recursion limit (~1000), guaranteeing the unbounded form + # would crash. 
+ innermost = _img_block(API_IMAGE_MAX_BASE64_SIZE + 1) + node: Any = [innermost] + for _ in range(5000): + node = [{"type": "tool_result", "tool_use_id": "x", "content": node}] + msgs = [{"role": "user", "content": node}] + # Should NOT raise RecursionError. Deep image is past the + # depth cap so it's silently skipped. Important: this means + # validate_images_for_api returns without finding an oversize + # image — the depth cap is a safety net, not a strict + # invariant. Real-world tool_result nesting is single-digit. + validate_images_for_api(msgs) + + def test_within_depth_limit_image_still_caught(self) -> None: + """Sanity check the depth cap doesn't reject normal-depth tool_results.""" + # Realistic shape: single tool_result containing an image. + msgs = [{ + "role": "user", + "content": [{ + "type": "tool_result", "tool_use_id": "x", + "content": [_img_block(API_IMAGE_MAX_BASE64_SIZE + 1)], + }], + }] + with self.assertRaises(ImageSizeError): + validate_images_for_api(msgs) + def test_skips_image_block_with_no_data(self) -> None: """Defensive: malformed blocks without source.data don't crash.""" bad = {"type": "image", "source": {"type": "url", "url": "https://x"}} diff --git a/tests/test_openai_compat_document_translation.py b/tests/test_openai_compat_document_translation.py new file mode 100644 index 0000000..7cafc22 --- /dev/null +++ b/tests/test_openai_compat_document_translation.py @@ -0,0 +1,211 @@ +"""Tests for Anthropic→OpenAI document-block translation in the +OpenAI-compatible provider converter. + +Anthropic shape: ``{"type": "document", "source": {"type": "base64", +"media_type": "application/pdf", "data": "..."}}``. +OpenAI shape: ``{"type": "file", "file": {"filename": "document.pdf", +"file_data": "data:application/pdf;base64,..."}}``. + +No production path currently produces ``DocumentBlock`` for an +OpenAI-compatible provider — PDFs flow through Read tool's +``_read_map_result_to_api`` as text. 
This translator is a defensive +addition so that if a future @-mention path or third-party tool +returns a PDF block, it lands in the OpenAI shape instead of silently +passing through as an Anthropic-shape block the provider can't parse. +""" + +from __future__ import annotations + +import unittest + +from src.providers.openai_compatible import ( + _anthropic_document_block_to_openai, + _convert_anthropic_messages_to_openai, +) + + +class TestAnthropicDocumentBlockToOpenAI(unittest.TestCase): + def test_valid_block_translates_to_file(self) -> None: + out = _anthropic_document_block_to_openai({ + "type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": "ABCD"}, + }) + self.assertIsNotNone(out) + self.assertEqual(out["type"], "file") + self.assertEqual(out["file"]["filename"], "document.pdf") + self.assertEqual(out["file"]["file_data"], "data:application/pdf;base64,ABCD") + + def test_non_document_block_returns_none(self) -> None: + # An image block must NOT be claimed by the document translator, + # otherwise the multimodal dispatcher would emit the wrong shape. 
+ self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "XYZ"}, + })) + self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "text", "text": "hi"})) + + def test_missing_source_returns_none(self) -> None: + self.assertIsNone(_anthropic_document_block_to_openai({"type": "document"})) + + def test_url_source_returns_none(self) -> None: + """Anthropic supports url-based document blocks too; we only + translate base64 form here (data URI requires raw bytes).""" + self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "document", + "source": {"type": "url", "url": "https://example.com/doc.pdf"}, + })) + + def test_missing_media_type_defaults_to_pdf(self) -> None: + out = _anthropic_document_block_to_openai({ + "type": "document", + "source": {"type": "base64", "data": "ABCD"}, + }) + self.assertIsNotNone(out) + self.assertEqual(out["file"]["file_data"], "data:application/pdf;base64,ABCD") + + def test_empty_data_returns_none(self) -> None: + """Producer-bug guard mirrors the image translator: empty + ``data`` would produce ``data:application/pdf;base64,`` which + the server rejects with a confusing error. 
Returning None lets + the upstream serializer surface the malformed shape instead.""" + self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": ""}, + })) + self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "document", + "source": {"type": "base64", "media_type": "application/pdf"}, + })) + + def test_non_dict_source_returns_none(self) -> None: + self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "document", + "source": "not-a-dict", + })) + + +class TestConverterDocumentIntegration(unittest.TestCase): + """End-to-end coverage through ``_convert_anthropic_messages_to_openai``.""" + + def test_user_message_with_document_translates(self) -> None: + messages = [ + {"role": "user", "content": [ + {"type": "text", "text": "Summarise this PDF"}, + {"type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": "PDFB64"}}, + ]}, + ] + out = _convert_anthropic_messages_to_openai(messages) + self.assertEqual(len(out), 1) + blocks = out[0]["content"] + # Text passes through; document becomes a "file" block. + self.assertEqual(blocks[0]["type"], "text") + self.assertEqual(blocks[1]["type"], "file") + self.assertIn("data:application/pdf;base64,PDFB64", blocks[1]["file"]["file_data"]) + + def test_mixed_image_and_document_translate_to_distinct_shapes(self) -> None: + """Image must become ``image_url``; document must become ``file``. 
+ Pins that the multimodal dispatcher doesn't accidentally route + both through the same translator.""" + messages = [ + {"role": "user", "content": [ + {"type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "IMG"}}, + {"type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": "DOC"}}, + ]}, + ] + out = _convert_anthropic_messages_to_openai(messages) + blocks = out[0]["content"] + self.assertEqual(blocks[0]["type"], "image_url") + self.assertEqual(blocks[1]["type"], "file") + + def test_tool_result_with_document_splits_into_tool_then_user_file(self) -> None: + """The image-tool-result split path must extend to documents: + ``role=tool`` (text body / placeholder) + synthetic ``role=user`` + carrying the ``file`` content block.""" + messages = [ + {"role": "assistant", "content": [ + {"type": "tool_use", "id": "tu_doc", "name": "Read", "input": {}}, + ]}, + {"role": "user", "content": [ + {"type": "tool_result", "tool_use_id": "tu_doc", "content": [ + {"type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": "PDF"}}, + ]}, + ]}, + ] + out = _convert_anthropic_messages_to_openai(messages) + self.assertEqual(out[0]["role"], "assistant") + self.assertEqual(out[1]["role"], "tool") + self.assertEqual(out[1]["tool_call_id"], "tu_doc") + # tool message must have non-empty content (OpenAI requires it) + # AND must carry the tool_use_id correlation marker. + self.assertTrue(out[1]["content"]) + self.assertIn("tu_doc", out[1]["content"]) + # Synthetic user message follows: correlation text block first, + # then the translated file block. 
+ self.assertEqual(out[2]["role"], "user") + self.assertEqual(out[2]["content"][0]["type"], "text") + self.assertIn("tu_doc", out[2]["content"][0]["text"]) + self.assertEqual(out[2]["content"][1]["type"], "file") + + def test_tool_result_with_text_and_document_splits(self) -> None: + messages = [ + {"role": "assistant", "content": [ + {"type": "tool_use", "id": "tu_2", "name": "Read", "input": {}}, + ]}, + {"role": "user", "content": [ + {"type": "tool_result", "tool_use_id": "tu_2", "content": [ + {"type": "text", "text": "Here is the PDF"}, + {"type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": "PDF2"}}, + ]}, + ]}, + ] + out = _convert_anthropic_messages_to_openai(messages) + tool_msgs = [m for m in out if m.get("role") == "tool"] + self.assertEqual(len(tool_msgs), 1) + self.assertIn("Here is the PDF", tool_msgs[0]["content"]) + # Tool message must ALSO carry the tool_use_id correlation marker + # in the text+document case (symmetry with text+image). + self.assertIn("tu_2", tool_msgs[0]["content"]) + idx = out.index(tool_msgs[0]) + self.assertEqual(out[idx + 1]["role"], "user") + # Correlation text block first, then translated file block. 
+        self.assertEqual(out[idx + 1]["content"][0]["type"], "text")
+        self.assertIn("tu_2", out[idx + 1]["content"][0]["text"])
+        self.assertEqual(out[idx + 1]["content"][1]["type"], "file")
+
+    def test_tool_result_image_plus_document_both_carried_in_user_message(self) -> None:
+        """A tool_result with BOTH an image and a document should produce
+        a single synthetic user message carrying BOTH translated blocks
+        (presence of each translated shape is asserted; ordering is not)."""
+        messages = [
+            {"role": "assistant", "content": [
+                {"type": "tool_use", "id": "tu_mix", "name": "Read", "input": {}},
+            ]},
+            {"role": "user", "content": [
+                {"type": "tool_result", "tool_use_id": "tu_mix", "content": [
+                    {"type": "image",
+                     "source": {"type": "base64", "media_type": "image/png", "data": "I"}},
+                    {"type": "document",
+                     "source": {"type": "base64", "media_type": "application/pdf", "data": "D"}},
+                ]},
+            ]},
+        ]
+        out = _convert_anthropic_messages_to_openai(messages)
+        user_msgs = [m for m in out if m.get("role") == "user"]
+        # The original user-with-tool_result is the input; only the
+        # synthetic user message survives in the output (the original
+        # was consumed by the tool_result branch).
+ self.assertEqual(len(user_msgs), 1) + blocks = user_msgs[0]["content"] + types = [b.get("type") for b in blocks] + self.assertIn("image_url", types) + self.assertIn("file", types) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_openai_compat_image_translation.py b/tests/test_openai_compat_image_translation.py index 56b5a50..370e2ac 100644 --- a/tests/test_openai_compat_image_translation.py +++ b/tests/test_openai_compat_image_translation.py @@ -155,10 +155,19 @@ def test_tool_result_with_image_splits_into_tool_then_user_image(self) -> None: self.assertEqual(out[1]["role"], "tool") self.assertEqual(out[1]["tool_call_id"], "tu_1") self.assertTrue(out[1]["content"], "tool message content must be non-empty") - # 3) synthetic user message carrying the image_url + # Placeholder must reference the tool_use_id so the model can + # mentally correlate the synthetic ``role=user`` follow-up message + # back to *this* tool_call (known OpenAI-API limitation: there's + # no wire-level link between tool_call_id and a user message). + self.assertIn("tu_1", out[1]["content"]) + # 3) synthetic user message: leads with a correlation text block + # naming the same tool_use_id (so the link is visible from both + # directions), followed by the image_url block. 
self.assertEqual(out[2]["role"], "user") - self.assertEqual(out[2]["content"][0]["type"], "image_url") - self.assertIn("png;base64,XYZ", out[2]["content"][0]["image_url"]["url"]) + self.assertEqual(out[2]["content"][0]["type"], "text") + self.assertIn("tu_1", out[2]["content"][0]["text"]) + self.assertEqual(out[2]["content"][1]["type"], "image_url") + self.assertIn("png;base64,XYZ", out[2]["content"][1]["image_url"]["url"]) def test_tool_result_with_text_and_image_splits_text_to_tool_image_to_user(self) -> None: messages = [ @@ -177,10 +186,42 @@ def test_tool_result_with_text_and_image_splits_text_to_tool_image_to_user(self) tool_msgs = [m for m in out if m.get("role") == "tool"] self.assertEqual(len(tool_msgs), 1) self.assertIn("Here is the image", tool_msgs[0]["content"]) - # The image becomes its own following user message. + # Tool message must ALSO carry the tool_use_id correlation marker, + # symmetric with the image-only branch. Without it the text+image + # case would have no correlation hint and the synthetic user + # message could be misread as a new prompt. + self.assertIn("tu_2", tool_msgs[0]["content"]) + # The image becomes its own following user message. Leads with a + # text correlation marker, then the image_url block. idx = out.index(tool_msgs[0]) self.assertEqual(out[idx + 1]["role"], "user") - self.assertEqual(out[idx + 1]["content"][0]["type"], "image_url") + self.assertEqual(out[idx + 1]["content"][0]["type"], "text") + self.assertIn("tu_2", out[idx + 1]["content"][0]["text"]) + self.assertEqual(out[idx + 1]["content"][1]["type"], "image_url") + + def test_tool_result_with_empty_list_content_emits_non_empty_tool_message(self) -> None: + """Defensive sentinel: an empty tool_result content list (or a list + of block types the converter doesn't recognise) must NOT emit a + ``role=tool`` with empty content — OpenAI rejects that. 
The + converter falls back to a literal ``[empty tool result]`` so the + invariant documented in the split comment is enforced by code, + not just by docstring.""" + messages = [ + {"role": "assistant", "content": [ + {"type": "tool_use", "id": "tu_e", "name": "Bash", "input": {}}, + ]}, + {"role": "user", "content": [ + {"type": "tool_result", "tool_use_id": "tu_e", "content": []}, + ]}, + ] + out = _convert_anthropic_messages_to_openai(messages) + tool_msgs = [m for m in out if m.get("role") == "tool"] + self.assertEqual(len(tool_msgs), 1) + self.assertTrue(tool_msgs[0]["content"]) + # No synthetic user message should follow when there were no + # multimodal blocks (only the empty-content sentinel applies). + idx = out.index(tool_msgs[0]) + self.assertEqual(idx + 1, len(out), "no synthetic user message expected") def test_tool_result_text_only_unchanged_no_extra_user_message(self) -> None: """A text-only tool_result must NOT spawn an extra synthetic user diff --git a/tests/test_query_loop.py b/tests/test_query_loop.py index 3dad14f..2baadeb 100644 --- a/tests/test_query_loop.py +++ b/tests/test_query_loop.py @@ -284,5 +284,66 @@ async def run(): self.assertGreaterEqual(len(interruptions), 1) +class TestQueryLoopImageSizeError(unittest.TestCase): + """ImageSizeError raised by ``BaseProvider._prepare_messages`` must be + translated by ``_call_model_sync`` into a media_size assistant error + rather than crashing the query loop. 
Pins the contract between the + provider-layer pre-API guard and the higher-level recovery path.""" + + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + self.workspace = Path(self.temp_dir.name) + self.registry = build_default_registry() + self.context = ToolContext(workspace_root=self.workspace) + self.abort = AbortController() + + def tearDown(self): + self.temp_dir.cleanup() + + def test_image_size_error_from_provider_yields_media_size_assistant_message(self): + from src.utils.image_validation import ImageSizeError + + provider = MagicMock() + # Both streaming and non-streaming code paths share _prepare_messages, + # so both surface ImageSizeError. Simulate that here. + oversize_exc = ImageSizeError([(6 * 1024 * 1024, 5 * 1024 * 1024)]) + provider.chat_stream_response.side_effect = oversize_exc + provider.chat.side_effect = oversize_exc + + messages = [UserMessage(content="Describe this oversized image")] + params = QueryParams( + messages=messages, + system_prompt="You are helpful.", + tools=self.registry.list_tools(), + tool_registry=self.registry, + tool_use_context=self.context, + provider=provider, + abort_controller=self.abort, + max_turns=10, + ) + + collected = [] + + async def run(): + async for msg in query(params): + collected.append(msg) + + _run(run()) + + assistants = [m for m in collected if isinstance(m, AssistantMessage)] + self.assertEqual(len(assistants), 1) + err = assistants[0] + # The assistant message must be flagged as an API-error and carry + # the ``media_size`` classification so the reactive-compact recovery + # path can match on it. + self.assertTrue(getattr(err, "isApiErrorMessage", False)) + self.assertEqual(getattr(err, "_api_error", None), "media_size") + content = err.content + text = content if isinstance(content, str) else "".join( + b.text for b in content if isinstance(b, TextBlock) + ) + self.assertIn("Media too large", text) + + if __name__ == "__main__": unittest.main()