From 22f300de02a9a04d465395d2081f1c78d979679d Mon Sep 17 00:00:00 2001 From: Eric Lee Date: Sat, 16 May 2026 10:57:48 -0700 Subject: [PATCH] fix(image-handling): close 4 audit follow-ups from PR #155 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the four pre-existing gaps the PR #155 audit surfaced: 1. validate_images_for_api ran only on the Anthropic-direct call_model path; all provider.chat_stream_response paths skipped it. Promoted into BaseProvider._prepare_messages so every provider validates client-side. query._call_model_sync catches ImageSizeError and surfaces a media_size assistant error (matches the existing server-side classification). agent_loop._call_provider_for_turn re-raises ImageSizeError instead of falling back to chat() (which would re-trigger the same validation). Validator also now walks images nested inside tool_result.content — post-PR #154 the Read tool returns image blocks there and the old walker missed them. 2. OpenAI tool->user split symmetric correlation. Tool message body now always carries [multimodal content for tool_use_id=X ...] when the synthetic role=user message follows; the user message leads with a [content for tool_use_id=X] text block so the link is visible from both directions. Empty content sentinel ([empty tool result]) guards the "non-empty content required" invariant for content=[] cases. 3. @file.pdf and other binary @-mentions no longer hit the text-mode open() that produced mojibake-in-system-reminder. Known binary extensions (pdf/zip/docx/...) plus a NUL-byte content sniff route to a new "binary" attachment kind with a Read-tool hint (PDF specifically mentions the pages parameter). BOM-aware decoder (_read_text_with_encoding) handles Windows-emitted UTF-16/UTF-32 files via the BOM-aware codecs (utf-16, utf-32, utf-8-sig) so the BOM is consumed rather than preserved as a literal U+FEFF char. 
Post-decode garbled-text detector (NUL zero-tolerance + 3% U+FFFD threshold) falls back to a binary attachment when an adversarial spoofed-BOM blob would otherwise leak. 4. _anthropic_image_block_to_openai now has a sibling _anthropic_document_block_to_openai that translates Anthropic DocumentBlock (PDF) to OpenAI's {type:"file"} shape. Defensive: no production path currently produces DocumentBlock for OpenAI-compat, but future ones won't silently pass through. Test plan: - 94 new tests across test_at_mention_binary_files.py, test_base_provider_image_validation.py, test_agent_loop_image_size_propagation.py, test_openai_compat_document_translation.py, plus extensions to test_image_validation.py, test_openai_compat_image_translation.py, test_query_loop.py. - Wider suite: 5015 pass / 10 pre-existing failures unchanged. - Critic review loop: APPROVE after three rounds (round-2 caught a UTF-16 BOM regression I introduced; round-3 fixed it and added the post-decode garble detector). Co-Authored-By: Claude Opus 4.7 --- src/command_system/input_processing.py | 251 +++++++++++++- src/providers/base.py | 24 +- src/providers/openai_compatible.py | 180 ++++++++-- src/query/query.py | 12 + src/repl/core.py | 8 + src/services/api/claude.py | 9 +- src/tool_system/agent_loop.py | 7 + src/utils/image_validation.py | 49 ++- .../test_agent_loop_image_size_propagation.py | 96 ++++++ tests/test_at_mention_binary_files.py | 313 ++++++++++++++++++ tests/test_base_provider_image_validation.py | 141 ++++++++ tests/test_image_validation.py | 81 +++++ ...test_openai_compat_document_translation.py | 211 ++++++++++++ tests/test_openai_compat_image_translation.py | 51 ++- tests/test_query_loop.py | 61 ++++ 15 files changed, 1450 insertions(+), 44 deletions(-) create mode 100644 tests/test_agent_loop_image_size_propagation.py create mode 100644 tests/test_at_mention_binary_files.py create mode 100644 tests/test_base_provider_image_validation.py create mode 100644 
tests/test_openai_compat_document_translation.py diff --git a/src/command_system/input_processing.py b/src/command_system/input_processing.py index 3dcd024..83b0c98 100644 --- a/src/command_system/input_processing.py +++ b/src/command_system/input_processing.py @@ -171,6 +171,206 @@ def _extract_urls(text: str) -> list[str]: # @-mention pipeline and the Read tool agree on what counts as an image. _AT_MENTION_IMAGE_EXTENSIONS = frozenset({"png", "jpg", "jpeg", "gif", "webp"}) +# Extensions known to be binary. Opening one in text mode with +# ``errors="replace"`` would emit a wall of utf-8 replacement chars that +# the model latches onto and hallucinates from (the exact failure mode the +# image branch was added to prevent). We skip the read and emit a small +# system-reminder pointing the model at the Read tool instead. PDFs are +# the user-visible case (Read tool supports them via ``pages``); the rest +# are defense in depth so a stray @archive.zip doesn't pollute the prompt +# either. +_AT_MENTION_BINARY_EXTENSIONS = frozenset({ + "pdf", + "zip", "tar", "gz", "tgz", "bz2", "xz", "7z", "rar", + "exe", "dll", "so", "dylib", "a", "o", + "bin", "iso", "dmg", + "docx", "xlsx", "pptx", "doc", "xls", "ppt", + "mp3", "wav", "flac", "ogg", "m4a", + "mp4", "mov", "avi", "mkv", "webm", + "ttf", "otf", "woff", "woff2", + "class", "jar", "war", + "pyc", "pyo", + "sqlite", "db", +}) + +# Heuristic chunk used by ``_looks_like_binary``: enough bytes to spot a +# NUL byte in plausible binary headers without paying the full read cost +# for huge files. +_BINARY_SNIFF_BYTES = 8192 + + +# BOM signatures we know how to decode as text. Order matters: UTF-32 +# BOMs must be checked BEFORE the matching UTF-16 BOMs (UTF-32-LE BOM +# `\xff\xfe\x00\x00` starts with the UTF-16-LE BOM `\xff\xfe`). +# +# Codec choice: use the BOM-aware codecs (``utf-16``, ``utf-32``) rather +# than the explicit-byte-order forms (``utf-16-le``, ``utf-16-be``). 
The +# latter PRESERVE the BOM as a literal ``U+FEFF`` codepoint at the start +# of the string; the BOM-aware codecs consume it. We picked the encoding +# by reading the BOM, so feeding the file to a codec that re-reads it is +# both correct and strips the otherwise-leaking zero-width-no-break-space. +_BOM_ENCODINGS: tuple[tuple[bytes, str], ...] = ( + (b"\xff\xfe\x00\x00", "utf-32"), + (b"\x00\x00\xfe\xff", "utf-32"), + (b"\xff\xfe", "utf-16"), + (b"\xfe\xff", "utf-16"), + (b"\xef\xbb\xbf", "utf-8-sig"), +) + + +def _detect_bom_encoding(head: bytes) -> str | None: + """Return the Python codec name implied by a BOM prefix, or None.""" + for sig, enc in _BOM_ENCODINGS: + if head.startswith(sig): + return enc + return None + + +def _looks_like_binary(path: str) -> bool: + """Return True if the first ``_BINARY_SNIFF_BYTES`` bytes contain a NUL + AND the file is not BOM-prefixed text. + + Defense-in-depth check for files whose extension isn't in + ``_AT_MENTION_BINARY_EXTENSIONS`` — e.g. a misnamed ``.txt`` that's + actually a tarball, or any extension we haven't enumerated. NUL bytes + do not appear in well-formed utf-8 text, so their presence in a sniff + window is a high-precision signal. + + Exception: UTF-16 / UTF-32 text files contain NUL bytes for every + ASCII char and would trip the naive NUL sniffer. A BOM prefix + (``\\xff\\xfe`` LE / ``\\xfe\\xff`` BE / UTF-32 variants) tells us + the file is real text that just needs a different decoder; we return + False so the text-read branch can decode it properly via + ``_read_text_with_encoding``. Without that paired fix this BOM + short-circuit would let UTF-16 mojibake flow into a system-reminder + (the exact failure mode the binary branch was added to prevent). + + Conservative on read errors (treats unreadable files as "not binary" + so the existing text-read path can surface the OSError its caller + already handles). 
+ """ + try: + with open(path, "rb") as fh: + chunk = fh.read(_BINARY_SNIFF_BYTES) + except OSError: + return False + if _detect_bom_encoding(chunk) is not None: + # Encoded text — handled by the text branch with the right codec. + return False + return b"\x00" in chunk + + +# If a decoded file has more than this fraction of U+FFFD replacement +# chars, treat it as garbage rather than text. A real UTF-16 / UTF-8 +# text file produces well under 1% replacements even with `errors=replace`; +# a binary blob with a fake BOM that doesn't pair to valid UTF-16 +# codepoints produces 10-50%+. Threshold picked to be generously high +# so a UTF-16 file with a few corrupt bytes still ships its text, but +# an adversarial binary blob with a spoofed BOM doesn't leak mojibake +# into the prompt. +_MAX_REPLACEMENT_CHAR_FRACTION = 0.03 + + +def _decoded_text_looks_garbled(decoded: str) -> bool: + """Return True if the decoded string looks like binary garbage rather + than legitimate text. Two heuristics: + + 1. **NUL char presence (zero-tolerance).** A real text file has zero + U+0000 characters. If we decoded any, the source bytes contained + NULs that the codec mapped to U+0000 — i.e. binary garbage that + happened to align with a valid UTF-16 NUL pair. Letting these + through would re-introduce Bug A's NUL-in-system-reminder + failure: when the str round-trips through utf-8 encoding for the + API, those U+0000 chars become literal NUL bytes in the prompt. + + 2. **U+FFFD fraction (high-confidence garbage signal).** Above + ``_MAX_REPLACEMENT_CHAR_FRACTION`` the file isn't the encoding + the BOM claimed. + + Residual cases the heuristic does NOT catch: a binary blob with a + spoofed BOM whose every 2-byte pair decodes to a valid non-NUL + Unicode codepoint (e.g. all bytes in a printable plane). These + appear as walls of CJK or symbol characters — not Bug A's failure + family (no NULs, no replacement chars), and the model lacks + recognizable ASCII fragments to hallucinate from. 
Accepting this + as residual risk; a more aggressive detector would false-positive + on legitimate non-Latin-script text files. + """ + if not decoded: + return False + if "\x00" in decoded: + return True + replacement_count = decoded.count("�") + if replacement_count == 0: + return False + return (replacement_count / len(decoded)) > _MAX_REPLACEMENT_CHAR_FRACTION + + +def _read_text_with_encoding(path: str) -> str | None: + """Read ``path`` as text, picking a decoder from any leading BOM. + + Falls back to utf-8 with ``errors="replace"`` for the common case + (no BOM). When a BOM is present we use the codec it implies (with + ``errors="replace"`` so a malformed trailing byte doesn't bring + down the whole read). + + Post-decode garbage check: if the decoded text is mostly U+FFFD + replacement characters, returns ``None`` so the caller drops the + attachment instead of inlining mojibake. This closes the adversarial + case where a binary blob starts with `\\xff\\xfe` (fake UTF-16 BOM) + and an extension we haven't enumerated — without the check, the + BOM short-circuit would route the blob to the text branch and the + `errors="replace"` decode would land replacement-char mojibake in + the prompt (same failure family as Bug A even though no NULs leak). + + Returns ``None`` on OSError or on garbled-decode so the caller + matches the existing "drop this attachment silently" behaviour for + unreadable files. Callers should still emit a binary-style reminder + via the @-mention `binary` attachment kind when this returns None + for a non-OSError reason — see ``expand_at_mentions``. 
+ """ + try: + with open(path, "rb") as fh: + head = fh.read(4) + except OSError: + return None + encoding = _detect_bom_encoding(head) or "utf-8" + try: + with open(path, "r", encoding=encoding, errors="replace") as fh: + data = fh.read() + except OSError: + return None + except (LookupError, UnicodeError): + # Pathological codec name from a future BOM check, or a UTF-32 + # decode failure with errors=replace (rare but theoretically + # possible). Fall back to a raw utf-8 replacement read so the + # path still produces *some* text rather than a silent drop. + try: + with open(path, "r", encoding="utf-8", errors="replace") as fh: + data = fh.read() + except OSError: + return None + if _decoded_text_looks_garbled(data): + return None + return data + + +def _binary_hint_for_ext(ext: str) -> str: + """Return a per-extension hint the model can act on, or a generic + fallback. PDF specifically points at the Read tool's ``pages`` + parameter, mirroring TS's behaviour where PDFs are surfaced through + Read rather than auto-inlined via @-mention.""" + if ext == "pdf": + return ( + "PDFs cannot be inlined as @-mention text. Use the Read tool " + "with the ``pages`` parameter (e.g. ``pages=\"1-5\"``) to view " + "specific page ranges." + ) + return ( + "This file is binary and cannot be inlined as text. Use the Read " + "tool to view it if it is a supported format." + ) + def _try_build_image_attachment( expanded: str, @@ -359,10 +559,43 @@ def expand_at_mentions( # contains the path, so the model can call ``Read`` # explicitly to see a real error. continue - try: - with open(expanded, "r", encoding="utf-8", errors="replace") as fh: - data = fh.read() - except OSError: + # Binary branch: known binary extensions (PDF, archives, ...) + # OR a content sniff that finds a NUL byte. Same Bug A + # pattern as the image branch -- opening a binary file in + # text mode produces mojibake the model hallucinates from. + # We emit a small reminder pointing at the Read tool + # instead. 
PDF gets a more specific hint because Read + # supports it via the ``pages`` parameter; everything else + # gets a generic message. + if ext in _AT_MENTION_BINARY_EXTENSIONS or _looks_like_binary(expanded): + attachments.append( + { + "kind": "binary", + "path": expanded, + "display_path": display_path, + "ext": ext, + "hint": _binary_hint_for_ext(ext), + } + ) + continue + data = _read_text_with_encoding(expanded) + if data is None: + # ``_read_text_with_encoding`` returns None on either + # OSError OR when the decoded text was mostly U+FFFD + # replacement chars (a binary blob with a spoofed + # BOM, or a file in an encoding we can't auto-detect). + # Emit a binary attachment instead of dropping + # silently so the user sees what happened and the + # model gets a Read-tool nudge. + attachments.append( + { + "kind": "binary", + "path": expanded, + "display_path": display_path, + "ext": ext, + "hint": _binary_hint_for_ext(ext), + } + ) continue attachments.append( { @@ -412,6 +645,16 @@ def format_at_mention_attachments(attachments: list[dict[str, Any]]) -> str: elif kind == "image": # See docstring -- handled separately as a content block. continue + elif kind == "binary": + # PDFs / archives / docx / ... — emit a short reminder pointing + # the model at the Read tool so the @-mention is acknowledged + # without flooding the prompt with mojibake. + blocks.append( + f"<system-reminder>\n" + f"@-mentioned file {att['display_path']} is a binary file " + f"and was not inlined. 
{att.get('hint', '')}\n" + f"</system-reminder>" + ) + elif kind == "agent_mention": + # Mirrors ``typescript/src/utils/messages.ts`` ``agent_mention`` + # case: the reminder nudges the model to delegate to the named diff --git a/src/providers/base.py b/src/providers/base.py index 8e40cf9..163cef6 100644 --- a/src/providers/base.py +++ b/src/providers/base.py @@ -144,5 +144,25 @@ def _get_model(self, **kwargs) -> str: return strip_1m_context_suffix(raw) if raw else raw def _prepare_messages(self, messages: list[MessageInput]) -> list[dict[str, Any]]: - """Convert provider messages to API dictionary format.""" - return [msg if isinstance(msg, dict) else msg.to_dict() for msg in messages] + """Convert provider messages to API dictionary format. + + Also runs ``validate_images_for_api`` so every provider — not just + the Anthropic-direct ``services/api/claude.py:call_model`` path — + rejects oversize base64 images before the network round trip. + Anthropic's 5 MB hard limit applies to its own provider; for + OpenAI-compatible providers the check runs on the still-Anthropic + shape (subclasses call ``super()._prepare_messages`` before + translating to ``image_url``), so the same client-side guard + applies. Providers without image support are unaffected: the + walker only inspects ``type=image`` blocks. Raises + ``ImageSizeError``; callers (``query._call_model_sync``, + ``tool_system.agent_loop._call_provider_for_turn``) translate it + into a media-size error message rather than letting it surface as + an opaque API failure. + """ + prepared = [msg if isinstance(msg, dict) else msg.to_dict() for msg in messages] + # Local import to avoid a top-level dependency from base.py into + # the utils package, matching the style of ``_get_model``. 
+ from src.utils.image_validation import validate_images_for_api + validate_images_for_api(prepared) + return prepared diff --git a/src/providers/openai_compatible.py b/src/providers/openai_compatible.py index 002803d..16d7b38 100644 --- a/src/providers/openai_compatible.py +++ b/src/providers/openai_compatible.py @@ -47,12 +47,79 @@ def _anthropic_image_block_to_openai(block: dict[str, Any]) -> dict[str, Any] | } +def _anthropic_document_block_to_openai(block: dict[str, Any]) -> dict[str, Any] | None: + """Translate an Anthropic document content block (PDF) to OpenAI's + ``file`` content shape. + + Anthropic: ``{"type": "document", "source": {"type": "base64", + "media_type": "application/pdf", "data": "..."}}``. + OpenAI: ``{"type": "file", "file": {"filename": "document.pdf", + "file_data": "data:application/pdf;base64,..."}}``. + + No production path currently produces ``DocumentBlock`` for an + OpenAI-compatible provider (PDFs flow through Read tool's + ``_read_map_result_to_api`` as text). This translator exists so that + if ``DocumentBlock`` ever shows up — e.g. a future @-mention path or + a third-party tool returning a PDF — it's converted to the closest + OpenAI shape instead of silently passing through as an unrecognised + Anthropic-shape block (which OpenAI-compat providers either reject + or drop). Mirrors the ``image`` translator's defensive contract. + + Returns ``None`` when the block isn't a recognisable Anthropic + document block (caller should keep the original). 
+ """ + if not isinstance(block, dict) or block.get("type") != "document": + return None + source = block.get("source") + if not isinstance(source, dict): + return None + if source.get("type") != "base64": + return None + media_type = source.get("media_type") or "application/pdf" + data = source.get("data") + if not data or not isinstance(data, str): + # Same producer-bug guard as the image translator: an empty + # ``data`` field would produce ``data:application/pdf;base64,`` + # which the server rejects. Return ``None`` so the caller keeps + # the original block and the upstream serializer surfaces the + # malformed shape instead of producing a request the server will + # fail confusingly. + return None + # OpenAI's ``file`` block accepts an optional ``filename``; many + # provider impls require it. Use a stable default since Anthropic's + # document source carries no filename hint. + return { + "type": "file", + "file": { + "filename": "document.pdf", + "file_data": f"data:{media_type};base64,{data}", + }, + } + + +def _translate_anthropic_multimodal_block(block: Any) -> dict[str, Any] | None: + """Try every Anthropic-shape multimodal translator on ``block``. + + Returns the OpenAI-shape replacement, or ``None`` when the block isn't + a translatable multimodal type (text / unknown blocks pass through + untouched at the call site). Centralised so the converter doesn't + grow a long chain of ``if isinstance/elif`` branches as new + multimodal shapes appear. + """ + if not isinstance(block, dict): + return None + translated = _anthropic_image_block_to_openai(block) + if translated is not None: + return translated + return _anthropic_document_block_to_openai(block) + + def _convert_anthropic_messages_to_openai( messages: list[dict[str, Any]], ) -> list[dict[str, Any]]: """Convert Anthropic-format messages to OpenAI chat-completion format. - Handles three transformations: + Handles four transformations: 1. 
Assistant messages with tool_use content blocks → assistant + tool_calls 2. User messages with tool_result content blocks → role=tool messages 3. Anthropic image content blocks (in user messages or tool_result @@ -60,6 +127,11 @@ def _convert_anthropic_messages_to_openai( both @image.png @-mentions and Read-tool image returns ship Anthropic-shape blocks; without translation, OpenAI-compatible providers either reject the request or silently drop the image. + 4. Anthropic document content blocks → OpenAI ``file`` blocks. + Defensive translation: no production path currently produces + ``DocumentBlock`` for an OpenAI-compatible provider, but if one + ever appears it lands in the closest OpenAI shape rather than + passing through as an unrecognised Anthropic block. """ result: list[dict[str, Any]] = [] for msg in messages: @@ -131,9 +203,10 @@ def _convert_anthropic_messages_to_openai( if isinstance(block, dict) and block.get("type") == "tool_result": tool_results.append(block) else: - # Translate Anthropic image blocks to OpenAI ``image_url`` - # shape; pass everything else through unchanged. - translated = _anthropic_image_block_to_openai(block) if isinstance(block, dict) else None + # Translate Anthropic multimodal blocks (image / document) + # to their OpenAI shapes; pass everything else through + # unchanged. + translated = _translate_anthropic_multimodal_block(block) non_tool_blocks.append(translated if translated is not None else block) # Emit non-tool content first as user message @@ -151,17 +224,17 @@ def _convert_anthropic_messages_to_openai( # blocks via ``image_url``. The model sees the image # alongside the tool result, which is the closest semantic # match to Anthropic's native multimodal tool_result. 
- image_blocks_from_tool: list[dict[str, Any]] = [] + multimodal_blocks_from_tool: list[dict[str, Any]] = [] if isinstance(raw_content, list): # Flatten nested content blocks to text parts = [] for item in raw_content: if isinstance(item, dict) and item.get("type") == "text": parts.append(item.get("text", "")) - elif isinstance(item, dict) and item.get("type") == "image": - translated_img = _anthropic_image_block_to_openai(item) - if translated_img is not None: - image_blocks_from_tool.append(translated_img) + elif isinstance(item, dict) and item.get("type") in ("image", "document"): + translated_multimodal = _translate_anthropic_multimodal_block(item) + if translated_multimodal is not None: + multimodal_blocks_from_tool.append(translated_multimodal) elif isinstance(item, str): parts.append(item) else: @@ -172,30 +245,85 @@ def _convert_anthropic_messages_to_openai( else: flat_content = str(raw_content) - # OpenAI requires ``content`` to be a non-empty string on - # tool messages; if the original was image-only, leave a - # short pointer so the tool_call is paired with something. - # The follow-up ``role=user`` message carrying the - # ``image_url`` block is NOT linked to ``tool_call_id`` -- - # OpenAI's wire format has no way to attach a tool_call_id - # to a user message. The model sees ``tool(text result)`` - # followed by ``user(image)`` and may briefly treat the - # trailing user message as a new prompt. The placeholder - # text reduces that risk, but it's a known OpenAI-API - # limitation we cannot fully paper over; on Anthropic the - # equivalent stays as a single multimodal tool_result. 
- if not flat_content and image_blocks_from_tool: - flat_content = "[image content delivered in the following message]" + # KNOWN OPENAI-API LIMITATION — tool→user split: + # ---------------------------------------------------- + # OpenAI's wire format requires ``content`` on a tool + # message to be a non-empty string AND does not allow + # image_url / file blocks in a tool message. So when + # an Anthropic tool_result carries multimodal content, + # we must split it: emit ``role=tool`` with the text + # body (or a placeholder if none), then a synthetic + # ``role=user`` carrying the translated multimodal + # blocks. The synthetic user message CANNOT be linked + # back to its parent tool_call_id — OpenAI provides no + # wire-level mechanism for that. The model sees + # ``tool(text)`` followed by ``user(multimodal)`` and + # could briefly treat the trailing user message as a + # new prompt rather than the tool's payload. + # + # Mitigations applied here: + # 1. Tool message carries an explicit "see following + # message" suffix (image-only) or a "see also next + # message" line (text+image) referencing the + # tool_use_id. This is symmetric across both the + # image-only and text+image branches so the + # correlation hint is always present when a split + # happened. + # 2. Synthetic user message starts with a tiny text + # block that names the tool_use_id. The image / + # file blocks follow. The model now has the link + # visible from BOTH directions even though + # ``tool_call_id`` only exists on the tool message. + # On Anthropic the equivalent stays a single + # multimodal tool_result with no split; there is no + # equivalent Anthropic-side limitation. 
+ tool_use_id = tr.get("tool_use_id", "") + if multimodal_blocks_from_tool: + correlation = ( + f"[multimodal content for tool_use_id={tool_use_id} " + "delivered in the following message]" + ) + if flat_content: + # Text + multimodal: append a one-line pointer so + # the tool message carries the same correlation + # marker the image-only branch produces. + flat_content = f"{flat_content}\n\n{correlation}" + else: + # Image-only tool_result: the pointer is the + # whole tool-message body. OpenAI rejects empty + # tool-message content, so this also doubles as + # the non-empty guard. + flat_content = correlation + if not flat_content: + # Defensive: if there were no multimodal blocks AND + # no text body (e.g. ``content: []`` or ``content: ""`` + # — the converter's fallthrough JSON-dumps unknown + # block types into ``flat_content`` so those don't + # land here), emit a literal sentinel so OpenAI's + # "tool message content must be non-empty" + # requirement is honoured. + flat_content = "[empty tool result]" result.append({ "role": "tool", - "tool_call_id": tr.get("tool_use_id", ""), + "tool_call_id": tool_use_id, "content": flat_content, }) - if image_blocks_from_tool: + if multimodal_blocks_from_tool: + # Lead with a tiny text block naming the parent + # tool_use_id so the model can correlate this + # synthetic user message back to the prior + # ``role=tool`` message even though OpenAI's wire + # format gives no tool_call_id on user messages. 
+ correlation_text = { + "type": "text", + "text": ( + f"[content for tool_use_id={tool_use_id}]" + ), + } result.append({ "role": "user", - "content": image_blocks_from_tool, + "content": [correlation_text, *multimodal_blocks_from_tool], }) continue diff --git a/src/query/query.py b/src/query/query.py index 68b2175..dc87f46 100644 --- a/src/query/query.py +++ b/src/query/query.py @@ -22,6 +22,7 @@ from ..tool_system.protocol import ToolCall, ToolResult from ..tool_system.registry import ToolRegistry from ..utils.abort_controller import AbortController, AbortError +from ..utils.image_validation import ImageSizeError from ..providers.base import BaseProvider, ChatResponse from .config import QueryConfig, build_query_config @@ -405,6 +406,17 @@ async def _call_model_sync( # to those substring checks could accidentally match an abort # reason and convert the cancel into a model-error reply. raise + except ImageSizeError as e: + # Client-side pre-API validation tripped (BaseProvider._prepare_messages). + # Surface as a media_size error with the same classification the + # server-side guard uses, so the reactive-compact recovery path + # (Ch5/B.2) treats them identically. + err_msg = _create_assistant_api_error_message( + f"Media too large: {e}", + error="media_size", + ) + err_msg._api_error = "media_size" # type: ignore[attr-defined] + return [err_msg], [] except Exception as e: if _diag: logger.warning("[DIAG] _call_model_sync: EXCEPTION after %.1fs: %s", time.monotonic() - _t0, e) diff --git a/src/repl/core.py b/src/repl/core.py index c658598..bef11ea 100644 --- a/src/repl/core.py +++ b/src/repl/core.py @@ -2316,6 +2316,14 @@ def chat(self, user_input: str, max_turns: int | None = None): self.console.print( f"[dim] ⎿ Read image {att['display_path']}[/dim]" ) + elif kind == "binary": + # Binary file (PDF, archive, ...) — show what happened + # so the user isn't surprised that no content was + # inlined. 
The reminder text already nudges the model + # toward the Read tool. + self.console.print( + f"[dim] ⎿ Skipped binary file {att['display_path']}[/dim]" + ) else: label = "directory" if kind == "directory" else "file" self.console.print( diff --git a/src/services/api/claude.py b/src/services/api/claude.py index 19592a4..67a60c9 100644 --- a/src/services/api/claude.py +++ b/src/services/api/claude.py @@ -293,10 +293,11 @@ async def call_model( # instead of a generic API failure. Mirrors TS # validateImagesForAPI invocation at utils/imageValidation.ts. # - # TODO: only the Anthropic provider path runs this check. - # src/providers/{openai_compatible,anthropic_provider,minimax_provider}.py - # all bypass it. Promote validation into BaseProvider._prepare_messages - # if/when those providers grow image-content-block support. + # This path uses the anthropic SDK directly (not BaseProvider), + # so we still validate here. Provider-based paths + # (chat_stream_response / chat) validate inside + # BaseProvider._prepare_messages and surface ImageSizeError via + # query._call_model_sync's media_size handler. try: from src.utils.image_validation import ImageSizeError, validate_images_for_api validate_images_for_api(api_messages) diff --git a/src/tool_system/agent_loop.py b/src/tool_system/agent_loop.py index c2cc1c5..7844ca5 100644 --- a/src/tool_system/agent_loop.py +++ b/src/tool_system/agent_loop.py @@ -16,6 +16,7 @@ from ..providers.anthropic_provider import AnthropicProvider from ..providers.minimax_provider import MinimaxProvider from ..utils.abort_controller import AbortError, AbortSignal +from ..utils.image_validation import ImageSizeError def _is_anthropic_provider(provider: BaseProvider) -> bool: @@ -176,6 +177,12 @@ def _call_provider_for_turn( # User-initiated cancel must propagate; do not fall through # to the non-streaming code path. 
raise + except ImageSizeError: + # Pre-API client-side validation tripped — falling back to + # the non-streaming path would re-run ``_prepare_messages`` + # and raise the same error. Propagate so the outer loop can + # surface a media_size error message instead of double-call. + raise except Exception: # Preserve existing stable behavior if streaming is unsupported or fails. pass diff --git a/src/utils/image_validation.py b/src/utils/image_validation.py index 7b0a5ed..d76c708 100644 --- a/src/utils/image_validation.py +++ b/src/utils/image_validation.py @@ -76,11 +76,56 @@ def _get_image_source_data(block: Any) -> str | None: return data if isinstance(data, str) else None +# Cap on nested tool_result depth the walker will descend into. The +# Anthropic API doesn't accept arbitrarily deep nesting in production +# (tool_result content is normally one level deep); the limit guards a +# pathologically constructed message from blowing Python's recursion +# limit. Pick a value comfortably above any realistic depth. +_MAX_TOOL_RESULT_DEPTH = 32 + + +def _iter_image_blocks(content: Any, _depth: int = 0): + """Yield every image block reachable from ``content``. + + Walks top-level blocks AND descends into ``tool_result`` block content + lists. Required because the Read tool now returns images inside a + tool_result's ``content`` (post-#154/#155 image-handling parity); a + walker that only looks at the outer user message would miss them and + let an oversized base64 reach the API. + + Recursion is bounded by ``_MAX_TOOL_RESULT_DEPTH`` so an adversarial + or accidentally over-nested message cannot hit Python's default + recursion limit (typically ~1000). Production tool_result nesting is + single-digit; the cap is generous defense-in-depth. Depth-first + yield order matches a reader's mental model of the message: for + ``[ImgA, ToolResult(ImgB), ImgC]`` the walker yields A, B, C, which + matches the index numbering used in ``ImageSizeError``. 
+ """ + if _depth > _MAX_TOOL_RESULT_DEPTH: + return + for block in _iter_content_blocks(content): + btype = _get_block_type(block) + if btype == "image": + yield block + elif btype == "tool_result": + # tool_result.content is either a string (no images) or a + # list of blocks; recurse so we descend in document order. + if isinstance(block, dict): + inner = block.get("content") + else: + inner = getattr(block, "content", None) + if isinstance(inner, list): + yield from _iter_image_blocks(inner, _depth + 1) + + def validate_images_for_api(messages: list[Any]) -> None: """Walk ``messages`` and raise ImageSizeError if any image is too large. Each image block's base64 string length is compared to ``API_IMAGE_MAX_BASE64_SIZE`` (5 MB). Mirrors TS imageValidation.ts:52-105. + + Recursion covers tool_result-nested images so the Read tool's image + return path is also guarded; see ``_iter_image_blocks``. """ oversized: list[tuple[int, int]] = [] for msg in messages: @@ -89,9 +134,7 @@ def validate_images_for_api(messages: list[Any]) -> None: content = msg.get("content") else: content = getattr(msg, "content", None) - for block in _iter_content_blocks(content): - if _get_block_type(block) != "image": - continue + for block in _iter_image_blocks(content): data = _get_image_source_data(block) if data is None: continue diff --git a/tests/test_agent_loop_image_size_propagation.py b/tests/test_agent_loop_image_size_propagation.py new file mode 100644 index 0000000..c181faa --- /dev/null +++ b/tests/test_agent_loop_image_size_propagation.py @@ -0,0 +1,96 @@ +"""Pin that ``_call_provider_for_turn`` re-raises ``ImageSizeError`` +instead of swallowing it via the generic ``except Exception`` fallback. + +If the streaming path's ``_prepare_messages`` raises ``ImageSizeError``, +falling back to ``provider.chat()`` is pointless: ``chat()`` calls the +same ``_prepare_messages`` and will raise again. 
Propagating the +streaming-path exception lets the outer query loop translate it into a +clean media_size error message immediately. +""" + +from __future__ import annotations + +import unittest +from typing import Any +from unittest.mock import MagicMock + +from src.providers.base import ChatResponse +from src.tool_system.agent_loop import _call_provider_for_turn +from src.utils.image_validation import ImageSizeError + + +class TestImageSizeErrorPropagation(unittest.TestCase): + def _make_provider(self, stream_exc: Exception, chat_response: Any = None) -> MagicMock: + provider = MagicMock() + provider.chat_stream_response.side_effect = stream_exc + if chat_response is None: + # Should not be reached in the propagation test; trip if so. + provider.chat.side_effect = AssertionError( + "chat() should not be invoked after streaming ImageSizeError" + ) + else: + provider.chat.return_value = chat_response + return provider + + def test_streaming_imagesize_propagates_without_chat_fallback(self) -> None: + oversize = ImageSizeError([(6 * 1024 * 1024, 5 * 1024 * 1024)]) + provider = self._make_provider(oversize) + with self.assertRaises(ImageSizeError): + _call_provider_for_turn( + provider=provider, + api_messages=[{"role": "user", "content": "x"}], + call_kwargs={}, + stream=True, + on_text_chunk=None, + ) + # chat() must NOT have been called because we shouldn't double-attempt + provider.chat.assert_not_called() + + def test_streaming_not_implemented_still_falls_back_to_chat(self) -> None: + """Regression: only ImageSizeError is the new re-raise. 
Existing + fallback behavior for NotImplementedError must stay intact.""" + ok = ChatResponse( + content="hi", + model="test", + usage={"input_tokens": 1, "output_tokens": 1}, + finish_reason="end_turn", + tool_uses=None, + ) + provider = self._make_provider(NotImplementedError(), chat_response=ok) + response, streamed = _call_provider_for_turn( + provider=provider, + api_messages=[{"role": "user", "content": "x"}], + call_kwargs={}, + stream=True, + on_text_chunk=None, + ) + self.assertEqual(response.content, "hi") + self.assertFalse(streamed) + provider.chat.assert_called_once() + + def test_streaming_generic_exception_still_falls_back_to_chat(self) -> None: + """Regression: pre-existing ``except Exception: pass`` fallback must + keep working for unrelated stream-time errors so user behavior is + unchanged for transient SDK glitches.""" + ok = ChatResponse( + content="hi", + model="test", + usage={"input_tokens": 1, "output_tokens": 1}, + finish_reason="end_turn", + tool_uses=None, + ) + provider = self._make_provider(RuntimeError("stream blew up"), chat_response=ok) + response, streamed = _call_provider_for_turn( + provider=provider, + api_messages=[{"role": "user", "content": "x"}], + call_kwargs={}, + stream=True, + on_text_chunk=None, + ) + self.assertEqual(response.content, "hi") + self.assertFalse(streamed) + provider.chat.assert_called_once() + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_at_mention_binary_files.py b/tests/test_at_mention_binary_files.py new file mode 100644 index 0000000..4dfa1a8 --- /dev/null +++ b/tests/test_at_mention_binary_files.py @@ -0,0 +1,313 @@ +"""Tests that ``@file.pdf`` and other binary @-mentions are routed to a +"binary" attachment instead of being opened in text mode (the Bug A +pattern that produced mojibake the model hallucinated from). + +PDF in TS is surfaced through the Read tool's ``pages`` parameter, not +auto-inlined via @-mention. 
Python now matches: we emit a small system +reminder pointing the model at Read, rather than dumping replacement +chars into the prompt. + +Coverage: +- Known binary extensions (.pdf, .zip, .docx, ...) skip the text read. +- PDF reminder mentions the Read tool's ``pages`` parameter specifically. +- Files with NUL bytes are caught by the content sniffer even when their + extension isn't enumerated (defense in depth). +- Normal text files still flow through the unchanged ``kind="file"`` path. +- ``format_at_mention_attachments`` renders the new "binary" kind without + leaking content bytes. +""" + +from __future__ import annotations + +import os +import tempfile +import unittest +from pathlib import Path + +from src.command_system.input_processing import ( + expand_at_mentions, + format_at_mention_attachments, +) + + +def _render(atts: list[dict]) -> bytes: + """Helper: render attachments and return the bytes that would be + embedded in the prompt. Tests assert on the bytes (not the str) so + NUL chars and utf-8 replacement chars are detectable. + """ + return format_at_mention_attachments(atts).encode("utf-8", errors="surrogatepass") + + +class TestPdfAtMention(unittest.TestCase): + def setUp(self) -> None: + self.tmp = tempfile.TemporaryDirectory() + self.cwd = self.tmp.name + + def tearDown(self) -> None: + self.tmp.cleanup() + + def _write(self, name: str, data: bytes) -> str: + path = os.path.join(self.cwd, name) + Path(path).write_bytes(data) + return path + + def test_pdf_extension_routes_to_binary_attachment(self) -> None: + # Synthetic PDF magic header + opaque bytes — never opened as text. 
+ self._write("doc.pdf", b"%PDF-1.7\n\x00\x01\x02\x03 lorem") + _, atts = expand_at_mentions("look at @doc.pdf", cwd=self.cwd) + self.assertEqual(len(atts), 1) + att = atts[0] + self.assertEqual(att["kind"], "binary") + self.assertEqual(att["ext"], "pdf") + self.assertEqual(att["display_path"], "doc.pdf") + # PDF hint must mention Read tool + pages parameter so the model + # has a concrete next step. + self.assertIn("Read tool", att["hint"]) + self.assertIn("pages", att["hint"]) + + def test_pdf_attachment_does_not_leak_bytes(self) -> None: + """Critical: the binary attachment must NOT carry the file content. + Bug A's failure mode was shipping mojibake into the prompt; the + whole point of the binary branch is to avoid that.""" + self._write("doc.pdf", b"%PDF-1.7\n\x00binarygarbagehere") + _, atts = expand_at_mentions("@doc.pdf", cwd=self.cwd) + att = atts[0] + self.assertNotIn("content", att) + rendered = format_at_mention_attachments(atts) + self.assertNotIn("binarygarbagehere", rendered) + # The reminder structure should be intact and reference the file. + self.assertIn("", rendered) + self.assertIn("doc.pdf", rendered) + self.assertIn("binary", rendered.lower()) + + def test_archive_extension_routes_to_binary(self) -> None: + self._write("payload.zip", b"PK\x03\x04 archive bytes here") + _, atts = expand_at_mentions("@payload.zip", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "binary") + # Generic hint for non-PDF binaries. + self.assertNotIn("pages", atts[0]["hint"]) + + def test_docx_routes_to_binary(self) -> None: + # docx is just a zip; same extension-based detection should fire. 
+ self._write("report.docx", b"PK\x03\x04 docx-ish bytes") + _, atts = expand_at_mentions("@report.docx", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "binary") + + def test_text_file_unaffected(self) -> None: + self._write("notes.txt", b"hello world\nline two\n") + _, atts = expand_at_mentions("@notes.txt", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "file") + self.assertIn("hello world", atts[0]["content"]) + + def test_unknown_extension_with_nul_bytes_caught_by_sniffer(self) -> None: + """Defense in depth: a misnamed binary (e.g. .dat / .txt) whose + extension we haven't enumerated should still be classified as + binary based on a NUL byte in the head of the file. Prevents the + mojibake regression from sneaking in via any extension we missed.""" + self._write("blob.dat", b"\x00\x01\x02 actual binary content") + _, atts = expand_at_mentions("@blob.dat", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "binary") + + def test_text_without_nul_bytes_still_text(self) -> None: + """Regression: the sniffer must only fire on actual NULs, not on + legitimate non-ASCII text. UTF-8 source code with snowmen and + accents must still land as ``kind=file``.""" + self._write("notes.md", "héllo ☃ wörld\n".encode("utf-8")) + _, atts = expand_at_mentions("@notes.md", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "file") + self.assertIn("héllo", atts[0]["content"]) + + def test_utf16_le_bom_decoded_properly_no_mojibake(self) -> None: + """Windows-emitted UTF-16-LE files have NUL bytes for every ASCII + char and would trip the NUL sniffer. A BOM prefix tells us the + file is real text that just needs a different decoder; the text + branch picks the codec from the BOM and produces clean text — NOT + the mojibake-with-embedded-NULs that an unconditional utf-8 read + would produce (that would re-introduce the exact failure mode + this whole work was designed to prevent). 
+ + The test asserts BOTH on the attachment content AND on the + rendered system-reminder bytes, because the prompt-time failure + mode is mojibake-in-system-reminder. Asserting only the routing + decision (kind=file) would pin a passing bug. + """ + self._write("win.log", b"\xff\xfe" + "hello world\nsecond line\n".encode("utf-16-le")) + _, atts = expand_at_mentions("@win.log", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "file") + self.assertEqual(atts[0]["content"], "hello world\nsecond line\n") + # CRITICAL: rendered prompt bytes must NOT contain NULs or utf-8 + # replacement chars. A regression here is exactly Bug A. + rendered = _render(atts) + self.assertNotIn(b"\x00", rendered) + self.assertNotIn(b"\xef\xbf\xbd", rendered) # utf-8 of U+FFFD replacement char + self.assertIn(b"hello world", rendered) + + def test_utf16_be_bom_decoded_properly(self) -> None: + self._write("be.log", b"\xfe\xff" + "hello\n".encode("utf-16-be")) + _, atts = expand_at_mentions("@be.log", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "file") + self.assertEqual(atts[0]["content"], "hello\n") + rendered = _render(atts) + self.assertNotIn(b"\x00", rendered) + self.assertNotIn(b"\xef\xbf\xbd", rendered) + self.assertIn(b"hello", rendered) + + def test_utf8_bom_stripped(self) -> None: + """UTF-8 BOM (rare, but Windows-emitted) — must be transparently + decoded so the BOM character itself doesn't end up in the prompt + content. ``utf-8-sig`` codec handles this.""" + self._write("bom.md", b"\xef\xbb\xbfhello\n") + _, atts = expand_at_mentions("@bom.md", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "file") + self.assertEqual(atts[0]["content"], "hello\n") + # BOM character (U+FEFF) must NOT appear in either content or render. 
+ self.assertNotIn("", atts[0]["content"]) + rendered = _render(atts) + self.assertNotIn(b"\xef\xbb\xbf", rendered) + + def test_utf32_le_bom_decoded(self) -> None: + self._write("u32.log", b"\xff\xfe\x00\x00" + "hi\n".encode("utf-32-le")) + _, atts = expand_at_mentions("@u32.log", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "file") + self.assertEqual(atts[0]["content"], "hi\n") + rendered = _render(atts) + self.assertNotIn(b"\x00", rendered) + + def test_no_bom_utf16_le_still_classified_binary(self) -> None: + """A UTF-16 LE file WITHOUT a BOM still has NULs and we have no + signal to tell it from a binary blob. Falls through to the + binary branch — strictly safer than shipping NUL-laden mojibake.""" + self._write("nobom.log", "hello world\n".encode("utf-16-le")) + _, atts = expand_at_mentions("@nobom.log", cwd=self.cwd) + self.assertEqual(atts[0]["kind"], "binary") + + def test_spoofed_bom_binary_blob_does_not_leak_bug_a_signatures(self) -> None: + """Adversarial case: a binary blob whose extension is NOT in + ``_AT_MENTION_BINARY_EXTENSIONS`` and whose head bytes start + with a valid-looking UTF-16 BOM. UTF-16 decodes every 2-byte + pair to *some* Unicode codepoint, so the BOM short-circuit + produces a wall of nonsensical CJK/symbol characters rather + than NUL- or U+FFFD-laden mojibake. + + The Bug A failure family is specifically about NUL bytes and + utf-8 replacement chars being smuggled into a system-reminder + and the model latching onto recognisable ASCII fragments. This + test pins THAT invariant — the rendered prompt bytes must not + contain NUL or U+FFFD — and accepts that an adversarial blob + may still get inlined as text. A stronger garble detector that + flagged this case would false-positive on legitimate + non-Latin-script UTF-16 files (Chinese/Japanese logs, etc.). + + If a future failure mode hinges on recognisable English-ASCII + fragments surviving the UTF-16 decode, that's the trigger to + revisit — not this case. 
+ """ + # Bytes after the BOM decode to high-plane codepoints (no + # English text in the output), so even if the wall-of-glyphs + # ships to the model there's no ASCII to latch onto. + payload = b"\xff\xfe" + bytes(range(1, 200)) * 4 + self._write("blob.dat", payload) + _, atts = expand_at_mentions("@blob.dat", cwd=self.cwd) + self.assertEqual(len(atts), 1) + # CRITICAL: Bug A invariants — no NULs, no U+FFFD replacement chars + # in the rendered prompt bytes. The walk-of-glyphs is acceptable; + # NUL- or U+FFFD-laden mojibake is not. + rendered = _render(atts) + self.assertNotIn(b"\x00", rendered) + self.assertNotIn(b"\xef\xbf\xbd", rendered) + + def test_nul_heavy_decode_falls_back_to_binary(self) -> None: + """Adversarial case found mid-review: a binary blob with a + spoofed UTF-16 BOM and NUL-heavy body. The UTF-16 decode maps + each NUL pair to U+0000 — Python str chars that round-trip + BACK to NUL bytes when encoded to utf-8 for the API. Without + the NUL-char check in ``_decoded_text_looks_garbled`` the + prompt bytes would carry literal NULs — exactly Bug A's + failure mode.""" + payload = b"\xff\xfe" + b"\x00" * 400 + self._write("nuly.dat", payload) + _, atts = expand_at_mentions("@nuly.dat", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "binary", + "NUL-heavy decode must NOT inline as kind=file") + rendered = _render(atts) + self.assertNotIn(b"\x00", rendered) + + def test_high_replacement_char_decode_falls_back_to_binary(self) -> None: + """A file whose decoded form is *mostly* U+FFFD replacement + characters is rejected from the text branch — that pattern is + a high-confidence signal that the file isn't the encoding the + BOM (or default utf-8) implied. Specifically pins the + ``_decoded_text_looks_garbled`` threshold so a regression that + opens the floodgates again gets caught. + + Constructed without a BOM so it falls through to utf-8 decode, + where deliberately-invalid utf-8 byte sequences produce U+FFFD. 
+ """ + # Mostly 0x80-0xBF bytes (utf-8 continuation bytes with no + # leading byte → each one becomes U+FFFD). Mixed with some + # printable ASCII to keep len > 0. + payload = b"!" + b"\x80\x81\x82\x83" * 200 + self._write("noisy.bad", payload) + _, atts = expand_at_mentions("@noisy.bad", cwd=self.cwd) + self.assertEqual(len(atts), 1) + self.assertEqual(atts[0]["kind"], "binary", + "decoded text with high U+FFFD fraction must NOT inline as kind=file") + rendered = _render(atts) + self.assertNotIn(b"\xef\xbf\xbd", rendered) + + def test_image_still_routes_to_image_branch(self) -> None: + """Regression: image extensions must keep going through the image + pipeline -- binary branch must come AFTER image branch in the + dispatch order, so PNG/JPEG never lose their inline behaviour.""" + # Build a real 1x1 PNG via Pillow so the image pipeline actually + # produces an attachment; otherwise the test couldn't tell apart + # "image branch dropped silently" from "binary branch fired". + try: + from PIL import Image + except ImportError: # pragma: no cover + self.skipTest("Pillow required") + import io + buf = io.BytesIO() + Image.new("RGB", (1, 1), (255, 0, 0)).save(buf, format="PNG") + self._write("pixel.png", buf.getvalue()) + _, atts = expand_at_mentions("@pixel.png", cwd=self.cwd) + self.assertEqual(len(atts), 1) + # Image attachment, NOT binary. + self.assertEqual(atts[0]["kind"], "image") + + +class TestBinaryAttachmentRendering(unittest.TestCase): + def test_rendered_reminder_for_pdf(self) -> None: + att = { + "kind": "binary", + "path": "/x/y/report.pdf", + "display_path": "report.pdf", + "ext": "pdf", + "hint": "PDFs cannot be inlined as @-mention text. 
Use the Read tool with the ``pages`` parameter.", + } + rendered = format_at_mention_attachments([att]) + self.assertIn("", rendered) + self.assertIn("report.pdf", rendered) + self.assertIn("binary", rendered.lower()) + self.assertIn("Read tool", rendered) + + def test_rendered_reminder_for_generic_binary(self) -> None: + att = { + "kind": "binary", + "path": "/x/y/blob.bin", + "display_path": "blob.bin", + "ext": "bin", + "hint": "This file is binary and cannot be inlined as text.", + } + rendered = format_at_mention_attachments([att]) + self.assertIn("blob.bin", rendered) + self.assertIn("binary", rendered.lower()) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_base_provider_image_validation.py b/tests/test_base_provider_image_validation.py new file mode 100644 index 0000000..caaa945 --- /dev/null +++ b/tests/test_base_provider_image_validation.py @@ -0,0 +1,141 @@ +"""Tests that ``BaseProvider._prepare_messages`` runs the same +``validate_images_for_api`` guard that the Anthropic-direct ``call_model`` +path runs, so every provider — anthropic, openai-compatible, glm, +minimax, deepseek, openrouter — rejects oversized base64 images +client-side instead of letting the API return an opaque error. + +Pre-fix gap: the validator only ran on ``services/api/claude.py:call_model``, +which is the parity-streaming path. Production paths go through +``provider.chat_stream_response`` and skipped the check entirely. 
+""" + +from __future__ import annotations + +import unittest +from typing import Any + +from src.providers.base import BaseProvider, ChatMessage +from src.providers.openai_compatible import OpenAICompatibleProvider +from src.utils.image_processor import API_IMAGE_MAX_BASE64_SIZE +from src.utils.image_validation import ImageSizeError + + +class _StubProvider(BaseProvider): + """Concrete subclass that exposes ``_prepare_messages`` for direct test + invocation without standing up a real chat client.""" + + def chat(self, messages, tools=None, **kwargs): # pragma: no cover - unused + raise NotImplementedError + + def chat_stream(self, messages, tools=None, **kwargs): # pragma: no cover - unused + raise NotImplementedError + + def get_available_models(self): # pragma: no cover - unused + return [] + + +class _StubOpenAICompat(OpenAICompatibleProvider): + """Concrete OpenAI-compatible subclass for exercising the override path.""" + + def _create_client(self): # pragma: no cover - unused + raise NotImplementedError + + def get_available_models(self): # pragma: no cover - unused + return [] + + +def _img_block(b64_len: int) -> dict[str, Any]: + return { + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "A" * b64_len}, + } + + +def _msg(content) -> dict[str, Any]: + return {"role": "user", "content": content} + + +class TestBaseProviderImageValidation(unittest.TestCase): + def test_oversize_image_raises_imagesizeerror(self) -> None: + provider = _StubProvider(api_key="test") + with self.assertRaises(ImageSizeError): + provider._prepare_messages([_msg([_img_block(API_IMAGE_MAX_BASE64_SIZE + 1)])]) + + def test_image_at_limit_passes(self) -> None: + provider = _StubProvider(api_key="test") + out = provider._prepare_messages([_msg([_img_block(API_IMAGE_MAX_BASE64_SIZE)])]) + self.assertEqual(len(out), 1) + self.assertEqual(out[0]["role"], "user") + + def test_no_images_passes(self) -> None: + provider = _StubProvider(api_key="test") + out = 
provider._prepare_messages([_msg([{"type": "text", "text": "hi"}])]) + self.assertEqual(out, [_msg([{"type": "text", "text": "hi"}])]) + + def test_text_only_string_content_passes(self) -> None: + """Regression: providers without image support should be unaffected.""" + provider = _StubProvider(api_key="test") + out = provider._prepare_messages([_msg("just a string")]) + self.assertEqual(out, [_msg("just a string")]) + + def test_chat_message_input_converted_and_validated(self) -> None: + """ChatMessage dataclass input still flows through to_dict() and is + validated. Defensive — most callers already pass dicts.""" + provider = _StubProvider(api_key="test") + out = provider._prepare_messages([ChatMessage(role="user", content="hi")]) + self.assertEqual(out, [{"role": "user", "content": "hi"}]) + + def test_oversize_aggregated_with_other_messages(self) -> None: + provider = _StubProvider(api_key="test") + with self.assertRaises(ImageSizeError) as cm: + provider._prepare_messages([ + _msg([{"type": "text", "text": "intro"}]), + _msg([_img_block(API_IMAGE_MAX_BASE64_SIZE + 100)]), + _msg([_img_block(API_IMAGE_MAX_BASE64_SIZE + 200)]), + ]) + self.assertEqual(len(cm.exception.oversized), 2) + + +class TestOpenAICompatPrepareValidatesBeforeTranslation(unittest.TestCase): + """The OpenAI-compatible subclass MUST validate the Anthropic-shape + payload (via ``super()._prepare_messages``) BEFORE translation. 
After + translation the block carries a ``data:image/png;base64,...`` URL and + the base64 walker can no longer find it.""" + + def test_oversize_image_raises_before_translation(self) -> None: + provider = _StubOpenAICompat(api_key="test") + with self.assertRaises(ImageSizeError): + provider._prepare_messages([_msg([_img_block(API_IMAGE_MAX_BASE64_SIZE + 1)])]) + + def test_in_limit_image_translates_to_image_url(self) -> None: + provider = _StubOpenAICompat(api_key="test") + out = provider._prepare_messages([_msg([_img_block(1000)])]) + # Single user message; image block translated to OpenAI ``image_url`` shape. + self.assertEqual(len(out), 1) + blocks = out[0]["content"] + self.assertEqual(blocks[0]["type"], "image_url") + self.assertTrue(blocks[0]["image_url"]["url"].startswith("data:image/png;base64,")) + + def test_tool_result_with_oversize_image_raises(self) -> None: + """Images embedded inside tool_result content (Read tool image path) + must also be validated before the Anthropic→OpenAI split.""" + provider = _StubOpenAICompat(api_key="test") + tool_result_with_image = { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "tu_1", + "content": [ + {"type": "text", "text": "image content:"}, + _img_block(API_IMAGE_MAX_BASE64_SIZE + 1), + ], + } + ], + } + with self.assertRaises(ImageSizeError): + provider._prepare_messages([tool_result_with_image]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_image_validation.py b/tests/test_image_validation.py index 9d0bc6c..f01ecf8 100644 --- a/tests/test_image_validation.py +++ b/tests/test_image_validation.py @@ -2,6 +2,7 @@ from __future__ import annotations import unittest +from typing import Any from src.utils.image_processor import API_IMAGE_MAX_BASE64_SIZE from src.utils.image_validation import ImageSizeError, validate_images_for_api @@ -62,6 +63,86 @@ def test_handles_nested_tool_result_with_image(self) -> None: with self.assertRaises(ImageSizeError): 
validate_images_for_api(msgs) + def test_oversize_image_inside_tool_result_content_raises(self) -> None: + """Read tool returns image blocks inside ``tool_result.content`` + (post-#154/#155). Validator must recurse so it sees them, otherwise + an oversized base64 reaches the API and surfaces as an opaque error. + """ + nested = { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "tu_1", + "content": [ + {"type": "text", "text": "image content:"}, + _img_block(API_IMAGE_MAX_BASE64_SIZE + 1), + ], + } + ], + } + with self.assertRaises(ImageSizeError) as cm: + validate_images_for_api([nested]) + self.assertEqual(len(cm.exception.oversized), 1) + + def test_in_limit_image_inside_tool_result_passes(self) -> None: + nested = { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": "tu_1", + "content": [_img_block(1024)], + } + ], + } + validate_images_for_api([nested]) # should not raise + + def test_tool_result_with_string_content_does_not_crash(self) -> None: + """Defensive: a tool_result whose ``content`` is a plain string + (no images) must not trip the new recursion.""" + msgs = [{ + "role": "user", + "content": [{"type": "tool_result", "tool_use_id": "x", "content": "ok"}], + }] + validate_images_for_api(msgs) # should not raise + + def test_deeply_nested_tool_results_hit_depth_limit_not_recursion_error(self) -> None: + """Pathological nesting must NOT raise ``RecursionError``. The + walker has a soft depth cap (``_MAX_TOOL_RESULT_DEPTH``) and + stops descending past it. Beyond-the-cap images are skipped + silently (a wider validator could log a warning, but raising + would convert a defensive guard into a crash for any + adversarial input). + """ + # Build a chain 5000-deep — well above Python's default + # recursion limit (~1000), guaranteeing the unbounded form + # would crash. 
+ innermost = _img_block(API_IMAGE_MAX_BASE64_SIZE + 1) + node: Any = [innermost] + for _ in range(5000): + node = [{"type": "tool_result", "tool_use_id": "x", "content": node}] + msgs = [{"role": "user", "content": node}] + # Should NOT raise RecursionError. Deep image is past the + # depth cap so it's silently skipped. Important: this means + # validate_images_for_api returns without finding an oversize + # image — the depth cap is a safety net, not a strict + # invariant. Real-world tool_result nesting is single-digit. + validate_images_for_api(msgs) + + def test_within_depth_limit_image_still_caught(self) -> None: + """Sanity check the depth cap doesn't reject normal-depth tool_results.""" + # Realistic shape: single tool_result containing an image. + msgs = [{ + "role": "user", + "content": [{ + "type": "tool_result", "tool_use_id": "x", + "content": [_img_block(API_IMAGE_MAX_BASE64_SIZE + 1)], + }], + }] + with self.assertRaises(ImageSizeError): + validate_images_for_api(msgs) + def test_skips_image_block_with_no_data(self) -> None: """Defensive: malformed blocks without source.data don't crash.""" bad = {"type": "image", "source": {"type": "url", "url": "https://x"}} diff --git a/tests/test_openai_compat_document_translation.py b/tests/test_openai_compat_document_translation.py new file mode 100644 index 0000000..7cafc22 --- /dev/null +++ b/tests/test_openai_compat_document_translation.py @@ -0,0 +1,211 @@ +"""Tests for Anthropic→OpenAI document-block translation in the +OpenAI-compatible provider converter. + +Anthropic shape: ``{"type": "document", "source": {"type": "base64", +"media_type": "application/pdf", "data": "..."}}``. +OpenAI shape: ``{"type": "file", "file": {"filename": "document.pdf", +"file_data": "data:application/pdf;base64,..."}}``. + +No production path currently produces ``DocumentBlock`` for an +OpenAI-compatible provider — PDFs flow through Read tool's +``_read_map_result_to_api`` as text. 
This translator is a defensive +addition so that if a future @-mention path or third-party tool +returns a PDF block, it lands in the OpenAI shape instead of silently +passing through as an Anthropic-shape block the provider can't parse. +""" + +from __future__ import annotations + +import unittest + +from src.providers.openai_compatible import ( + _anthropic_document_block_to_openai, + _convert_anthropic_messages_to_openai, +) + + +class TestAnthropicDocumentBlockToOpenAI(unittest.TestCase): + def test_valid_block_translates_to_file(self) -> None: + out = _anthropic_document_block_to_openai({ + "type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": "ABCD"}, + }) + self.assertIsNotNone(out) + self.assertEqual(out["type"], "file") + self.assertEqual(out["file"]["filename"], "document.pdf") + self.assertEqual(out["file"]["file_data"], "data:application/pdf;base64,ABCD") + + def test_non_document_block_returns_none(self) -> None: + # An image block must NOT be claimed by the document translator, + # otherwise the multimodal dispatcher would emit the wrong shape. 
+ self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "XYZ"}, + })) + self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "text", "text": "hi"})) + + def test_missing_source_returns_none(self) -> None: + self.assertIsNone(_anthropic_document_block_to_openai({"type": "document"})) + + def test_url_source_returns_none(self) -> None: + """Anthropic supports url-based document blocks too; we only + translate base64 form here (data URI requires raw bytes).""" + self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "document", + "source": {"type": "url", "url": "https://example.com/doc.pdf"}, + })) + + def test_missing_media_type_defaults_to_pdf(self) -> None: + out = _anthropic_document_block_to_openai({ + "type": "document", + "source": {"type": "base64", "data": "ABCD"}, + }) + self.assertIsNotNone(out) + self.assertEqual(out["file"]["file_data"], "data:application/pdf;base64,ABCD") + + def test_empty_data_returns_none(self) -> None: + """Producer-bug guard mirrors the image translator: empty + ``data`` would produce ``data:application/pdf;base64,`` which + the server rejects with a confusing error. 
Returning None lets + the upstream serializer surface the malformed shape instead.""" + self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": ""}, + })) + self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "document", + "source": {"type": "base64", "media_type": "application/pdf"}, + })) + + def test_non_dict_source_returns_none(self) -> None: + self.assertIsNone(_anthropic_document_block_to_openai({ + "type": "document", + "source": "not-a-dict", + })) + + +class TestConverterDocumentIntegration(unittest.TestCase): + """End-to-end coverage through ``_convert_anthropic_messages_to_openai``.""" + + def test_user_message_with_document_translates(self) -> None: + messages = [ + {"role": "user", "content": [ + {"type": "text", "text": "Summarise this PDF"}, + {"type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": "PDFB64"}}, + ]}, + ] + out = _convert_anthropic_messages_to_openai(messages) + self.assertEqual(len(out), 1) + blocks = out[0]["content"] + # Text passes through; document becomes a "file" block. + self.assertEqual(blocks[0]["type"], "text") + self.assertEqual(blocks[1]["type"], "file") + self.assertIn("data:application/pdf;base64,PDFB64", blocks[1]["file"]["file_data"]) + + def test_mixed_image_and_document_translate_to_distinct_shapes(self) -> None: + """Image must become ``image_url``; document must become ``file``. 
+ Pins that the multimodal dispatcher doesn't accidentally route + both through the same translator.""" + messages = [ + {"role": "user", "content": [ + {"type": "image", + "source": {"type": "base64", "media_type": "image/png", "data": "IMG"}}, + {"type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": "DOC"}}, + ]}, + ] + out = _convert_anthropic_messages_to_openai(messages) + blocks = out[0]["content"] + self.assertEqual(blocks[0]["type"], "image_url") + self.assertEqual(blocks[1]["type"], "file") + + def test_tool_result_with_document_splits_into_tool_then_user_file(self) -> None: + """The image-tool-result split path must extend to documents: + ``role=tool`` (text body / placeholder) + synthetic ``role=user`` + carrying the ``file`` content block.""" + messages = [ + {"role": "assistant", "content": [ + {"type": "tool_use", "id": "tu_doc", "name": "Read", "input": {}}, + ]}, + {"role": "user", "content": [ + {"type": "tool_result", "tool_use_id": "tu_doc", "content": [ + {"type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": "PDF"}}, + ]}, + ]}, + ] + out = _convert_anthropic_messages_to_openai(messages) + self.assertEqual(out[0]["role"], "assistant") + self.assertEqual(out[1]["role"], "tool") + self.assertEqual(out[1]["tool_call_id"], "tu_doc") + # tool message must have non-empty content (OpenAI requires it) + # AND must carry the tool_use_id correlation marker. + self.assertTrue(out[1]["content"]) + self.assertIn("tu_doc", out[1]["content"]) + # Synthetic user message follows: correlation text block first, + # then the translated file block. 
+ self.assertEqual(out[2]["role"], "user") + self.assertEqual(out[2]["content"][0]["type"], "text") + self.assertIn("tu_doc", out[2]["content"][0]["text"]) + self.assertEqual(out[2]["content"][1]["type"], "file") + + def test_tool_result_with_text_and_document_splits(self) -> None: + messages = [ + {"role": "assistant", "content": [ + {"type": "tool_use", "id": "tu_2", "name": "Read", "input": {}}, + ]}, + {"role": "user", "content": [ + {"type": "tool_result", "tool_use_id": "tu_2", "content": [ + {"type": "text", "text": "Here is the PDF"}, + {"type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": "PDF2"}}, + ]}, + ]}, + ] + out = _convert_anthropic_messages_to_openai(messages) + tool_msgs = [m for m in out if m.get("role") == "tool"] + self.assertEqual(len(tool_msgs), 1) + self.assertIn("Here is the PDF", tool_msgs[0]["content"]) + # Tool message must ALSO carry the tool_use_id correlation marker + # in the text+document case (symmetry with text+image). + self.assertIn("tu_2", tool_msgs[0]["content"]) + idx = out.index(tool_msgs[0]) + self.assertEqual(out[idx + 1]["role"], "user") + # Correlation text block first, then translated file block. 
+        self.assertEqual(out[idx + 1]["content"][0]["type"], "text")
+        self.assertIn("tu_2", out[idx + 1]["content"][0]["text"])
+        self.assertEqual(out[idx + 1]["content"][1]["type"], "file")
+
+    def test_tool_result_image_plus_document_both_carried_in_user_message(self) -> None:
+        """A tool_result with BOTH an image and a document should produce
+        a single synthetic user message carrying BOTH translated blocks
+        (presence of each translated shape is asserted; ordering is not)."""
+        messages = [
+            {"role": "assistant", "content": [
+                {"type": "tool_use", "id": "tu_mix", "name": "Read", "input": {}},
+            ]},
+            {"role": "user", "content": [
+                {"type": "tool_result", "tool_use_id": "tu_mix", "content": [
+                    {"type": "image",
+                     "source": {"type": "base64", "media_type": "image/png", "data": "I"}},
+                    {"type": "document",
+                     "source": {"type": "base64", "media_type": "application/pdf", "data": "D"}},
+                ]},
+            ]},
+        ]
+        out = _convert_anthropic_messages_to_openai(messages)
+        user_msgs = [m for m in out if m.get("role") == "user"]
+        # The original user-with-tool_result is the input; only the
+        # synthetic user message survives in the output (the original
+        # was consumed by the tool_result branch).
+ self.assertEqual(len(user_msgs), 1) + blocks = user_msgs[0]["content"] + types = [b.get("type") for b in blocks] + self.assertIn("image_url", types) + self.assertIn("file", types) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_openai_compat_image_translation.py b/tests/test_openai_compat_image_translation.py index 56b5a50..370e2ac 100644 --- a/tests/test_openai_compat_image_translation.py +++ b/tests/test_openai_compat_image_translation.py @@ -155,10 +155,19 @@ def test_tool_result_with_image_splits_into_tool_then_user_image(self) -> None: self.assertEqual(out[1]["role"], "tool") self.assertEqual(out[1]["tool_call_id"], "tu_1") self.assertTrue(out[1]["content"], "tool message content must be non-empty") - # 3) synthetic user message carrying the image_url + # Placeholder must reference the tool_use_id so the model can + # mentally correlate the synthetic ``role=user`` follow-up message + # back to *this* tool_call (known OpenAI-API limitation: there's + # no wire-level link between tool_call_id and a user message). + self.assertIn("tu_1", out[1]["content"]) + # 3) synthetic user message: leads with a correlation text block + # naming the same tool_use_id (so the link is visible from both + # directions), followed by the image_url block. 
self.assertEqual(out[2]["role"], "user") - self.assertEqual(out[2]["content"][0]["type"], "image_url") - self.assertIn("png;base64,XYZ", out[2]["content"][0]["image_url"]["url"]) + self.assertEqual(out[2]["content"][0]["type"], "text") + self.assertIn("tu_1", out[2]["content"][0]["text"]) + self.assertEqual(out[2]["content"][1]["type"], "image_url") + self.assertIn("png;base64,XYZ", out[2]["content"][1]["image_url"]["url"]) def test_tool_result_with_text_and_image_splits_text_to_tool_image_to_user(self) -> None: messages = [ @@ -177,10 +186,42 @@ def test_tool_result_with_text_and_image_splits_text_to_tool_image_to_user(self) tool_msgs = [m for m in out if m.get("role") == "tool"] self.assertEqual(len(tool_msgs), 1) self.assertIn("Here is the image", tool_msgs[0]["content"]) - # The image becomes its own following user message. + # Tool message must ALSO carry the tool_use_id correlation marker, + # symmetric with the image-only branch. Without it the text+image + # case would have no correlation hint and the synthetic user + # message could be misread as a new prompt. + self.assertIn("tu_2", tool_msgs[0]["content"]) + # The image becomes its own following user message. Leads with a + # text correlation marker, then the image_url block. idx = out.index(tool_msgs[0]) self.assertEqual(out[idx + 1]["role"], "user") - self.assertEqual(out[idx + 1]["content"][0]["type"], "image_url") + self.assertEqual(out[idx + 1]["content"][0]["type"], "text") + self.assertIn("tu_2", out[idx + 1]["content"][0]["text"]) + self.assertEqual(out[idx + 1]["content"][1]["type"], "image_url") + + def test_tool_result_with_empty_list_content_emits_non_empty_tool_message(self) -> None: + """Defensive sentinel: an empty tool_result content list (or a list + of block types the converter doesn't recognise) must NOT emit a + ``role=tool`` with empty content — OpenAI rejects that. 
The + converter falls back to a literal ``[empty tool result]`` so the + invariant documented in the split comment is enforced by code, + not just by docstring.""" + messages = [ + {"role": "assistant", "content": [ + {"type": "tool_use", "id": "tu_e", "name": "Bash", "input": {}}, + ]}, + {"role": "user", "content": [ + {"type": "tool_result", "tool_use_id": "tu_e", "content": []}, + ]}, + ] + out = _convert_anthropic_messages_to_openai(messages) + tool_msgs = [m for m in out if m.get("role") == "tool"] + self.assertEqual(len(tool_msgs), 1) + self.assertTrue(tool_msgs[0]["content"]) + # No synthetic user message should follow when there were no + # multimodal blocks (only the empty-content sentinel applies). + idx = out.index(tool_msgs[0]) + self.assertEqual(idx + 1, len(out), "no synthetic user message expected") def test_tool_result_text_only_unchanged_no_extra_user_message(self) -> None: """A text-only tool_result must NOT spawn an extra synthetic user diff --git a/tests/test_query_loop.py b/tests/test_query_loop.py index 3dad14f..2baadeb 100644 --- a/tests/test_query_loop.py +++ b/tests/test_query_loop.py @@ -284,5 +284,66 @@ async def run(): self.assertGreaterEqual(len(interruptions), 1) +class TestQueryLoopImageSizeError(unittest.TestCase): + """ImageSizeError raised by ``BaseProvider._prepare_messages`` must be + translated by ``_call_model_sync`` into a media_size assistant error + rather than crashing the query loop. 
Pins the contract between the + provider-layer pre-API guard and the higher-level recovery path.""" + + def setUp(self): + self.temp_dir = tempfile.TemporaryDirectory() + self.workspace = Path(self.temp_dir.name) + self.registry = build_default_registry() + self.context = ToolContext(workspace_root=self.workspace) + self.abort = AbortController() + + def tearDown(self): + self.temp_dir.cleanup() + + def test_image_size_error_from_provider_yields_media_size_assistant_message(self): + from src.utils.image_validation import ImageSizeError + + provider = MagicMock() + # Both streaming and non-streaming code paths share _prepare_messages, + # so both surface ImageSizeError. Simulate that here. + oversize_exc = ImageSizeError([(6 * 1024 * 1024, 5 * 1024 * 1024)]) + provider.chat_stream_response.side_effect = oversize_exc + provider.chat.side_effect = oversize_exc + + messages = [UserMessage(content="Describe this oversized image")] + params = QueryParams( + messages=messages, + system_prompt="You are helpful.", + tools=self.registry.list_tools(), + tool_registry=self.registry, + tool_use_context=self.context, + provider=provider, + abort_controller=self.abort, + max_turns=10, + ) + + collected = [] + + async def run(): + async for msg in query(params): + collected.append(msg) + + _run(run()) + + assistants = [m for m in collected if isinstance(m, AssistantMessage)] + self.assertEqual(len(assistants), 1) + err = assistants[0] + # The assistant message must be flagged as an API-error and carry + # the ``media_size`` classification so the reactive-compact recovery + # path can match on it. + self.assertTrue(getattr(err, "isApiErrorMessage", False)) + self.assertEqual(getattr(err, "_api_error", None), "media_size") + content = err.content + text = content if isinstance(content, str) else "".join( + b.text for b in content if isinstance(b, TextBlock) + ) + self.assertIn("Media too large", text) + + if __name__ == "__main__": unittest.main()