Skip to content

Commit 4c09fff

Browse files
committed
[#34] add negative tests for MediaPhotoDescribed invariants + media_type warning
Closes 3 flags from the verify round of the Phase B review pipeline:

- test_models.py: 3 negative tests pin the model-level invariants on MediaPhotoDescribed (UTC-aware described_at, SUPPORTED_LANGUAGES membership for description_lang, decorative => empty description).
- test_describe.py: 1 unit test pins _media_type's unknown-extension fallback to image/jpeg AND the operator-visible logger.warning, so a silent regression on either side is caught.

Test count: 502 -> 506. Quality gate green.
1 parent a373eff commit 4c09fff

2 files changed

Lines changed: 106 additions & 0 deletions

File tree

tests/test_describe.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,25 @@ def test_describe_all_sends_payload_with_expected_kwarg_shape(tmp_path: Path):
10261026
assert "Describe images 0 through 1" in text_block["text"]
10271027

10281028

1029+
def test_media_type_warns_on_unknown_extension(caplog):
    """Unknown extensions fall back to image/jpeg AND emit a `logger.warning`.

    The downloader only writes `.jpg`/`.jpeg`/`.png`/`.webp`, so an unknown
    suffix means either a hand-edited `items.json` or a future format the
    code does not register. The warning is the operator's only signal that
    the wrong MIME type was sent to Anthropic — pin it so a silent fallback
    cannot regress.

    The matching record must be at WARNING severity, not merely contain the
    expected text: a record logged at a different level would change what
    the operator sees and should fail this test.
    """
    import logging

    from xbrain.describe import _media_type

    with caplog.at_level(logging.WARNING, logger="xbrain.describe"):
        result = _media_type("123/0.gif")

    assert result == "image/jpeg"
    # `getMessage()` interpolates %-style args itself, so the check does not
    # depend on a handler having populated `record.message` beforehand.
    assert any(
        rec.levelno == logging.WARNING
        and "unknown extension" in rec.getMessage().lower()
        for rec in caplog.records
    )
1046+
1047+
10291048
# --------------------------------------------------------------------- refusal
10301049

10311050

tests/test_models.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,93 @@ def test_media_photo_described_rejects_parent_traversal_local_path():
408408
)
409409

410410

411+
def test_media_photo_described_rejects_naive_described_at():
    """`described_at` must be timezone-aware (UTC); a naive datetime is rejected.

    The UTC-aware invariant is enforced by a field validator so a
    hand-edited `items.json` entry cannot smuggle a local-time datetime
    past the type boundary. Naive timestamps cause downstream UTC math
    (eligibility checks, sort orders) to drift silently.

    The test also checks that the reported error location names
    `described_at`, so it cannot pass merely because some other field of
    the fixture happens to be invalid.
    """
    import pytest
    from pydantic import ValidationError

    from xbrain.models import MediaPhotoDescribed

    with pytest.raises(ValidationError) as excinfo:
        MediaPhotoDescribed(
            url="https://pbs.twimg.com/media/X.jpg",
            local_path="123/0.jpg",
            width=10,
            height=10,
            bytes_size=100,
            downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
            is_decorative=False,
            description="hello",
            description_lang="English",
            description_version="v1",
            described_at=datetime(2026, 5, 24),  # naive — must fail
        )

    # Each error dict carries a `loc` tuple; its first element is the field
    # name for field-level failures (it may be empty for model-level ones).
    failing_fields = {err["loc"][0] for err in excinfo.value.errors() if err["loc"]}
    assert "described_at" in failing_fields
438+
439+
440+
def test_media_photo_described_rejects_unsupported_description_lang():
    """`description_lang` must be in `SUPPORTED_LANGUAGES`; others are rejected.

    The type alias is derived from `i18n.SUPPORTED_LANGUAGES` so the
    `Literal[...]` validator rejects unknown languages at construction.
    Prevents an out-of-band language tag from polluting the vault.

    The test also checks that the reported error location names
    `description_lang`, so it cannot pass merely because some other field
    of the fixture happens to be invalid.
    """
    import pytest
    from pydantic import ValidationError

    from xbrain.models import MediaPhotoDescribed

    with pytest.raises(ValidationError) as excinfo:
        MediaPhotoDescribed(
            url="https://pbs.twimg.com/media/X.jpg",
            local_path="123/0.jpg",
            width=10,
            height=10,
            bytes_size=100,
            downloaded_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
            is_decorative=False,
            description="hello",
            description_lang="Klingon",  # not in SUPPORTED_LANGUAGES
            description_version="v1",
            described_at=datetime(2026, 5, 24, tzinfo=timezone.utc),
        )

    # Each error dict carries a `loc` tuple; its first element is the field
    # name for field-level failures (it may be empty for model-level ones).
    failing_fields = {err["loc"][0] for err in excinfo.value.errors() if err["loc"]}
    assert "description_lang" in failing_fields
466+
467+
468+
def test_media_photo_described_rejects_decorative_with_nonempty_description():
    """A decorative photo carrying a non-empty description must be rejected.

    The producer (`describe._apply_judgment`) already blanks the
    description on decorative judgments; this test covers the
    defence-in-depth side, where a hand-written record breaks the rule.
    Construction has to fail at the type boundary so downstream callers
    can rely on `is_decorative => not description` unconditionally.
    """
    import pytest
    from pydantic import ValidationError

    from xbrain.models import MediaPhotoDescribed

    stamp = datetime(2026, 5, 24, tzinfo=timezone.utc)
    record_fields = {
        "url": "https://pbs.twimg.com/media/X.jpg",
        "local_path": "123/0.jpg",
        "width": 10,
        "height": 10,
        "bytes_size": 100,
        "downloaded_at": stamp,
        "is_decorative": True,
        # Violates the invariant: decorative entries must have no description.
        "description": "should be empty when decorative",
        "description_lang": "English",
        "description_version": "v1",
        "described_at": stamp,
    }

    with pytest.raises(ValidationError):
        MediaPhotoDescribed(**record_fields)
496+
497+
411498
def test_media_discriminator_rejects_unknown_kind():
412499
"""Silently inventing a variant would mask data corruption — reject loudly."""
413500
import pytest

0 commit comments

Comments
 (0)