diff --git a/src/adcp/decisioning/_permission_denied_budget.py b/src/adcp/decisioning/_permission_denied_budget.py new file mode 100644 index 00000000..05e81413 --- /dev/null +++ b/src/adcp/decisioning/_permission_denied_budget.py @@ -0,0 +1,122 @@ +"""Timing-oracle defense for the two ``PERMISSION_DENIED`` branches of +:func:`adcp.decisioning.handler._resolve_buyer_agent`. + +Background +---------- + +After #748 migrated suspended / blocked agents to dedicated wire codes +(``AGENT_SUSPENDED`` / ``AGENT_BLOCKED``), the only paths that still emit +``PERMISSION_DENIED`` from the commercial-identity gate are: + +* **registry-miss / no-credential / unauthenticated** — ``agent is None``. +* **unknown-status default-reject** — ``agent.status`` is neither + ``active`` nor a recognized rejection status. + +Both must be observably indistinguishable in latency: registry I/O on the +miss branch may be fast (cache hit returning ``None``) while the +default-reject branch always pays the registry read for a real row, which +is a natural timing oracle ("if the response is fast, the agent_url is +not onboarded"). The budget below floors both branches at the same +deadline so the I/O variance is absorbed into the wait. + +Why not on the dedicated-code branches +-------------------------------------- + +``AGENT_SUSPENDED`` / ``AGENT_BLOCKED`` are intentionally distinct wire +codes — the code itself is the discriminator. An attacker who reads the +code already knows the agent is recognized; the latency carries no +additional bit. Per #772 the budget is scoped to ``PERMISSION_DENIED`` +only. + +Failure mode +------------ + +Operators tune the budget via ``ADCP_PERMISSION_DENIED_BUDGET_MS``. Any +non-numeric, non-finite, or non-positive value falls back to the default +and logs at WARNING. The budget is **never silently disabled** — an +operator who fat-fingers the env var must see the deviation in their +logs, because the alternative is a timing-oracle regression they cannot +detect by reading the response. +""" + +from __future__ import annotations + +import asyncio +import logging +import math +import os +import time + +BUDGET_ENV_VAR = "ADCP_PERMISSION_DENIED_BUDGET_MS" +DEFAULT_BUDGET_MS = 50.0 + +_logger = logging.getLogger(__name__) + + +def parse_budget_ms(raw: str | None = None) -> float: + """Resolve the budget in milliseconds. + + ``raw=None`` reads from the environment; an explicit string lets + tests exercise the parser without monkeypatching ``os.environ``. + + Falls back to :data:`DEFAULT_BUDGET_MS` (with a WARNING log) on: + + * unset env var (no warning — this is the expected default path); + * non-numeric (``"abc"``, ``""``); + * non-finite (``"nan"``, ``"inf"``, ``"-inf"``); + * non-positive (``"0"``, ``"-5"``). + """ + if raw is None: + raw = os.environ.get(BUDGET_ENV_VAR) + if raw is None: + return DEFAULT_BUDGET_MS + try: + ms = float(raw) + except (TypeError, ValueError): + _logger.warning( + "%s=%r is not numeric; falling back to default %s ms. The " + "PERMISSION_DENIED timing-oracle defense is still active at " + "the default budget.", + BUDGET_ENV_VAR, + raw, + DEFAULT_BUDGET_MS, + ) + return DEFAULT_BUDGET_MS + if not math.isfinite(ms) or ms <= 0: + _logger.warning( + "%s=%r is not a positive finite number; falling back to " + "default %s ms. The PERMISSION_DENIED timing-oracle defense " + "is never silently disabled — set a positive finite value to " + "override the default.", + BUDGET_ENV_VAR, + raw, + DEFAULT_BUDGET_MS, + ) + return DEFAULT_BUDGET_MS + return ms + + +class PermissionDeniedBudget: + """Per-request budget instance. Construct at function entry, call + :meth:`enforce` immediately before raising ``PERMISSION_DENIED``. + + The deadline is measured from the construction site, not from the + branch site, so I/O latency variance between branches is absorbed + into the budget rather than added on top of it. + """ + + __slots__ = ("_budget_seconds", "_start") + + def __init__(self) -> None: + self._budget_seconds = parse_budget_ms() / 1000.0 + self._start = time.perf_counter() + + @property + def budget_seconds(self) -> float: + return self._budget_seconds + + async def enforce(self) -> None: + elapsed = time.perf_counter() - self._start + remaining = self._budget_seconds - elapsed + if remaining > 0: + await asyncio.sleep(remaining) diff --git a/src/adcp/decisioning/handler.py b/src/adcp/decisioning/handler.py index 9e8c9671..f14f2ece 100644 --- a/src/adcp/decisioning/handler.py +++ b/src/adcp/decisioning/handler.py @@ -481,11 +481,16 @@ async def _resolve_buyer_agent( set on the unestablished-identity path (omit-on-unestablished-identity rule). - Note on parity: the *latency / headers / side-effects* parity - contract between the recognized and unrecognized paths is tracked - as a follow-up — the eager-raise pattern below still completes the - unrecognized path on a different code path than the recognized - ones. + Timing-oracle defense on ``PERMISSION_DENIED``: both branches that + surface ``PERMISSION_DENIED`` (registry-miss and unknown-status + default-reject) flow through + :class:`adcp.decisioning._permission_denied_budget.PermissionDeniedBudget` + so registry I/O variance between the cache-hit-returning-None and + cache-hit-returning-real-row paths is absorbed into a fixed budget + (default 50 ms, tunable via ``ADCP_PERMISSION_DENIED_BUDGET_MS``). + The dedicated-code branches (``AGENT_SUSPENDED`` / ``AGENT_BLOCKED``) + intentionally skip the budget — the code itself is the discriminator, + so latency parity carries no additional bit. :raises AdcpError: ``AGENT_SUSPENDED`` / ``AGENT_BLOCKED`` / ``PERMISSION_DENIED`` depending on path (see above). The @@ -496,6 +501,7 @@ async def _resolve_buyer_agent( carries ``recovery="correctable"`` per the spec's ``enumMetadata``. """ + from adcp.decisioning._permission_denied_budget import PermissionDeniedBudget from adcp.decisioning.registry import ( ApiKeyCredential, HttpSigCredential, @@ -503,6 +509,7 @@ async def _resolve_buyer_agent( ) from adcp.decisioning.types import AdcpError + budget = PermissionDeniedBudget() credential = auth_info.credential if auth_info is not None else None agent: BuyerAgent | None = None if credential is not None: @@ -547,6 +554,7 @@ async def _resolve_buyer_agent( # unrecognized-agent path MUST be indistinguishable on the # wire from the recognized-but-denied path, and ``scope`` # would itself be the discriminator. + await budget.enforce() raise AdcpError( "PERMISSION_DENIED", message=_denied_message, @@ -581,6 +589,7 @@ async def _resolve_buyer_agent( # but the framework cannot interpret it, which is operationally # equivalent to "not authorized" without a defensible status # claim to project on the wire). + await budget.enforce() raise AdcpError( "PERMISSION_DENIED", message=_denied_message, diff --git a/tests/test_permission_denied_budget.py b/tests/test_permission_denied_budget.py new file mode 100644 index 00000000..0da826e8 --- /dev/null +++ b/tests/test_permission_denied_budget.py @@ -0,0 +1,286 @@ +"""Unit + integration tests for the PERMISSION_DENIED timing budget. + +Covers issue #772 part 1 (the narrow follow-up extracted from #392/#735): + +* Env-var hardening: every invalid value falls back to the default, + logs WARNING, and never silently disables the timing-oracle defense. +* Deadline-relative sleep: branches that complete before the budget + expires get padded to the budget; branches that overrun pass through + immediately. +* Branch parity through ``_resolve_buyer_agent``: the two + ``PERMISSION_DENIED`` branches (registry-miss and unknown-status + default-reject) both wait for the budget, so a synthetic slow + registry on one branch and a fast registry on the other no longer + produce a timing oracle. +""" + +from __future__ import annotations + +import asyncio +import logging +import time + +import pytest + +from adcp.decisioning import ( + AuthInfo, + BuyerAgent, + BuyerAgentRegistry, + HttpSigCredential, +) +from adcp.decisioning._permission_denied_budget import ( + BUDGET_ENV_VAR, + DEFAULT_BUDGET_MS, + PermissionDeniedBudget, + parse_budget_ms, +) +from adcp.decisioning.handler import _resolve_buyer_agent +from adcp.decisioning.types import AdcpError + +# --------------------------------------------------------------------------- +# parse_budget_ms — env-var hardening +# --------------------------------------------------------------------------- + + +def test_parse_unset_returns_default(): + assert parse_budget_ms(None) == DEFAULT_BUDGET_MS + + +@pytest.mark.parametrize( + "raw", + ["100", "100.0", "1.5", "0.1", "10000"], +) +def test_parse_valid_positive_finite(raw: str): + assert parse_budget_ms(raw) == float(raw) + + +@pytest.mark.parametrize( + "raw", + ["abc", "", " ", "100ms", "fifty"], +) +def test_parse_non_numeric_falls_back_with_warning(raw: str, caplog): + with caplog.at_level(logging.WARNING, logger="adcp.decisioning._permission_denied_budget"): + assert parse_budget_ms(raw) == DEFAULT_BUDGET_MS + assert any("not numeric" in r.message for r in caplog.records) + + +@pytest.mark.parametrize( + "raw", + ["0", "-1", "-100.5", "nan", "NaN", "inf", "-inf", "Infinity"], +) +def test_parse_non_positive_or_non_finite_falls_back_with_warning(raw: str, caplog): + with caplog.at_level(logging.WARNING, logger="adcp.decisioning._permission_denied_budget"): + assert parse_budget_ms(raw) == DEFAULT_BUDGET_MS + assert any("positive finite" in r.message for r in caplog.records) + + +def test_env_var_read_when_no_explicit_arg(monkeypatch): + monkeypatch.setenv(BUDGET_ENV_VAR, "123.5") + assert parse_budget_ms() == 123.5 + + +def test_env_var_invalid_still_falls_back(monkeypatch, caplog): + monkeypatch.setenv(BUDGET_ENV_VAR, "nan") + with caplog.at_level(logging.WARNING, logger="adcp.decisioning._permission_denied_budget"): + assert parse_budget_ms() == DEFAULT_BUDGET_MS + assert any("positive finite" in r.message for r in caplog.records) + + +# --------------------------------------------------------------------------- +# PermissionDeniedBudget — deadline-relative sleep +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_enforce_sleeps_to_budget_when_branch_is_fast(monkeypatch): + """A branch that does no work waits the full budget.""" + monkeypatch.setenv(BUDGET_ENV_VAR, "20") + budget = PermissionDeniedBudget() + start = time.perf_counter() + await budget.enforce() + elapsed_ms = (time.perf_counter() - start) * 1000.0 + # Allow a little slack for asyncio.sleep overhead but require the + # bulk of the budget to have been honored. + assert elapsed_ms >= 18.0, f"budget not honored: only {elapsed_ms:.1f} ms elapsed" + + +@pytest.mark.asyncio +async def test_enforce_does_not_sleep_when_already_overrun(monkeypatch): + """A branch that already exceeded the budget passes through immediately.""" + monkeypatch.setenv(BUDGET_ENV_VAR, "5") + budget = PermissionDeniedBudget() + # Burn more than the budget before enforce(). + await asyncio.sleep(0.020) + start = time.perf_counter() + await budget.enforce() + elapsed_ms = (time.perf_counter() - start) * 1000.0 + assert elapsed_ms < 5.0, f"enforce() should be a no-op once overrun, got {elapsed_ms:.1f} ms" + + +@pytest.mark.asyncio +async def test_invalid_env_var_still_enforces_default_budget(monkeypatch, caplog): + """The defense must not be silently disabled by a malformed env var.""" + monkeypatch.setenv(BUDGET_ENV_VAR, "0") + with caplog.at_level(logging.WARNING, logger="adcp.decisioning._permission_denied_budget"): + budget = PermissionDeniedBudget() + assert budget.budget_seconds == DEFAULT_BUDGET_MS / 1000.0 + start = time.perf_counter() + await budget.enforce() + elapsed_ms = (time.perf_counter() - start) * 1000.0 + assert elapsed_ms >= DEFAULT_BUDGET_MS - 5.0 + + +@pytest.mark.asyncio +async def test_inf_does_not_hang(monkeypatch): + """Regression: `inf` previously could have produced asyncio.sleep(inf).""" + monkeypatch.setenv(BUDGET_ENV_VAR, "inf") + budget = PermissionDeniedBudget() + # The constructor falls back to the default; the budget is now + # ~50 ms, not infinity. Use a wall-clock timeout to catch a hang. + await asyncio.wait_for(budget.enforce(), timeout=0.5) + + +# --------------------------------------------------------------------------- +# Integration through _resolve_buyer_agent: branch parity +# --------------------------------------------------------------------------- + + +class _SlowRegistry(BuyerAgentRegistry): + """A registry whose resolve methods take a configurable amount of time.""" + + def __init__(self, delay_ms: float, agent: BuyerAgent | None) -> None: + self._delay_seconds = delay_ms / 1000.0 + self._agent = agent + + async def resolve_by_agent_url(self, agent_url: str) -> BuyerAgent | None: + await asyncio.sleep(self._delay_seconds) + return self._agent + + async def resolve_by_credential(self, credential): # pragma: no cover - not used here + await asyncio.sleep(self._delay_seconds) + return self._agent + + +def _http_sig_auth(agent_url: str) -> AuthInfo: + return AuthInfo( + kind="http_sig", + credential=HttpSigCredential( + kind="http_sig", + keyid="kid-1", + agent_url=agent_url, + verified_at=1700000000.0, + ), + ) + + +@pytest.mark.asyncio +async def test_unrecognized_branch_honors_budget(monkeypatch): + """A fast registry-miss must still wait for the budget before raising.""" + monkeypatch.setenv(BUDGET_ENV_VAR, "30") + registry = _SlowRegistry(delay_ms=0.0, agent=None) + start = time.perf_counter() + with pytest.raises(AdcpError) as exc: + await _resolve_buyer_agent(registry, _http_sig_auth("https://buyer.example")) + elapsed_ms = (time.perf_counter() - start) * 1000.0 + assert exc.value.code == "PERMISSION_DENIED" + assert elapsed_ms >= 28.0, f"unrecognized branch returned in {elapsed_ms:.1f} ms" + + +@pytest.mark.asyncio +async def test_unknown_status_branch_honors_budget(monkeypatch): + """A fast registry hit with an unknown status must also wait.""" + monkeypatch.setenv(BUDGET_ENV_VAR, "30") + # Construct a BuyerAgent with a status the framework doesn't recognize. + # The default-reject branch projects PERMISSION_DENIED with no details. + agent = BuyerAgent( + agent_url="https://buyer.example", + display_name="Test Buyer", + status="pending_review", # not active/suspended/blocked + ) + registry = _SlowRegistry(delay_ms=0.0, agent=agent) + start = time.perf_counter() + with pytest.raises(AdcpError) as exc: + await _resolve_buyer_agent(registry, _http_sig_auth("https://buyer.example")) + elapsed_ms = (time.perf_counter() - start) * 1000.0 + assert exc.value.code == "PERMISSION_DENIED" + assert elapsed_ms >= 28.0, f"unknown-status branch returned in {elapsed_ms:.1f} ms" + + +@pytest.mark.asyncio +async def test_dedicated_code_branches_skip_budget(monkeypatch): + """AGENT_SUSPENDED / AGENT_BLOCKED don't pay the budget — the code IS the discriminator.""" + monkeypatch.setenv(BUDGET_ENV_VAR, "500") # big budget so we'd notice + for status, expected_code in [("suspended", "AGENT_SUSPENDED"), ("blocked", "AGENT_BLOCKED")]: + agent = BuyerAgent( + agent_url="https://buyer.example", + display_name="Test Buyer", + status=status, + ) + registry = _SlowRegistry(delay_ms=0.0, agent=agent) + start = time.perf_counter() + with pytest.raises(AdcpError) as exc: + await _resolve_buyer_agent(registry, _http_sig_auth("https://buyer.example")) + elapsed_ms = (time.perf_counter() - start) * 1000.0 + assert exc.value.code == expected_code + assert ( + elapsed_ms < 200.0 + ), f"{status} branch waited {elapsed_ms:.1f} ms — should skip the budget" + + +@pytest.mark.asyncio +async def test_active_branch_skips_budget(monkeypatch): + """The happy path obviously doesn't pay the budget.""" + monkeypatch.setenv(BUDGET_ENV_VAR, "500") + agent = BuyerAgent( + agent_url="https://buyer.example", + display_name="Test Buyer", + status="active", + ) + registry = _SlowRegistry(delay_ms=0.0, agent=agent) + start = time.perf_counter() + resolved = await _resolve_buyer_agent(registry, _http_sig_auth("https://buyer.example")) + elapsed_ms = (time.perf_counter() - start) * 1000.0 + assert resolved is agent + assert elapsed_ms < 200.0 + + +@pytest.mark.asyncio +async def test_branch_parity_absorbs_registry_variance(monkeypatch): + """The whole point: fast registry-miss + slow real-row read should + both come out at ≈ budget, not (0 ms, registry_delay).""" + monkeypatch.setenv(BUDGET_ENV_VAR, "50") + + # Branch 1: registry-miss path with a fast cache-miss-returning-None. + fast_miss = _SlowRegistry(delay_ms=1.0, agent=None) + miss_samples: list[float] = [] + for _ in range(5): + start = time.perf_counter() + with pytest.raises(AdcpError): + await _resolve_buyer_agent(fast_miss, _http_sig_auth("https://buyer.example")) + miss_samples.append((time.perf_counter() - start) * 1000.0) + + # Branch 2: unknown-status path with a slower registry read for a real row. + unknown_agent = BuyerAgent( + agent_url="https://buyer.example", + display_name="Test Buyer", + status="pending_review", + ) + slow_hit = _SlowRegistry(delay_ms=10.0, agent=unknown_agent) + hit_samples: list[float] = [] + for _ in range(5): + start = time.perf_counter() + with pytest.raises(AdcpError): + await _resolve_buyer_agent(slow_hit, _http_sig_auth("https://buyer.example")) + hit_samples.append((time.perf_counter() - start) * 1000.0) + + miss_max = max(miss_samples) + hit_max = max(hit_samples) + # Both should be dominated by the 50 ms budget. The branch difference + # must be much smaller than the registry-delay difference (9 ms) that + # would leak without the budget. + branch_delta = abs(hit_max - miss_max) + assert ( + branch_delta < 15.0 + ), f"branch parity broken: hit={hit_max:.1f} ms vs miss={miss_max:.1f} ms" + # Both should be at or above the budget. + assert min(miss_samples + hit_samples) >= 45.0 diff --git a/tests/test_tier2_spec_conformance.py b/tests/test_tier2_spec_conformance.py index 3c5aad33..5f72f2c1 100644 --- a/tests/test_tier2_spec_conformance.py +++ b/tests/test_tier2_spec_conformance.py @@ -28,13 +28,15 @@ path. A regression test here is the load-bearing CI signal that an adopter doesn't accidentally re-introduce them via copy-paste. -The latency / headers / side-effects parity contract between the -unrecognized-agent path and the recognized-but-denied path is tracked as -a separate follow-up (see issue #375 and the parity-contract follow-up -referenced in the PR body). This file pins the wire-shape conformance -only — the parity refactor needs a single emit point with deliberate -latency padding and identical audit/metric side-effects, which is a -larger dispatch-path refactor than fits in the rename PR. +After #748 (dedicated ``AGENT_SUSPENDED`` / ``AGENT_BLOCKED`` codes with +``recovery="terminal"``) the wire-level oracle between recognized-but-denied +and unrecognized paths is closed by the code split: the dedicated codes +are intentionally distinct, and the unrecognized path's +``PERMISSION_DENIED`` carries no ``details`` discriminator. The remaining +timing-oracle defense (between the two ``PERMISSION_DENIED`` branches +themselves — registry-miss vs. unknown-status default-reject) is enforced +by :class:`adcp.decisioning._permission_denied_budget.PermissionDeniedBudget`; +the parity behavior is pinned in ``tests/test_permission_denied_budget.py``. """ from __future__ import annotations