From 156d6aae773b1a34a72a3c015416ccb70f541658 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 20 May 2026 12:50:28 +0000 Subject: [PATCH 1/4] fix(adagents): inline-resolution path for publisher_properties selectors + publisher_domains[] fan-out Fixes the bug where _resolve_agent_properties with authorization_type "publisher_properties" returned raw selector dicts instead of resolved property objects. Implements the inline-resolution path from adcp#4827: for each selector, fan out over publisher_domain / publisher_domains[], match against the parent file's top-level properties[] by publisher_domain, apply selection_type filter (all / by_tag / by_id), and extend the result. Also adds _selector_domains() and _resolve_inline() as internal helpers with the None-vs-[] sentinel contract documented in their docstrings. Federated fetch (HTTP per-domain fallback) is deferred per #749. Refs #749 --- src/adcp/adagents.py | 84 +++++++++++++++++++++++++++++++++--- tests/test_adagents.py | 97 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 169 insertions(+), 12 deletions(-) diff --git a/src/adcp/adagents.py b/src/adcp/adagents.py index f97ac5e0..e4ed5131 100644 --- a/src/adcp/adagents.py +++ b/src/adcp/adagents.py @@ -949,16 +949,88 @@ def _resolve_agent_properties( and {t for t in p.get("tags", []) if isinstance(t, str)} & authorized_tags ] - # Handle publisher_properties (cross-domain references) + # Handle publisher_properties: inline-resolution path per adcp#4827. + # For each selector, fan out over its domain(s), then try to satisfy from + # the parent file's top-level properties[] before considering a federated + # fetch. Federated fetch (per-domain HTTP) is a follow-up; this change + # fixes the primary bug of returning raw selector dicts instead of resolved + # property objects. if authorization_type == "publisher_properties": - publisher_props = agent.get("publisher_properties", []) - if not isinstance(publisher_props, list): + selectors = agent.get("publisher_properties", []) + if not isinstance(selectors, list): return [] - return [p for p in publisher_props if isinstance(p, dict)] + resolved: list[dict[str, Any]] = [] + for selector in selectors: + if not isinstance(selector, dict): + continue + for domain in _selector_domains(selector): + inline = _resolve_inline(selector, top_level_properties, domain) + if inline is not None: + resolved.extend(inline) + # inline succeeded; skip federated fetch for this domain + # inline is None → no parent-file data for domain; federated + # fetch would go here (not yet implemented). + return resolved return [] +def _selector_domains(selector: dict[str, Any]) -> list[str]: + """Extract publisher domain(s) from a publisher_properties selector. + + Handles both the scalar ``publisher_domain`` form and the compact + ``publisher_domains[]`` array form from adcp#4827. + """ + domains = selector.get("publisher_domains") + if isinstance(domains, list): + return [d for d in domains if isinstance(d, str) and d] + domain = selector.get("publisher_domain") + if isinstance(domain, str) and domain: + return [domain] + return [] + + +def _resolve_inline( + selector: dict[str, Any], + parent_properties: list[dict[str, Any]], + domain: str, +) -> list[dict[str, Any]] | None: + """Attempt to satisfy a selector from the parent file's inline properties. + + Returns ``None`` when no property in ``parent_properties`` carries + ``publisher_domain == domain`` — the caller MUST try a federated fetch. + Returns ``[]`` when inline candidates exist for the domain but none pass + the selector filter — this is a real empty set; the caller MUST NOT fall + back to federated. + + Handles ``selection_type`` values: ``"all"``, ``"by_tag"``, ``"by_id"``. + Unknown types are treated permissively (return all domain candidates). + """ + candidates = [ + p for p in parent_properties + if isinstance(p, dict) and p.get("publisher_domain") == domain + ] + if not candidates: + return None # no inline data for this domain + + selection_type = selector.get("selection_type", "all") + if selection_type == "all": + return list(candidates) + if selection_type == "by_tag": + required_tags = {t for t in selector.get("property_tags", []) if isinstance(t, str)} + if not required_tags: + return list(candidates) + return [ + p for p in candidates + if required_tags & {t for t in p.get("tags", []) if isinstance(t, str)} + ] + if selection_type == "by_id": + required_ids = set(selector.get("property_ids", [])) + return [p for p in candidates if p.get("property_id") in required_ids] + # Unknown selection_type — permissive fallback + return list(candidates) + + def get_all_properties(adagents_data: dict[str, Any]) -> list[dict[str, Any]]: """Extract all properties from adagents.json data. @@ -1035,8 +1107,8 @@ def get_properties_by_agent(adagents_data: dict[str, Any], agent_url: str) -> li - inline_properties: Properties defined directly in the agent's properties array - property_ids: Filter top-level properties by property_id - property_tags: Filter top-level properties by tags - - publisher_properties: References properties from other publisher domains - (returns the selector objects, not resolved properties) + - publisher_properties: Inline-resolved properties from other publisher + domains (resolved from the parent file's top-level properties[] array) Args: adagents_data: Parsed adagents.json data diff --git a/tests/test_adagents.py b/tests/test_adagents.py index 44195fd1..d3a89332 100644 --- a/tests/test_adagents.py +++ b/tests/test_adagents.py @@ -1338,8 +1338,34 @@ def test_get_properties_by_agent_property_tags_multiple(self): assert properties[1]["name"] == "Site 2" def test_get_properties_by_agent_publisher_properties(self): - """Should return publisher_properties selectors for publisher_properties type.""" + """publisher_properties resolves inline properties, not raw selector dicts.""" adagents_data = { + "properties": [ + { + "property_id": "ctv-001", + "publisher_domain": "cnn.com", + "name": "CNN CTV", + "tags": ["ctv"], + }, + { + "property_id": "ctv-002", + "publisher_domain": "cnn.com", + "name": "CNN Sports CTV", + "tags": ["ctv", "sports"], + }, + { + "property_id": "web-001", + "publisher_domain": "cnn.com", + "name": "CNN Web", + "tags": ["web"], + }, + { + "property_id": "espn-001", + "publisher_domain": "espn.com", + "name": "ESPN Home", + "tags": ["sports"], + }, + ], "authorized_agents": [ { "url": "https://agent1.example.com", @@ -1361,11 +1387,70 @@ def test_get_properties_by_agent_publisher_properties(self): } properties = get_properties_by_agent(adagents_data, "https://agent1.example.com") - assert len(properties) == 2 - assert properties[0]["publisher_domain"] == "cnn.com" - assert properties[0]["selection_type"] == "by_tag" - assert properties[1]["publisher_domain"] == "espn.com" - assert properties[1]["selection_type"] == "all" + property_ids = {p["property_id"] for p in properties} + # by_tag "ctv": ctv-001 and ctv-002 match; web-001 does not + # all: espn-001 matches + assert property_ids == {"ctv-001", "ctv-002", "espn-001"} + # Must return resolved property dicts, not selector dicts + assert all("property_id" in p for p in properties) + assert not any("selection_type" in p for p in properties) + + def test_get_properties_by_agent_publisher_domains_fanout(self): + """publisher_domains[] compact form fans out to per-domain inline resolution.""" + adagents_data = { + "properties": [ + { + "property_id": "a-001", + "publisher_domain": "site-a.com", + "name": "Site A", + "tags": ["news"], + }, + { + "property_id": "b-001", + "publisher_domain": "site-b.com", + "name": "Site B", + "tags": ["news"], + }, + ], + "authorized_agents": [ + { + "url": "https://agent1.example.com", + "authorization_type": "publisher_properties", + "authorized_for": "Multi-domain", + "publisher_properties": [ + { + "publisher_domains": ["site-a.com", "site-b.com"], + "selection_type": "all", + }, + ], + }, + ], + } + + properties = get_properties_by_agent(adagents_data, "https://agent1.example.com") + assert {p["property_id"] for p in properties} == {"a-001", "b-001"} + + def test_get_properties_by_agent_publisher_properties_no_inline(self): + """When no parent-file properties exist for a domain, returns empty (no federated).""" + adagents_data = { + "properties": [], + "authorized_agents": [ + { + "url": "https://agent1.example.com", + "authorization_type": "publisher_properties", + "authorized_for": "Cross-domain", + "publisher_properties": [ + { + "publisher_domain": "external.com", + "selection_type": "all", + }, + ], + }, + ], + } + + properties = get_properties_by_agent(adagents_data, "https://agent1.example.com") + assert properties == [] def test_get_properties_by_agent_protocol_agnostic(self): """Should match agent URL regardless of protocol.""" From ee665bedd62beb036242cd434b214c02883d9f97 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 20 May 2026 12:53:35 +0000 Subject: [PATCH 2/4] fix(adagents): dedup resolved properties, fix by_id str filter, update docstring - Deduplicate by property_id across multi-selector/multi-domain fan-out so a property appearing in two selectors isn't returned twice - Filter by_id property_ids to str members (matches the by_tag pattern) - Reword _resolve_inline docstring: None means no inline data for the domain (federated not yet implemented), not a MUST-federate contract - Add by_id selection_type unit test Refs #749 --- src/adcp/adagents.py | 17 +++++++++++------ tests/test_adagents.py | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/src/adcp/adagents.py b/src/adcp/adagents.py index e4ed5131..2a7048fe 100644 --- a/src/adcp/adagents.py +++ b/src/adcp/adagents.py @@ -960,16 +960,21 @@ def _resolve_agent_properties( if not isinstance(selectors, list): return [] resolved: list[dict[str, Any]] = [] + seen_ids: set[str | None] = set() for selector in selectors: if not isinstance(selector, dict): continue for domain in _selector_domains(selector): inline = _resolve_inline(selector, top_level_properties, domain) if inline is not None: - resolved.extend(inline) + for prop in inline: + pid = prop.get("property_id") + if pid not in seen_ids: + seen_ids.add(pid) + resolved.append(prop) # inline succeeded; skip federated fetch for this domain # inline is None → no parent-file data for domain; federated - # fetch would go here (not yet implemented). + # fetch would go here (not yet implemented; see #749 Part 2). return resolved return [] @@ -998,10 +1003,10 @@ def _resolve_inline( """Attempt to satisfy a selector from the parent file's inline properties. Returns ``None`` when no property in ``parent_properties`` carries - ``publisher_domain == domain`` — the caller MUST try a federated fetch. + ``publisher_domain == domain`` — the inline path has no data for this + domain; a federated fetch (not yet implemented) would be the next step. Returns ``[]`` when inline candidates exist for the domain but none pass - the selector filter — this is a real empty set; the caller MUST NOT fall - back to federated. + the selector filter — this is a real empty set; do NOT fall back. Handles ``selection_type`` values: ``"all"``, ``"by_tag"``, ``"by_id"``. Unknown types are treated permissively (return all domain candidates). @@ -1025,7 +1030,7 @@ def _resolve_inline( if required_tags & {t for t in p.get("tags", []) if isinstance(t, str)} ] if selection_type == "by_id": - required_ids = set(selector.get("property_ids", [])) + required_ids = {i for i in selector.get("property_ids", []) if isinstance(i, str)} return [p for p in candidates if p.get("property_id") in required_ids] # Unknown selection_type — permissive fallback return list(candidates) diff --git a/tests/test_adagents.py b/tests/test_adagents.py index d3a89332..c93d20f5 100644 --- a/tests/test_adagents.py +++ b/tests/test_adagents.py @@ -1452,6 +1452,32 @@ def test_get_properties_by_agent_publisher_properties_no_inline(self): properties = get_properties_by_agent(adagents_data, "https://agent1.example.com") assert properties == [] + def test_get_properties_by_agent_publisher_properties_by_id(self): + """publisher_properties with selection_type by_id filters by property_id.""" + adagents_data = { + "properties": [ + {"property_id": "ctv-001", "publisher_domain": "cnn.com", "name": "CNN CTV"}, + {"property_id": "ctv-002", "publisher_domain": "cnn.com", "name": "CNN Web"}, + ], + "authorized_agents": [ + { + "url": "https://agent1.example.com", + "authorization_type": "publisher_properties", + "authorized_for": "Specific properties", + "publisher_properties": [ + { + "publisher_domain": "cnn.com", + "selection_type": "by_id", + "property_ids": ["ctv-001"], + }, + ], + }, + ], + } + + properties = get_properties_by_agent(adagents_data, "https://agent1.example.com") + assert {p["property_id"] for p in properties} == {"ctv-001"} + def test_get_properties_by_agent_protocol_agnostic(self): """Should match agent URL regardless of protocol.""" adagents_data = { From cd530bf02b0757c7aa186b7f130ffe9bb6dc2172 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 20 May 2026 14:30:04 +0000 Subject: [PATCH 3/4] =?UTF-8?q?perf(adagents):=20index=20parent=20properti?= =?UTF-8?q?es=20by=20domain=20to=20fix=20O(N=C3=97M)=20at=20cafemedia=20sc?= =?UTF-8?q?ale?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-builds a domain→properties index in _resolve_agent_properties before the selector loop so each per-domain inline lookup is O(1) rather than O(N). At cafemedia scale (6,843 properties × 6,800 domains) the unindexed path was ~46 M comparisons; with the index it is O(N+M). Also adds the cafemedia/interchange.io canonical fixture (6,843 properties, publisher_domains[] compact form) to the test suite — sized to expose O(N×M) regressions and validate the raptive_managed tag filter against the production managed-network shape. Part of #749. https://claude.ai/code/session_01RDYrywLhVbd4crrAHkTnsH --- src/adcp/adagents.py | 31 +++++++++++++++--------- tests/test_adagents.py | 54 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 11 deletions(-) diff --git a/src/adcp/adagents.py b/src/adcp/adagents.py index 2a7048fe..b182f3c2 100644 --- a/src/adcp/adagents.py +++ b/src/adcp/adagents.py @@ -959,13 +959,22 @@ def _resolve_agent_properties( selectors = agent.get("publisher_properties", []) if not isinstance(selectors, list): return [] + # Pre-index parent properties by domain once — O(N) — so per-domain + # lookups are O(1) instead of O(N), avoiding O(N×M) at cafemedia scale + # (6,843 properties × 6,800 domains = 46 M ops without this index). + domain_index: dict[str, list[dict[str, Any]]] = {} + for p in top_level_properties: + if isinstance(p, dict): + d = p.get("publisher_domain") + if isinstance(d, str) and d: + domain_index.setdefault(d, []).append(p) resolved: list[dict[str, Any]] = [] seen_ids: set[str | None] = set() for selector in selectors: if not isinstance(selector, dict): continue for domain in _selector_domains(selector): - inline = _resolve_inline(selector, top_level_properties, domain) + inline = _resolve_inline(selector, domain_index, domain) if inline is not None: for prop in inline: pid = prop.get("property_id") @@ -997,24 +1006,24 @@ def _selector_domains(selector: dict[str, Any]) -> list[str]: def _resolve_inline( selector: dict[str, Any], - parent_properties: list[dict[str, Any]], + domain_index: dict[str, list[dict[str, Any]]], domain: str, ) -> list[dict[str, Any]] | None: """Attempt to satisfy a selector from the parent file's inline properties. - Returns ``None`` when no property in ``parent_properties`` carries - ``publisher_domain == domain`` — the inline path has no data for this - domain; a federated fetch (not yet implemented) would be the next step. - Returns ``[]`` when inline candidates exist for the domain but none pass - the selector filter — this is a real empty set; do NOT fall back. + ``domain_index`` is a pre-built mapping of publisher_domain → property list + (built once per ``_resolve_agent_properties`` call for O(1) per-domain + lookup instead of O(N) linear scan). + + Returns ``None`` when ``domain_index`` has no entry for ``domain`` — the + inline path has no data for this domain; a federated fetch would be next. + Returns ``[]`` when inline candidates exist but none pass the selector + filter — this is a real empty set; do NOT fall back. Handles ``selection_type`` values: ``"all"``, ``"by_tag"``, ``"by_id"``. Unknown types are treated permissively (return all domain candidates). """ - candidates = [ - p for p in parent_properties - if isinstance(p, dict) and p.get("publisher_domain") == domain - ] + candidates = domain_index.get(domain) if not candidates: return None # no inline data for this domain diff --git a/tests/test_adagents.py b/tests/test_adagents.py index c93d20f5..b1ac6c17 100644 --- a/tests/test_adagents.py +++ b/tests/test_adagents.py @@ -1478,6 +1478,60 @@ def test_get_properties_by_agent_publisher_properties_by_id(self): properties = get_properties_by_agent(adagents_data, "https://agent1.example.com") assert {p["property_id"] for p in properties} == {"ctv-001"} + def test_get_properties_by_agent_cafemedia_scale(self): + """Cafemedia/interchange.io canonical fixture: 6,843 inline properties across + 6,800 child domains, all raptive_managed, one authorized agent. + + Sized to catch O(N×M) regressions — at this scale an unindexed + implementation (~46 M ops) would cause a multi-second timeout. + """ + # 6,800 child publisher domains (cafemedia fan-out shape) + child_domains = [f"site{i:04d}.raptive.com" for i in range(6800)] + properties: list[dict] = [] + # One property per child domain + for i, domain in enumerate(child_domains): + properties.append({ + "property_id": f"p-{i:05d}", + "publisher_domain": domain, + "name": f"Site {i} — Raptive Managed", + "tags": ["raptive_managed"], + }) + # 43 extra properties on the first 43 domains (total: 6,843) + for i in range(43): + properties.append({ + "property_id": f"extra-{i:03d}", + "publisher_domain": child_domains[i], + "name": f"Site {i} Extra Property", + "tags": ["raptive_managed", "ctv"], + }) + + adagents_data = { + "properties": properties, + "authorized_agents": [ + { + "url": "https://interchange.io", + "authorization_type": "publisher_properties", + "authorized_for": "Raptive managed network", + "publisher_properties": [ + { + "publisher_domains": child_domains, + "selection_type": "by_tag", + "property_tags": ["raptive_managed"], + } + ], + } + ], + } + + result = get_properties_by_agent(adagents_data, "https://interchange.io") + assert len(result) == 6843 + result_domains = {p["publisher_domain"] for p in result} + assert result_domains <= set(child_domains) + assert all("raptive_managed" in p.get("tags", []) for p in result) + # Must return resolved property dicts, not selector dicts + assert all("property_id" in p for p in result) + assert not any("publisher_domains" in p for p in result) + def test_get_properties_by_agent_protocol_agnostic(self): """Should match agent URL regardless of protocol.""" adagents_data = { From 2f8f08780988dc02c7d5c326dfecc98877f63271 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 20 May 2026 14:40:40 +0000 Subject: [PATCH 4/4] test(adagents): tighten cafemedia fixture domain assertion to equality Reviewer noted the subset assertion (<=) would pass even if half the domains were missing. Changed to equality so all 6,800 child domains must appear in the resolved result. Part of #749. https://claude.ai/code/session_01RDYrywLhVbd4crrAHkTnsH --- tests/test_adagents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_adagents.py b/tests/test_adagents.py index b1ac6c17..5db63a05 100644 --- a/tests/test_adagents.py +++ b/tests/test_adagents.py @@ -1526,7 +1526,7 @@ def test_get_properties_by_agent_cafemedia_scale(self): result = get_properties_by_agent(adagents_data, "https://interchange.io") assert len(result) == 6843 result_domains = {p["publisher_domain"] for p in result} - assert result_domains <= set(child_domains) + assert result_domains == set(child_domains) # all 6,800 must appear, not just a subset assert all("raptive_managed" in p.get("tags", []) for p in result) # Must return resolved property dicts, not selector dicts assert all("property_id" in p for p in result)