From 156d6aae773b1a34a72a3c015416ccb70f541658 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 20 May 2026 12:50:28 +0000
Subject: [PATCH 1/4] fix(adagents): inline-resolution path for
 publisher_properties selectors + publisher_domains[] fan-out

Fixes the bug where _resolve_agent_properties with authorization_type
"publisher_properties" returned raw selector dicts instead of resolved
property objects. Implements the inline-resolution path from adcp#4827:
for each selector, fan out over publisher_domain / publisher_domains[],
match against the parent file's top-level properties[] by publisher_domain,
apply selection_type filter (all / by_tag / by_id), and extend the result.

Also adds _selector_domains() and _resolve_inline() as internal helpers
with the None-vs-[] sentinel contract documented in their docstrings.
Federated fetch (HTTP per-domain fallback) is deferred per #749.

Refs #749
---
 src/adcp/adagents.py   | 84 +++++++++++++++++++++++++++++++++---
 tests/test_adagents.py | 97 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 169 insertions(+), 12 deletions(-)

diff --git a/src/adcp/adagents.py b/src/adcp/adagents.py
index f97ac5e0..e4ed5131 100644
--- a/src/adcp/adagents.py
+++ b/src/adcp/adagents.py
@@ -949,16 +949,88 @@ def _resolve_agent_properties(
             and {t for t in p.get("tags", []) if isinstance(t, str)} & authorized_tags
         ]
 
-    # Handle publisher_properties (cross-domain references)
+    # Handle publisher_properties: inline-resolution path per adcp#4827.
+    # For each selector, fan out over its domain(s), then try to satisfy from
+    # the parent file's top-level properties[] before considering a federated
+    # fetch. Federated fetch (per-domain HTTP) is a follow-up; this change
+    # fixes the primary bug of returning raw selector dicts instead of resolved
+    # property objects.
     if authorization_type == "publisher_properties":
-        publisher_props = agent.get("publisher_properties", [])
-        if not isinstance(publisher_props, list):
+        selectors = agent.get("publisher_properties", [])
+        if not isinstance(selectors, list):
             return []
-        return [p for p in publisher_props if isinstance(p, dict)]
+        resolved: list[dict[str, Any]] = []
+        for selector in selectors:
+            if not isinstance(selector, dict):
+                continue
+            for domain in _selector_domains(selector):
+                inline = _resolve_inline(selector, top_level_properties, domain)
+                if inline is not None:
+                    resolved.extend(inline)
+                    # inline succeeded; skip federated fetch for this domain
+                # inline is None → no parent-file data for domain; federated
+                # fetch would go here (not yet implemented).
+        return resolved
 
     return []
 
 
+def _selector_domains(selector: dict[str, Any]) -> list[str]:
+    """Extract publisher domain(s) from a publisher_properties selector.
+
+    Handles both the scalar ``publisher_domain`` form and the compact
+    ``publisher_domains[]`` array form from adcp#4827.
+    """
+    domains = selector.get("publisher_domains")
+    if isinstance(domains, list):
+        return [d for d in domains if isinstance(d, str) and d]
+    domain = selector.get("publisher_domain")
+    if isinstance(domain, str) and domain:
+        return [domain]
+    return []
+
+
+def _resolve_inline(
+    selector: dict[str, Any],
+    parent_properties: list[dict[str, Any]],
+    domain: str,
+) -> list[dict[str, Any]] | None:
+    """Attempt to satisfy a selector from the parent file's inline properties.
+
+    Returns ``None`` when no property in ``parent_properties`` carries
+    ``publisher_domain == domain`` — the caller MUST try a federated fetch.
+    Returns ``[]`` when inline candidates exist for the domain but none pass
+    the selector filter — this is a real empty set; the caller MUST NOT fall
+    back to federated.
+
+    Handles ``selection_type`` values: ``"all"``, ``"by_tag"``, ``"by_id"``.
+    Unknown types are treated permissively (return all domain candidates).
+    """
+    candidates = [
+        p for p in parent_properties
+        if isinstance(p, dict) and p.get("publisher_domain") == domain
+    ]
+    if not candidates:
+        return None  # no inline data for this domain
+
+    selection_type = selector.get("selection_type", "all")
+    if selection_type == "all":
+        return list(candidates)
+    if selection_type == "by_tag":
+        required_tags = {t for t in selector.get("property_tags", []) if isinstance(t, str)}
+        if not required_tags:
+            return list(candidates)
+        return [
+            p for p in candidates
+            if required_tags & {t for t in p.get("tags", []) if isinstance(t, str)}
+        ]
+    if selection_type == "by_id":
+        required_ids = set(selector.get("property_ids", []))
+        return [p for p in candidates if p.get("property_id") in required_ids]
+    # Unknown selection_type — permissive fallback
+    return list(candidates)
+
+
 def get_all_properties(adagents_data: dict[str, Any]) -> list[dict[str, Any]]:
     """Extract all properties from adagents.json data.
 
@@ -1035,8 +1107,8 @@ def get_properties_by_agent(adagents_data: dict[str, Any], agent_url: str) -> li
     - inline_properties: Properties defined directly in the agent's properties array
     - property_ids: Filter top-level properties by property_id
     - property_tags: Filter top-level properties by tags
-    - publisher_properties: References properties from other publisher domains
-      (returns the selector objects, not resolved properties)
+    - publisher_properties: Inline-resolved properties from other publisher
+      domains (resolved from the parent file's top-level properties[] array)
 
     Args:
         adagents_data: Parsed adagents.json data
diff --git a/tests/test_adagents.py b/tests/test_adagents.py
index 44195fd1..d3a89332 100644
--- a/tests/test_adagents.py
+++ b/tests/test_adagents.py
@@ -1338,8 +1338,34 @@ def test_get_properties_by_agent_property_tags_multiple(self):
         assert properties[1]["name"] == "Site 2"
 
     def test_get_properties_by_agent_publisher_properties(self):
-        """Should return publisher_properties selectors for publisher_properties type."""
+        """publisher_properties resolves inline properties, not raw selector dicts."""
         adagents_data = {
+            "properties": [
+                {
+                    "property_id": "ctv-001",
+                    "publisher_domain": "cnn.com",
+                    "name": "CNN CTV",
+                    "tags": ["ctv"],
+                },
+                {
+                    "property_id": "ctv-002",
+                    "publisher_domain": "cnn.com",
+                    "name": "CNN Sports CTV",
+                    "tags": ["ctv", "sports"],
+                },
+                {
+                    "property_id": "web-001",
+                    "publisher_domain": "cnn.com",
+                    "name": "CNN Web",
+                    "tags": ["web"],
+                },
+                {
+                    "property_id": "espn-001",
+                    "publisher_domain": "espn.com",
+                    "name": "ESPN Home",
+                    "tags": ["sports"],
+                },
+            ],
             "authorized_agents": [
                 {
                     "url": "https://agent1.example.com",
@@ -1361,11 +1387,70 @@ def test_get_properties_by_agent_publisher_properties(self):
         }
 
         properties = get_properties_by_agent(adagents_data, "https://agent1.example.com")
-        assert len(properties) == 2
-        assert properties[0]["publisher_domain"] == "cnn.com"
-        assert properties[0]["selection_type"] == "by_tag"
-        assert properties[1]["publisher_domain"] == "espn.com"
-        assert properties[1]["selection_type"] == "all"
+        property_ids = {p["property_id"] for p in properties}
+        # by_tag "ctv": ctv-001 and ctv-002 match; web-001 does not
+        # all: espn-001 matches
+        assert property_ids == {"ctv-001", "ctv-002", "espn-001"}
+        # Must return resolved property dicts, not selector dicts
+        assert all("property_id" in p for p in properties)
+        assert not any("selection_type" in p for p in properties)
+
+    def test_get_properties_by_agent_publisher_domains_fanout(self):
+        """publisher_domains[] compact form fans out to per-domain inline resolution."""
+        adagents_data = {
+            "properties": [
+                {
+                    "property_id": "a-001",
+                    "publisher_domain": "site-a.com",
+                    "name": "Site A",
+                    "tags": ["news"],
+                },
+                {
+                    "property_id": "b-001",
+                    "publisher_domain": "site-b.com",
+                    "name": "Site B",
+                    "tags": ["news"],
+                },
+            ],
+            "authorized_agents": [
+                {
+                    "url": "https://agent1.example.com",
+                    "authorization_type": "publisher_properties",
+                    "authorized_for": "Multi-domain",
+                    "publisher_properties": [
+                        {
+                            "publisher_domains": ["site-a.com", "site-b.com"],
+                            "selection_type": "all",
+                        },
+                    ],
+                },
+            ],
+        }
+
+        properties = get_properties_by_agent(adagents_data, "https://agent1.example.com")
+        assert {p["property_id"] for p in properties} == {"a-001", "b-001"}
+
+    def test_get_properties_by_agent_publisher_properties_no_inline(self):
+        """When no parent-file properties exist for a domain, returns empty (no federated)."""
+        adagents_data = {
+            "properties": [],
+            "authorized_agents": [
+                {
+                    "url": "https://agent1.example.com",
+                    "authorization_type": "publisher_properties",
+                    "authorized_for": "Cross-domain",
+                    "publisher_properties": [
+                        {
+                            "publisher_domain": "external.com",
+                            "selection_type": "all",
+                        },
+                    ],
+                },
+            ],
+        }
+
+        properties = get_properties_by_agent(adagents_data, "https://agent1.example.com")
+        assert properties == []
 
     def test_get_properties_by_agent_protocol_agnostic(self):
         """Should match agent URL regardless of protocol."""

From ee665bedd62beb036242cd434b214c02883d9f97 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 20 May 2026 12:53:35 +0000
Subject: [PATCH 2/4] fix(adagents): dedup resolved properties, fix by_id str
 filter, update docstring

- Deduplicate by property_id across multi-selector/multi-domain fan-out so
  a property appearing in two selectors isn't returned twice
- Filter by_id property_ids to str members (matches the by_tag pattern)
- Reword _resolve_inline docstring: None means no inline data for the domain
  (federated not yet implemented), not a MUST-federate contract
- Add by_id selection_type unit test

Refs #749
---
 src/adcp/adagents.py   | 17 +++++++++++------
 tests/test_adagents.py | 26 ++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/src/adcp/adagents.py b/src/adcp/adagents.py
index e4ed5131..2a7048fe 100644
--- a/src/adcp/adagents.py
+++ b/src/adcp/adagents.py
@@ -960,16 +960,21 @@ def _resolve_agent_properties(
         if not isinstance(selectors, list):
             return []
         resolved: list[dict[str, Any]] = []
+        seen_ids: set[str | None] = set()
         for selector in selectors:
             if not isinstance(selector, dict):
                 continue
             for domain in _selector_domains(selector):
                 inline = _resolve_inline(selector, top_level_properties, domain)
                 if inline is not None:
-                    resolved.extend(inline)
+                    for prop in inline:
+                        pid = prop.get("property_id")
+                        if pid not in seen_ids:
+                            seen_ids.add(pid)
+                            resolved.append(prop)
                     # inline succeeded; skip federated fetch for this domain
                 # inline is None → no parent-file data for domain; federated
-                # fetch would go here (not yet implemented).
+                # fetch would go here (not yet implemented; see #749 Part 2).
         return resolved
 
     return []
@@ -998,10 +1003,10 @@ def _resolve_inline(
     """Attempt to satisfy a selector from the parent file's inline properties.
 
     Returns ``None`` when no property in ``parent_properties`` carries
-    ``publisher_domain == domain`` — the caller MUST try a federated fetch.
+    ``publisher_domain == domain`` — the inline path has no data for this
+    domain; a federated fetch (not yet implemented) would be the next step.
     Returns ``[]`` when inline candidates exist for the domain but none pass
-    the selector filter — this is a real empty set; the caller MUST NOT fall
-    back to federated.
+    the selector filter — this is a real empty set; do NOT fall back.
 
     Handles ``selection_type`` values: ``"all"``, ``"by_tag"``, ``"by_id"``.
     Unknown types are treated permissively (return all domain candidates).
@@ -1025,7 +1030,7 @@ def _resolve_inline(
             if required_tags & {t for t in p.get("tags", []) if isinstance(t, str)}
         ]
     if selection_type == "by_id":
-        required_ids = set(selector.get("property_ids", []))
+        required_ids = {i for i in selector.get("property_ids", []) if isinstance(i, str)}
         return [p for p in candidates if p.get("property_id") in required_ids]
     # Unknown selection_type — permissive fallback
     return list(candidates)
diff --git a/tests/test_adagents.py b/tests/test_adagents.py
index d3a89332..c93d20f5 100644
--- a/tests/test_adagents.py
+++ b/tests/test_adagents.py
@@ -1452,6 +1452,32 @@ def test_get_properties_by_agent_publisher_properties_no_inline(self):
         properties = get_properties_by_agent(adagents_data, "https://agent1.example.com")
         assert properties == []
 
+    def test_get_properties_by_agent_publisher_properties_by_id(self):
+        """publisher_properties with selection_type by_id filters by property_id."""
+        adagents_data = {
+            "properties": [
+                {"property_id": "ctv-001", "publisher_domain": "cnn.com", "name": "CNN CTV"},
+                {"property_id": "ctv-002", "publisher_domain": "cnn.com", "name": "CNN Web"},
+            ],
+            "authorized_agents": [
+                {
+                    "url": "https://agent1.example.com",
+                    "authorization_type": "publisher_properties",
+                    "authorized_for": "Specific properties",
+                    "publisher_properties": [
+                        {
+                            "publisher_domain": "cnn.com",
+                            "selection_type": "by_id",
+                            "property_ids": ["ctv-001"],
+                        },
+                    ],
+                },
+            ],
+        }
+
+        properties = get_properties_by_agent(adagents_data, "https://agent1.example.com")
+        assert {p["property_id"] for p in properties} == {"ctv-001"}
+
     def test_get_properties_by_agent_protocol_agnostic(self):
         """Should match agent URL regardless of protocol."""
         adagents_data = {

From cd530bf02b0757c7aa186b7f130ffe9bb6dc2172 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 20 May 2026 14:30:04 +0000
Subject: [PATCH 3/4] =?UTF-8?q?perf(adagents):=20index=20parent=20properti?=
 =?UTF-8?q?es=20by=20domain=20to=20fix=20O(N=C3=97M)=20at=20cafemedia=20sc?=
 =?UTF-8?q?ale?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-builds a domain→properties index in _resolve_agent_properties before
the selector loop so each per-domain inline lookup is O(1) rather than
O(N). At cafemedia scale (6,843 properties × 6,800 domains) the
unindexed path was ~46 M comparisons; with the index it is O(N+M).

Also adds the cafemedia/interchange.io canonical fixture (6,843
properties, publisher_domains[] compact form) to the test suite —
sized to expose O(N×M) regressions and validate the raptive_managed
tag filter against the production managed-network shape.

Part of #749.

https://claude.ai/code/session_01RDYrywLhVbd4crrAHkTnsH
---
 src/adcp/adagents.py   | 31 +++++++++++++++---------
 tests/test_adagents.py | 54 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+), 11 deletions(-)

diff --git a/src/adcp/adagents.py b/src/adcp/adagents.py
index 2a7048fe..b182f3c2 100644
--- a/src/adcp/adagents.py
+++ b/src/adcp/adagents.py
@@ -959,13 +959,22 @@ def _resolve_agent_properties(
         selectors = agent.get("publisher_properties", [])
         if not isinstance(selectors, list):
             return []
+        # Pre-index parent properties by domain once — O(N) — so per-domain
+        # lookups are O(1) instead of O(N), avoiding O(N×M) at cafemedia scale
+        # (6,843 properties × 6,800 domains = 46 M ops without this index).
+        domain_index: dict[str, list[dict[str, Any]]] = {}
+        for p in top_level_properties:
+            if isinstance(p, dict):
+                d = p.get("publisher_domain")
+                if isinstance(d, str) and d:
+                    domain_index.setdefault(d, []).append(p)
         resolved: list[dict[str, Any]] = []
         seen_ids: set[str | None] = set()
         for selector in selectors:
             if not isinstance(selector, dict):
                 continue
             for domain in _selector_domains(selector):
-                inline = _resolve_inline(selector, top_level_properties, domain)
+                inline = _resolve_inline(selector, domain_index, domain)
                 if inline is not None:
                     for prop in inline:
                         pid = prop.get("property_id")
@@ -997,24 +1006,24 @@ def _selector_domains(selector: dict[str, Any]) -> list[str]:
 
 def _resolve_inline(
     selector: dict[str, Any],
-    parent_properties: list[dict[str, Any]],
+    domain_index: dict[str, list[dict[str, Any]]],
     domain: str,
 ) -> list[dict[str, Any]] | None:
     """Attempt to satisfy a selector from the parent file's inline properties.
 
-    Returns ``None`` when no property in ``parent_properties`` carries
-    ``publisher_domain == domain`` — the inline path has no data for this
-    domain; a federated fetch (not yet implemented) would be the next step.
-    Returns ``[]`` when inline candidates exist for the domain but none pass
-    the selector filter — this is a real empty set; do NOT fall back.
+    ``domain_index`` is a pre-built mapping of publisher_domain → property list
+    (built once per ``_resolve_agent_properties`` call for O(1) per-domain
+    lookup instead of O(N) linear scan).
+
+    Returns ``None`` when ``domain_index`` has no entry for ``domain`` — the
+    inline path has no data for this domain; a federated fetch would be next.
+    Returns ``[]`` when inline candidates exist but none pass the selector
+    filter — this is a real empty set; do NOT fall back.
 
     Handles ``selection_type`` values: ``"all"``, ``"by_tag"``, ``"by_id"``.
     Unknown types are treated permissively (return all domain candidates).
     """
-    candidates = [
-        p for p in parent_properties
-        if isinstance(p, dict) and p.get("publisher_domain") == domain
-    ]
+    candidates = domain_index.get(domain)
     if not candidates:
         return None  # no inline data for this domain
 
diff --git a/tests/test_adagents.py b/tests/test_adagents.py
index c93d20f5..b1ac6c17 100644
--- a/tests/test_adagents.py
+++ b/tests/test_adagents.py
@@ -1478,6 +1478,60 @@ def test_get_properties_by_agent_publisher_properties_by_id(self):
         properties = get_properties_by_agent(adagents_data, "https://agent1.example.com")
         assert {p["property_id"] for p in properties} == {"ctv-001"}
 
+    def test_get_properties_by_agent_cafemedia_scale(self):
+        """Cafemedia/interchange.io canonical fixture: 6,843 inline properties across
+        6,800 child domains, all raptive_managed, one authorized agent.
+
+        Sized to catch O(N×M) regressions — at this scale an unindexed
+        implementation (~46 M ops) would cause a multi-second timeout.
+        """
+        # 6,800 child publisher domains (cafemedia fan-out shape)
+        child_domains = [f"site{i:04d}.raptive.com" for i in range(6800)]
+        properties: list[dict] = []
+        # One property per child domain
+        for i, domain in enumerate(child_domains):
+            properties.append({
+                "property_id": f"p-{i:05d}",
+                "publisher_domain": domain,
+                "name": f"Site {i} — Raptive Managed",
+                "tags": ["raptive_managed"],
+            })
+        # 43 extra properties on the first 43 domains (total: 6,843)
+        for i in range(43):
+            properties.append({
+                "property_id": f"extra-{i:03d}",
+                "publisher_domain": child_domains[i],
+                "name": f"Site {i} Extra Property",
+                "tags": ["raptive_managed", "ctv"],
+            })
+
+        adagents_data = {
+            "properties": properties,
+            "authorized_agents": [
+                {
+                    "url": "https://interchange.io",
+                    "authorization_type": "publisher_properties",
+                    "authorized_for": "Raptive managed network",
+                    "publisher_properties": [
+                        {
+                            "publisher_domains": child_domains,
+                            "selection_type": "by_tag",
+                            "property_tags": ["raptive_managed"],
+                        }
+                    ],
+                }
+            ],
+        }
+
+        result = get_properties_by_agent(adagents_data, "https://interchange.io")
+        assert len(result) == 6843
+        result_domains = {p["publisher_domain"] for p in result}
+        assert result_domains <= set(child_domains)
+        assert all("raptive_managed" in p.get("tags", []) for p in result)
+        # Must return resolved property dicts, not selector dicts
+        assert all("property_id" in p for p in result)
+        assert not any("publisher_domains" in p for p in result)
+
     def test_get_properties_by_agent_protocol_agnostic(self):
         """Should match agent URL regardless of protocol."""
         adagents_data = {

From 2f8f08780988dc02c7d5c326dfecc98877f63271 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Wed, 20 May 2026 14:40:40 +0000
Subject: [PATCH 4/4] test(adagents): tighten cafemedia fixture domain
 assertion to equality

Reviewer noted the subset assertion (<=) would pass even if half the
domains were missing. Changed to equality so all 6,800 child domains
must appear in the resolved result.

Part of #749.

https://claude.ai/code/session_01RDYrywLhVbd4crrAHkTnsH
---
 tests/test_adagents.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_adagents.py b/tests/test_adagents.py
index b1ac6c17..5db63a05 100644
--- a/tests/test_adagents.py
+++ b/tests/test_adagents.py
@@ -1526,7 +1526,7 @@ def test_get_properties_by_agent_cafemedia_scale(self):
         result = get_properties_by_agent(adagents_data, "https://interchange.io")
         assert len(result) == 6843
         result_domains = {p["publisher_domain"] for p in result}
-        assert result_domains <= set(child_domains)
+        assert result_domains == set(child_domains)  # all 6,800 must appear, not just a subset
         assert all("raptive_managed" in p.get("tags", []) for p in result)
         # Must return resolved property dicts, not selector dicts
         assert all("property_id" in p for p in result)