adcontextprotocol · bokelley · May 20, 2026 · May 20, 2026 · May 20, 2026 · May 20, 2026
diff --git a/src/adcp/adagents.py b/src/adcp/adagents.py
@@ -949,16 +949,102 @@ def _resolve_agent_properties(
             and {t for t in p.get("tags", []) if isinstance(t, str)} & authorized_tags
         ]
 
-    # Handle publisher_properties (cross-domain references)
+    # Handle publisher_properties: inline-resolution path per adcp#4827.
+    # For each selector, fan out over its domain(s), then try to satisfy from
+    # the parent file's top-level properties[] before considering a federated
+    # fetch. Federated fetch (per-domain HTTP) is a follow-up; this change
+    # fixes the primary bug of returning raw selector dicts instead of resolved
+    # property objects.
     if authorization_type == "publisher_properties":
-        publisher_props = agent.get("publisher_properties", [])
-        if not isinstance(publisher_props, list):
+        selectors = agent.get("publisher_properties", [])
+        if not isinstance(selectors, list):
             return []
-        return [p for p in publisher_props if isinstance(p, dict)]
+        # Pre-index parent properties by domain once — O(N) — so per-domain
+        # lookups are O(1) instead of O(N), avoiding O(N×M) at cafemedia scale
+        # (6,843 properties × 6,800 domains = 46 M ops without this index).
+        domain_index: dict[str, list[dict[str, Any]]] = {}
+        for p in top_level_properties:
+            if isinstance(p, dict):
+                d = p.get("publisher_domain")
+                if isinstance(d, str) and d:
+                    domain_index.setdefault(d, []).append(p)
+        resolved: list[dict[str, Any]] = []
+        seen_ids: set[str | None] = set()
+        for selector in selectors:
+            if not isinstance(selector, dict):
+                continue
+            for domain in _selector_domains(selector):
+                inline = _resolve_inline(selector, domain_index, domain)
+                if inline is not None:
+                    for prop in inline:
+                        pid = prop.get("property_id")
+                        if pid not in seen_ids:
+                            seen_ids.add(pid)
+                            resolved.append(prop)
+                    # inline succeeded; skip federated fetch for this domain
+                # inline is None → no parent-file data for domain; federated
+                # fetch would go here (not yet implemented; see #749 Part 2).
+        return resolved
 
     return []
 
 
+def _selector_domains(selector: dict[str, Any]) -> list[str]:
+    """Return the distinct publisher domain(s) named by a selector, in order.
+
+    The compact ``publisher_domains[]`` array (adcp#4827) wins when it is a
+    list; otherwise the scalar ``publisher_domain`` is used. Empty and
+    non-string entries are dropped, and repeats are collapsed so a domain
+    listed twice is resolved only once by the caller.
+    """
+    raw = selector.get("publisher_domains")
+    if not isinstance(raw, list):
+        # Scalar form: wrap it so both forms share the filter below.
+        raw = [selector.get("publisher_domain")]
+    return list(dict.fromkeys(d for d in raw if isinstance(d, str) and d))
+
+
+def _resolve_inline(
+    selector: dict[str, Any],
+    domain_index: dict[str, list[dict[str, Any]]],
+    domain: str,
+) -> list[dict[str, Any]] | None:
+    """Attempt to satisfy a selector from the parent file's inline properties.
+
+    ``domain_index`` maps publisher_domain → property list; the caller builds
+    it once so each per-domain lookup here is a single dict access.
+
+    Returns ``None`` when ``domain_index`` has no entry for ``domain`` (no
+    inline data; a federated fetch would be next) and ``[]`` when candidates
+    exist but none pass the filter — a real empty set; do NOT fall back.
+    ``"all"``, ``"by_tag"`` without usable tags, and unknown ``selection_type``
+    values are permissive and return every candidate for the domain.
+
+    An explicit JSON ``null`` in a tag/id field is treated like a missing field.
+    """
+    candidates = domain_index.get(domain)
+    if not candidates:
+        return None  # no inline data for this domain
+
+    selection_type = selector.get("selection_type", "all")
+    if selection_type == "by_tag":
+        # ``or ()``: .get()'s default covers a missing key, not JSON null.
+        wanted_tags = {t for t in (selector.get("property_tags") or ()) if isinstance(t, str)}
+        if wanted_tags:
+            return [
+                p for p in candidates
+                if wanted_tags & {t for t in (p.get("tags") or ()) if isinstance(t, str)}
+            ]
+    elif selection_type == "by_id":
+        wanted_ids = {i for i in (selector.get("property_ids") or ()) if isinstance(i, str)}
+        # Only str ids can match; the isinstance guard also keeps an
+        # unhashable property_id from raising inside the set lookup.
+        pairs = ((p, p.get("property_id")) for p in candidates)
+        return [p for p, pid in pairs if isinstance(pid, str) and pid in wanted_ids]
+    # "all", tag-less "by_tag", and unknown types: permissive, whole domain.
+    return list(candidates)
+
+
 def get_all_properties(adagents_data: dict[str, Any]) -> list[dict[str, Any]]:
     """Extract all properties from adagents.json data.
 
@@ -1035,8 +1121,8 @@ def get_properties_by_agent(adagents_data: dict[str, Any], agent_url: str) -> li
     - inline_properties: Properties defined directly in the agent's properties array
     - property_ids: Filter top-level properties by property_id
     - property_tags: Filter top-level properties by tags
-    - publisher_properties: References properties from other publisher domains
-      (returns the selector objects, not resolved properties)
+    - publisher_properties: Inline-resolved properties from other publisher
+      domains (resolved from the parent file's top-level properties[] array)
 
     Args:
         adagents_data: Parsed adagents.json data

diff --git a/tests/test_adagents.py b/tests/test_adagents.py
@@ -1338,8 +1338,34 @@ def test_get_properties_by_agent_property_tags_multiple(self):
         assert properties[1]["name"] == "Site 2"
 
     def test_get_properties_by_agent_publisher_properties(self):
-        """Should return publisher_properties selectors for publisher_properties type."""
+        """publisher_properties resolves inline properties, not raw selector dicts."""
         adagents_data = {
+            "properties": [
+                {
+                    "property_id": "ctv-001",
+                    "publisher_domain": "cnn.com",
+                    "name": "CNN CTV",
+                    "tags": ["ctv"],
+                },
+                {
+                    "property_id": "ctv-002",
+                    "publisher_domain": "cnn.com",
+                    "name": "CNN Sports CTV",
+                    "tags": ["ctv", "sports"],
+                },
+                {
+                    "property_id": "web-001",
+                    "publisher_domain": "cnn.com",
+                    "name": "CNN Web",
+                    "tags": ["web"],
+                },
+                {
+                    "property_id": "espn-001",
+                    "publisher_domain": "espn.com",
+                    "name": "ESPN Home",
+                    "tags": ["sports"],
+                },
+            ],
             "authorized_agents": [
                 {
                     "url": "https://agent1.example.com",
@@ -1361,11 +1387,150 @@ def test_get_properties_by_agent_publisher_properties(self):
         }
 
         properties = get_properties_by_agent(adagents_data, "https://agent1.example.com")
-        assert len(properties) == 2
-        assert properties[0]["publisher_domain"] == "cnn.com"
-        assert properties[0]["selection_type"] == "by_tag"
-        assert properties[1]["publisher_domain"] == "espn.com"
-        assert properties[1]["selection_type"] == "all"
+        property_ids = {p["property_id"] for p in properties}
+        # by_tag "ctv": ctv-001 and ctv-002 match; web-001 does not
+        # all: espn-001 matches
+        assert property_ids == {"ctv-001", "ctv-002", "espn-001"}
+        # Must return resolved property dicts, not selector dicts
+        assert all("property_id" in p for p in properties)
+        assert not any("selection_type" in p for p in properties)
+
+    def test_get_properties_by_agent_publisher_domains_fanout(self):
+        """A single selector in the compact publisher_domains[] form is expanded
+        domain by domain, each resolved against the inline properties.
+        """
+        agent_url = "https://agent1.example.com"
+        site_a = {
+            "property_id": "a-001",
+            "publisher_domain": "site-a.com",
+            "name": "Site A",
+            "tags": ["news"],
+        }
+        site_b = {
+            "property_id": "b-001",
+            "publisher_domain": "site-b.com",
+            "name": "Site B",
+            "tags": ["news"],
+        }
+        # One selector naming both domains at once (the compact array form).
+        selector = {
+            "publisher_domains": ["site-a.com", "site-b.com"],
+            "selection_type": "all",
+        }
+        agent = {
+            "url": agent_url,
+            "authorization_type": "publisher_properties",
+            "authorized_for": "Multi-domain",
+            "publisher_properties": [selector],
+        }
+        adagents_data = {"properties": [site_a, site_b], "authorized_agents": [agent]}
+
+        resolved = get_properties_by_agent(adagents_data, agent_url)
+        returned_ids = {prop["property_id"] for prop in resolved}
+        # Both domains must contribute their property.
+        assert returned_ids == {"a-001", "b-001"}
+
+    def test_get_properties_by_agent_publisher_properties_no_inline(self):
+        """A selector whose domain has no inline properties in the parent file
+        resolves to an empty list (no federated fetch happens).
+        """
+        agent_url = "https://agent1.example.com"
+        # The only selector points at a domain absent from properties[].
+        external_selector = {
+            "publisher_domain": "external.com",
+            "selection_type": "all",
+        }
+        agent = {
+            "url": agent_url,
+            "authorization_type": "publisher_properties",
+            "authorized_for": "Cross-domain",
+            "publisher_properties": [external_selector],
+        }
+        # Deliberately empty properties[]: nothing inline can satisfy the selector.
+        adagents_data = {"properties": [], "authorized_agents": [agent]}
+
+        resolved = get_properties_by_agent(adagents_data, agent_url)
+        assert resolved == []
+
+    def test_get_properties_by_agent_publisher_properties_by_id(self):
+        """A by_id selector should keep only the inline properties whose
+        property_id is listed in the selector's property_ids.
+        """
+        agent_url = "https://agent1.example.com"
+        wanted = {"property_id": "ctv-001", "publisher_domain": "cnn.com", "name": "CNN CTV"}
+        unwanted = {"property_id": "ctv-002", "publisher_domain": "cnn.com", "name": "CNN Web"}
+        selector = {
+            "publisher_domain": "cnn.com",
+            "selection_type": "by_id",
+            "property_ids": ["ctv-001"],
+        }
+        agent = {
+            "url": agent_url,
+            "authorization_type": "publisher_properties",
+            "authorized_for": "Specific properties",
+            "publisher_properties": [selector],
+        }
+        adagents_data = {
+            "properties": [wanted, unwanted],
+            "authorized_agents": [agent],
+        }
+
+        resolved = get_properties_by_agent(adagents_data, agent_url)
+        assert {prop["property_id"] for prop in resolved} == {"ctv-001"}
+
+    def test_get_properties_by_agent_cafemedia_scale(self):
+        """Resolve the cafemedia/interchange.io shape at full size: 6,843 inline
+        properties across 6,800 child domains, all raptive_managed, one agent.
+
+        The fixture is large enough that an unindexed O(N×M) resolver (~46 M
+        ops) would be very slow. There is no timing assertion here, so catching
+        such a regression presumably depends on a runner-level timeout.
+        """
+        agent_url = "https://interchange.io"
+        # 6,800 child publisher domains (cafemedia fan-out shape)
+        child_domains = [f"site{i:04d}.raptive.com" for i in range(6800)]
+        # One property per child domain ...
+        base_properties = [
+            {
+                "property_id": f"p-{i:05d}",
+                "publisher_domain": domain,
+                "name": f"Site {i} — Raptive Managed",
+                "tags": ["raptive_managed"],
+            }
+            for i, domain in enumerate(child_domains)
+        ]
+        # ... plus 43 extras on the first 43 domains (total: 6,843)
+        extra_properties = [
+            {
+                "property_id": f"extra-{i:03d}",
+                "publisher_domain": child_domains[i],
+                "name": f"Site {i} Extra Property",
+                "tags": ["raptive_managed", "ctv"],
+            }
+            for i in range(43)
+        ]
+        properties: list[dict] = base_properties + extra_properties
+        selector = {
+            "publisher_domains": child_domains,
+            "selection_type": "by_tag",
+            "property_tags": ["raptive_managed"],
+        }
+        agent = {
+            "url": agent_url,
+            "authorization_type": "publisher_properties",
+            "authorized_for": "Raptive managed network",
+            "publisher_properties": [selector],
+        }
+        adagents_data = {"properties": properties, "authorized_agents": [agent]}
+
+        result = get_properties_by_agent(adagents_data, agent_url)
+        assert len(result) == 6843
+        # all 6,800 domains must appear, not just a subset
+        assert {p["publisher_domain"] for p in result} == set(child_domains)
+        assert all("raptive_managed" in p.get("tags", []) for p in result)
+        # Must return resolved property dicts, not selector dicts
+        assert all("property_id" in p for p in result)
+        assert not any("publisher_domains" in p for p in result)
 
     def test_get_properties_by_agent_protocol_agnostic(self):
         """Should match agent URL regardless of protocol."""