From b9044e4f183a1c72d2415a9fb60865664dc9ca60 Mon Sep 17 00:00:00 2001 From: lbenzoni Date: Mon, 25 May 2026 14:51:53 +0200 Subject: [PATCH 1/9] feat: changed Docker compose to handle hermes in gateway mode --- services/hermes_platform_gateway/.env.example | 20 + services/hermes_platform_gateway/Dockerfile | 27 +- services/hermes_platform_gateway/README.md | 21 +- .../hermes_platform_gateway/entrypoint.sh | 48 ++ .../hermes-config.yaml | 36 ++ .../plugins/hermes-platform-gateway/README.md | 22 +- .../hermes_platform_gateway/__init__.py | 457 +++++++++----- .../hermes_platform_gateway/adapter.py | 555 ++++++++++++++++++ .../hermes_platform_gateway/client.py | 219 +------ .../hermes_platform_gateway/relay_client.py | 133 +++++ .../hermes-platform-gateway/plugin.yaml | 4 +- .../hermes-platform-gateway/pyproject.toml | 2 +- 12 files changed, 1154 insertions(+), 390 deletions(-) create mode 100644 services/hermes_platform_gateway/entrypoint.sh create mode 100644 services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py create mode 100644 services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py diff --git a/services/hermes_platform_gateway/.env.example b/services/hermes_platform_gateway/.env.example index 7064317..956d630 100644 --- a/services/hermes_platform_gateway/.env.example +++ b/services/hermes_platform_gateway/.env.example @@ -26,6 +26,26 @@ DOMYN_CHANNEL_ID= # Canvas tool-list poll interval in seconds. Set to 0 to disable. Default 60. # PLATFORM_TOOL_REFRESH_INTERVAL=60 +# --- Authorization (required for gateway mode) ------------------------------- +# In gateway mode hermes runs its own per-user authorization layer on top of +# the platform. Domyn already authenticates callers at the relay/API layer, +# so we typically delegate fully and accept any author arriving on the +# subscribed channel. 
Without this, unknown users get a pairing prompt: +# "Hi~ I don't recognize you yet! Here's your pairing code: …" +DOMYN_ALLOW_ALL_USERS=true + +# Optional alternative: a comma-separated allowlist of Domyn author IDs +# (leave DOMYN_ALLOW_ALL_USERS unset if you use this). +# DOMYN_ALLOWED_USERS=user-id-1,user-id-2 + +# --- Home channel (recommended) ---------------------------------------------- +# Without this, gateway-mode hermes posts a one-time +# "📬 No home channel is set for Domyn. Type /sethome …" +# notice into the first conversation it sees. The Domyn worker is already +# bound to one channel, so we just point the home target at it — cron +# results and broadcasts go back to the same channel. +DOMYN_HOME_CHANNEL=${DOMYN_CHANNEL_ID} + # --- LLM provider (required by hermes itself) -------------------------------- # Hermes needs at least one model provider. Defaults below target the # vLLM gateway used by the example agents in `custom_samples`; swap for diff --git a/services/hermes_platform_gateway/Dockerfile b/services/hermes_platform_gateway/Dockerfile index 5b9a244..98dba79 100644 --- a/services/hermes_platform_gateway/Dockerfile +++ b/services/hermes_platform_gateway/Dockerfile @@ -42,16 +42,17 @@ RUN pip install --no-cache-dir /opt/hermes-platform-gateway && \ cp /opt/hermes-platform-gateway/plugin.yaml \ ${HERMES_HOME}/plugins/hermes_platform_gateway/ -# 5) hermes config: -# - point hermes' default model at the vLLM gateway (api_key + base_url + -# model come from env vars at startup via hermes' ${VAR} expansion); -# - opt-in the platform-gateway plugin — standalone-kind plugins are -# skipped unless explicitly listed under `plugins.enabled`. -COPY hermes-config.yaml ${HERMES_HOME}/config.yaml - -# 6) Run hermes as a relay-attached worker. 
Stdin is closed in containers, -# so we drop into `hermes chat` non-interactively — the plugin's -# `on_session_start` hook will still fire, the WS subscriber stays open -# in a daemon thread, and AGENT_START events injected from the platform -# drive the conversation. -CMD ["hermes", "chat"] +# 5) hermes config template + entrypoint that materialises it. +# gateway mode reads ~/.hermes/config.yaml via read_raw_config() which +# does NOT expand ${VAR} references — so we resolve them at container +# start with entrypoint.sh and write the result into $HERMES_HOME. +COPY hermes-config.yaml /opt/hermes-config.template.yaml +COPY entrypoint.sh /usr/local/bin/entrypoint.sh +RUN chmod +x /usr/local/bin/entrypoint.sh + +# 6) Run hermes in gateway mode. The gateway is the headless, multi-session +# runner — it doesn't open a TUI and doesn't need a TTY. AGENT_START +# events for any conversation_id arriving on the subscribed channel are +# routed by the gateway to a per-conversation hermes session. +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD ["hermes", "gateway"] diff --git a/services/hermes_platform_gateway/README.md b/services/hermes_platform_gateway/README.md index 712235f..160a16d 100644 --- a/services/hermes_platform_gateway/README.md +++ b/services/hermes_platform_gateway/README.md @@ -18,17 +18,16 @@ hermes_platform_gateway/ ## How it fits together -1. The Dockerfile installs hermes-agent from git, the `domyn-agents` wheel, then `pip install /opt/hermes-platform-gateway` so `hermes_platform_gateway` is importable. +1. The Dockerfile installs hermes-agent, the `domyn-agents` wheel, then `pip install /opt/hermes-platform-gateway` so `hermes_platform_gateway` is importable. 2. The same plugin folder is *also* copied under `$HERMES_HOME/plugins/hermes_platform_gateway/` so hermes' plugin loader picks up `plugin.yaml` and calls `register(ctx)`. -3. `config.yaml` opt-ins the plugin under `plugins.enabled` — standalone-kind plugins are skipped otherwise. -4. 
On startup, `register(ctx)`: - - POSTs `https://api./api/agents-service/tool/list_delegate_tools_for_channel` to discover canvas tools (uses the `api.` subdomain β€” the same shape `domyn expose` uses). - - Registers each tool into hermes' `platform` toolset with a sync handler that forwards calls over the relay WebSocket. - - Opens `wss:///relay/v1/ws` with `api-key` / `space-id` / `channel-id` headers, and runs a background daemon thread that: - - injects platform `AGENT_START` events into hermes via `ctx.inject_message`, - - streams tokens back as `RESPONSE(is_partial=True)` events, - - sends `AGENT_END` after each LLM turn, - - resolves outstanding `TOOL_END`/`TOOL_ERROR` to the matching `concurrent.futures.Future` by `call_id`. +3. `config.yaml` opt-ins the plugin under `plugins.enabled`. +4. The container runs `hermes gateway` β€” the headless, multi-session runner. No TUI, no TTY required. +5. On startup, `register(ctx)`: + - POSTs `https://api./api/agents-service/tool/list_delegate_tools_for_channel` to discover canvas tools. + - Registers each tool into hermes' `platform` toolset with an async handler that forwards calls over the relay WebSocket, correlated to the active conversation. + - Registers a `domyn` platform adapter via `ctx.register_platform`. The adapter owns the WebSocket and translates `AGENT_START` events into hermes `MessageEvent`s with `chat_id = conversation_id`. + - Registers an `on_session_start` hook that links hermes' `session_id` to the adapter's `session_key`, so tool handlers can find the right turn correlation IDs. +6. The hermes gateway maintains one `AIAgent` per Domyn `conversation_id` (LRU-cached), with per-session SQLite-backed history. Different conversations run concurrently; same-conversation messages stay serialised. ## Prerequisites @@ -96,7 +95,7 @@ make build && make up |---|---| | Container logs: `4401 Unauthorized` on WS connect | `DOMYN_API_KEY` lacks worker-role scope on this channel. 
Regenerate it from the platform with worker permissions. | | Logs say `registered 0 platform tool(s)` | Canvas has no tools attached, or `DOMYN_CHANNEL_ID` points at the wrong channel. Verify with `curl https://api./api/agents-service/tool/list_delegate_tools_for_channel -H 'api-key: …' -d '{"space_id":"…","channel_id":"…","configuration_id":null}'`. | -| Two workers responding to the same message | Multiple containers/processes are subscribed to the same `channel-id`. Only one worker should subscribe per channel. | +| Two workers responding to the same message | Multiple containers/processes are subscribed to the same `channel-id`. Only one worker per channel. Multi-conversation works *within* one worker — not by running more workers on the same channel. | | Discovery returns tools but WS reconnects in a tight loop | Same as above — relay kicks each subscriber off when another connects. | ## Stopping diff --git a/services/hermes_platform_gateway/entrypoint.sh b/services/hermes_platform_gateway/entrypoint.sh new file mode 100644 index 0000000..ed98c21 --- /dev/null +++ b/services/hermes_platform_gateway/entrypoint.sh @@ -0,0 +1,48 @@ +#!/bin/sh +# hermes-platform-gateway entrypoint. +# +# hermes' gateway-mode config loader (gateway/run.py:_load_gateway_config → +# hermes_cli/config.read_raw_config) reads ~/.hermes/config.yaml RAW — +# without expanding ${VAR} references the way load_config() does. So we +# materialise the resolved config here before hermes starts. +# +# The image ships a template at /opt/hermes-config.template.yaml; we +# substitute env vars and write the result to $HERMES_HOME/config.yaml on +# every boot. Unconditional overwrite is intentional: this deployment +# treats hermes' config as ephemeral (Domyn is the source of truth for +# everything user-mutable). 
+ +set -e + +: "${HERMES_HOME:=/root/.hermes}" +TEMPLATE=/opt/hermes-config.template.yaml +TARGET="$HERMES_HOME/config.yaml" + +mkdir -p "$HERMES_HOME" + +python3 - <<'PY' +import os +import re +import sys + +template_path = "/opt/hermes-config.template.yaml" +target_path = os.path.join(os.environ.get("HERMES_HOME", "/root/.hermes"), "config.yaml") + +with open(template_path, encoding="utf-8") as f: + text = f.read() + +def _sub(match): + name = match.group(1) + val = os.environ.get(name) + if val is None: + print(f"warning: ${{{name}}} is not set in the environment", file=sys.stderr) + return match.group(0) + return val + +resolved = re.sub(r"\$\{([A-Z0-9_]+)\}", _sub, text) + +with open(target_path, "w", encoding="utf-8") as f: + f.write(resolved) +PY + +exec "$@" diff --git a/services/hermes_platform_gateway/hermes-config.yaml b/services/hermes_platform_gateway/hermes-config.yaml index 27e9822..4e0bb46 100644 --- a/services/hermes_platform_gateway/hermes-config.yaml +++ b/services/hermes_platform_gateway/hermes-config.yaml @@ -10,3 +10,39 @@ model: plugins: enabled: - hermes_platform_gateway + +agent: + # The Domyn canvas is the source of truth for tools on this worker. Drop + # hermes' built-in web + browser toolsets so: + # 1. They don't shadow-conflict with canvas tools of the same name + # (e.g. canvas `web_search` vs built-in `web_search`). + # 2. The Chromium-not-installed warning at startup goes away. + # Anything the canvas wants to expose still arrives via the platform-gateway + # plugin's tool registration. + disabled_toolsets: + - web + - browser + # Stop hermes from injecting periodic "⏳ Still working…" progress + # messages into the chat. Default 180s; set 0 to disable. Without this + # users see "Still working..." every 3 minutes during long turns. + gateway_notify_interval: 0 + +display: + # How hermes handles a new message that arrives during an active turn. 
+ # Default "interrupt" sends a visible "⚑ Interrupting current task…" + # status message — disruptive in a chat UI. "queue" silently FIFO-s + # the new message behind the current one; nothing visible until the + # current turn completes. "steer" injects as additional context. + busy_input_mode: queue + # Suppress hermes' mid-turn narrative chat messages ("Ti aiuto a + # creare uno script…", "Let me check the docs first…", etc). + # Default true → hermes calls `interim_assistant_callback` for every + # bit of LLM text that lands alongside a tool call, each becoming its + # own outbound AGENT_END frame. Apps that treat AGENT_END as the + # turn-terminator break when an interim frame arrives mid-turn. + # With false hermes stays silent until the final reply, so each turn + # produces exactly one AGENT_END (the deliverable) followed by END. + # Trade-off: long turns appear silent in the chat until the final + # answer lands — TOOL_START/TOOL_END visibility frames still go out, + # which is what keeps the UI alive in the meantime. + interim_assistant_messages: false diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/README.md b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/README.md index 60e02e5..1c87c38 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/README.md +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/README.md @@ -6,14 +6,15 @@ A hermes-agent pip plugin that dynamically registers canvas-connected tools at s ## How it works -1. **Tool discovery** — on startup, the plugin calls `POST /api/agents-service/tool/list_delegate_tools_for_channel` (on the `api.` subdomain, matching `domyn expose`) to fetch the tools currently connected on the canvas for the given `space_id` + `channel_id` (and optional `configuration_id`). +1. 
**Tool discovery** β€” on startup, the plugin POSTs `/api/agents-service/tool/list_delegate_tools_for_channel` to fetch the canvas tools for the given `space_id` + `channel_id` (and optional `configuration_id`). 2. **Schema conversion** β€” platform parameter lists are translated to hermes JSON Schema objects and registered with `ctx.register_tool`. -3. **Relay connection** β€” a background daemon thread opens `wss://{DOMYN_BASE_URL}/relay/v1/ws` with the auth headers (`channel-id`, `space-id`, `api-key`). -4. **Tool call flow** β€” when hermes invokes a platform tool, the handler sends a `TOOL_START` relay message and blocks on a `concurrent.futures.Future` until the matching `TOOL_END` (keyed by `call_id`) resolves it. -5. **Reconnection** β€” if the WebSocket drops, in-flight calls fail immediately with an error JSON and the background loop reconnects with full-jitter exponential backoff. -6. **Canvas changes** β€” a `RefreshLoop` daemon thread polls the tool list every `PLATFORM_TOOL_REFRESH_INTERVAL` seconds (default 60). New tools are registered, removed tools are deregistered, unchanged tools are left alone. -7. **Inbound user input** β€” when the platform sends an `AGENT_START` relay event, the plugin extracts the user text and injects it into hermes via `ctx.inject_message`, triggering a normal agent turn. -8. **Turn completion** β€” after each LLM turn (`post_llm_call` hook), the plugin sends one `AGENT_END` relay event carrying the full assistant response and the correlation IDs (`author`, `interaction_id`, `turn_id`, `conversation_id`) from the originating `AGENT_START`. We intentionally do *not* stream tokens as `RESPONSE(is_partial=True)` events: the platform treats each `RESPONSE` as its own block (joined with newlines in the UI) and a non-empty `AGENT_END` is "promoted" to a final `RESPONSE` β€” mixing the two paths produces duplicated output. The current shape mirrors what `domyn expose`'s Runtime does. +3. 
**Platform adapter registration** — `register(ctx)` calls `ctx.register_platform("domyn", "Domyn", factory, check_fn)`. Hermes' gateway runner instantiates the adapter when the gateway starts. +4. **Single WebSocket, multiple conversations** — the adapter opens one `wss://{DOMYN_BASE_URL}/relay/v1/ws` connection. Inbound `AGENT_START` events are demultiplexed by `conversation_id` and translated to hermes `MessageEvent`s with `session_key = f"domyn:{channel_id}:{conversation_id}"`. +5. **Per-conversation sessions** — hermes' `GatewayRunner` maintains one `AIAgent` per `session_key`, cached LRU, with per-session SQLite-backed history. Different conversations run concurrently in separate asyncio tasks. +6. **Outbound responses** — when an `AIAgent` finishes its turn, the gateway calls `adapter.send(chat_id=conversation_id, text)`. The adapter looks up the originating `AGENT_START`, copies its correlation IDs, and emits one `AGENT_END` frame. +7. **Tool calls** — the tool handler closure reads `parent_agent.session_id`, looks up the `session_key` via the adapter's `_session_id_to_key` map (populated by an `on_session_start` hook), then sends a `TOOL_START` with that conversation's correlation IDs. `TOOL_END`/`TOOL_ERROR` resolve a per-`call_id` future. +8. **Reconnection** — the adapter reconnects with full-jitter exponential backoff. In-flight tool calls fail with an error JSON; in-flight hermes turns continue locally (but their response is lost if the WS is still down at send time — accepted v1 limitation). +9. **Canvas changes** — `RefreshLoop` polls the tool list every `PLATFORM_TOOL_REFRESH_INTERVAL` seconds, registers new tools, deregisters removed ones. 
--- @@ -27,7 +28,7 @@ A hermes-agent pip plugin that dynamically registers canvas-connected tools at s ## Installation -The plugin manifest (`plugin.yaml` + `__init__.py`) lives in `~/.hermes/plugins/hermes_platform_gateway/`, but `register()` imports the actual implementation (`fetch_tools`, `GatewayConnection`, …) from the pip-installed `hermes_platform_gateway` package — so you must install **into the hermes-agent venv**, not whichever Python happens to be on `$PATH`: +The plugin manifest (`plugin.yaml` + `__init__.py`) lives in `~/.hermes/plugins/hermes_platform_gateway/`, but `register()` imports the actual implementation (`fetch_tools`, `RefreshLoop`, `build_ws_url`, …) from the pip-installed `hermes_platform_gateway` package — so you must install **into the hermes-agent venv**, not whichever Python happens to be on `$PATH`: ```bash HERMES_VENV=~/.hermes/hermes-agent/venv @@ -185,7 +186,7 @@ uv pip install -e ".[dev]" uv run --active pytest tests/ -v # Run a single file -uv run --active pytest tests/test_gateway.py -v +uv run --active pytest tests/test_relay_client.py -v ``` Test files: @@ -194,7 +195,8 @@ Test files: |---|---| | `tests/test_schema.py` | `convert_schema()` — type mapping, required/optional/default, unknown types | | `tests/test_client.py` | `fetch_tools()` HTTP requests, `build_ws_url()` scheme selection | -| `tests/test_gateway.py` | `GatewayConnection` internals — receive loop, fail-pending, call_tool round-trip, `_serialize_observation` | +| `tests/test_relay_client.py` | `DomynRelayClient` — framing, receive loop, full-jitter reconnect backoff | +| `tests/test_adapter.py` | `DomynPlatformAdapter` — inbound AGENT_START, send/AGENT_END, tool-call routing, session_id↔key map | | `tests/test_register.py` | `register(ctx)` — env var checks, schema wiring, handler delegation, correct WS URL and headers | | `tests/test_integration.py` | End-to-end against a real in-process stub — tool discovery, TOOL_START/END round-trip, inbound
AGENT_START, outbound send_event, auth headers | diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py index 63e0c85..1b1ca0e 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py @@ -1,125 +1,377 @@ -"""Hermes platform gateway plugin β€” registers canvas tools dynamically at startup.""" +"""hermes-platform-gateway plugin β€” gateway-mode multi-conversation adapter. + +Registers a Domyn platform adapter via ``ctx.register_platform``. The +adapter opens one WebSocket per worker to the Domyn relay and routes +each ``conversation_id`` to its own hermes session. +""" from __future__ import annotations +import asyncio import json import logging import os -import threading -from typing import Any +import uuid +from typing import Any, Callable + +from hermes_platform_gateway.client import ( + fetch_tools, + build_ws_url, + RefreshLoop, +) +from hermes_platform_gateway.schema import convert_schema logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + _REQUIRED = ("DOMYN_API_KEY", "DOMYN_BASE_URL", "DOMYN_SPACE_ID", "DOMYN_CHANNEL_ID") -def register(ctx) -> None: - """Called once by the hermes plugin loader at startup. +# --------------------------------------------------------------------------- +# Helpers β€” pure functions, no adapter / hermes state +# --------------------------------------------------------------------------- + +def _extract_last_reasoning(conversation_history: Any) -> str: + """Return the current turn's reasoning trace, or ``""`` when absent. 
- Fetches the current canvas tool list from the platform and registers - each as a sync handler that forwards calls over the WebSocket relay. - Also wires up bidirectional relay: AGENT_START injects user input into - hermes, and on_stream_token / post_llm_call hooks stream the response back. + Walks ``conversation_history`` backwards and picks the last assistant + message whose ``reasoning`` field is populated, stopping at the user + message that started the turn. Mirrors hermes' own extraction at + ``run_agent.py:14066-14072``. """ - api_key = os.environ.get("DOMYN_API_KEY", "") - base_url = os.environ.get("DOMYN_BASE_URL", "").rstrip("/") - space_id = os.environ.get("DOMYN_SPACE_ID", "") - channel_id = os.environ.get("DOMYN_CHANNEL_ID", "") - configuration_id = os.environ.get("DOMYN_CONFIGURATION_ID") or None - timeout = float(os.environ.get("PLATFORM_TOOL_TIMEOUT", "120")) - refresh_interval = float(os.environ.get("PLATFORM_TOOL_REFRESH_INTERVAL", "60")) + if not conversation_history: + return "" + for msg in reversed(list(conversation_history)): + if not isinstance(msg, dict): + continue + if msg.get("role") == "user": + break + if msg.get("role") != "assistant": + continue + reasoning = msg.get("reasoning") or msg.get("reasoning_content") + if not reasoning: + continue + if isinstance(reasoning, str): + return reasoning + if isinstance(reasoning, list): + parts: list[str] = [] + for item in reasoning: + if isinstance(item, str): + parts.append(item) + elif isinstance(item, dict): + text = item.get("text") or item.get("thinking") or "" + if text: + parts.append(str(text)) + if parts: + return "\n".join(parts) + return "" + + +# TODO(thought-process): drop the hardcoded placeholder once the active +# provider surfaces ``reasoning_content`` on assistant messages. 
+def _hardcoded_tool_thought(tool_name: str, args: Any) -> str: + """Placeholder ``ToolAction.thought`` text when no real reasoning is available.""" + try: + args_preview = ", ".join(f"{k}={v!r}" for k, v in (args or {}).items())[:200] + except Exception: + args_preview = "" + suffix = f" with {args_preview}" if args_preview else "" + return ( + f"[hardcoded placeholder] Hermes is invoking `{tool_name}`{suffix}. " + "Real reasoning will appear here once the LLM provider exposes " + "reasoning_content." + ) + + +# --------------------------------------------------------------------------- +# Hermes runtime lookups β€” read-only access to gateway internals +# --------------------------------------------------------------------------- + +def _runner_ref_or_none() -> Any: + """Return the live ``GatewayRunner`` instance, or None.""" + try: + from gateway.run import _gateway_runner_ref + except Exception: + return None + return _gateway_runner_ref() if _gateway_runner_ref else None + + +def _chat_id_and_thought_for_task(task_id: str) -> tuple[str | None, str]: + """Return ``(chat_id, current_thought)`` for the AIAgent driving *task_id*. + + Scans the gateway's ``_running_agents`` for the agent whose + ``_current_task_id`` matches, then pulls both its ``_chat_id`` and the + most-recent reasoning trace within the current turn so the platform + can render *why* hermes is invoking each tool, not just *that* it is. + Returns ``(None, "")`` when the lookup fails. 
+ """ + runner = _runner_ref_or_none() + if runner is None: + return None, "" + running = getattr(runner, "_running_agents", None) or {} + for agent in running.values(): + if getattr(agent, "_current_task_id", None) != task_id: + continue + chat_id = getattr(agent, "_chat_id", None) + thought = _extract_last_reasoning(getattr(agent, "messages", None)) + return chat_id, thought + return None, "" + +# --------------------------------------------------------------------------- +# Async dispatch β€” hooks fire on hermes' worker thread; outbound sends +# must reach the relay client's loop or websockets raises about loop +# affinity. ``_schedule_on_gateway_loop`` is the cross-loop bridge. +# --------------------------------------------------------------------------- + +def _schedule_on_gateway_loop(coro: Any, *, label: str) -> None: + """Fire-and-forget schedule of *coro* on the gateway's event loop.""" + runner = _runner_ref_or_none() + gateway_loop = getattr(runner, "_gateway_loop", None) if runner else None + if gateway_loop is not None and not gateway_loop.is_closed(): + try: + asyncio.run_coroutine_threadsafe(coro, gateway_loop) + return + except Exception as exc: + logger.warning("platform-gateway: schedule %s failed - %s", label, exc) + # Same-loop fallback for tests / synchronous contexts. + try: + asyncio.get_event_loop().create_task(coro) + except Exception as exc: + logger.warning("platform-gateway: fallback %s dispatch failed - %s", label, exc) + + +# --------------------------------------------------------------------------- +# Refresh loop and tool handler factory β€” module-level so tests can stub +# the daemon thread and so ``RefreshLoop._refresh`` can rebuild handlers +# when new canvas tools appear. 
+# --------------------------------------------------------------------------- + +def _start_refresh_loop(**kwargs: Any) -> None: + """Indirection so tests can stub the daemon thread.""" + RefreshLoop(**kwargs).start() + + +def _make_tool_handler( + adapter_slot: list[Any], tool_name: str, timeout: float +) -> Callable[..., Any]: + """Build the async handler that bridges hermes' tool registry to the adapter. + + The registry only passes ``task_id`` to platform tool handlers (not + ``parent_agent``), so we look up the chat_id via the per-task stash + that ``pre_tool_call`` populated. + """ + + async def handler(args: dict, **kwargs: Any) -> str: + adapter = adapter_slot[0] + if adapter is None: + return json.dumps({"error": "platform-gateway: adapter not ready"}) + task_id = kwargs.get("task_id") or "" + chat_id = adapter._chat_id_by_task.get(task_id) + if not chat_id: + return json.dumps({"error": "platform-gateway: no chat_id for task_id"}) + session_key = adapter.session_key_for_chat(chat_id) + thought = adapter.thought_for_task(task_id) + return await adapter.call_tool( + session_key=session_key, + tool_name=tool_name, + args=args, + thought=thought, + timeout=timeout, + ) + + return handler + + +# --------------------------------------------------------------------------- +# Plugin entry point +# --------------------------------------------------------------------------- + +def register(ctx: Any) -> None: + """Plugin entry β€” called once by hermes' plugin loader at startup.""" missing = [v for v in _REQUIRED if not os.environ.get(v)] if missing: logger.warning( - "platform-gateway: skipping registration - missing env vars: %s", + "platform-gateway: skipping registration β€” missing env vars: %s", ", ".join(missing), ) return + api_key = os.environ["DOMYN_API_KEY"] + base_url = os.environ["DOMYN_BASE_URL"].rstrip("/") + space_id = os.environ["DOMYN_SPACE_ID"] + channel_id = os.environ["DOMYN_CHANNEL_ID"] + configuration_id = 
os.environ.get("DOMYN_CONFIGURATION_ID") or None + timeout = float(os.environ.get("PLATFORM_TOOL_TIMEOUT", "120")) + refresh_interval = float(os.environ.get("PLATFORM_TOOL_REFRESH_INTERVAL", "60")) + try: - from hermes_platform_gateway.client import fetch_tools raw_tools = fetch_tools( - base_url, - space_id, - channel_id, - api_key, + base_url, space_id, channel_id, api_key, configuration_id=configuration_id, ) except Exception as exc: logger.warning("platform-gateway: could not fetch tools - %s", exc) - return + raw_tools = [] - # _current_turn holds the AGENT_START event that triggered the current - # relay-driven turn so that streaming events and AGENT_END carry matching - # correlation IDs (author, interaction_id, turn_id, event_id). - _turn_lock = threading.Lock() - _current_turn: list[Any] = [None] # [BaseEvent | None] + # Adapter is built lazily by the factory so the gateway controls + # its lifecycle. We close over a one-slot list so the tool + # handlers (registered now) can find the live adapter once the + # factory has been called. 
+ adapter_slot: list[Any] = [None] + ws_url = build_ws_url(base_url) + headers = {"channel-id": channel_id, "space-id": space_id, "api-key": api_key} - def _on_agent_start(event: Any) -> None: - text = _extract_user_text(event) - if not text: - logger.warning("platform-gateway: AGENT_START with no extractable text, skipping") - return - with _turn_lock: - _current_turn[0] = event - if not ctx.inject_message(text): - logger.warning("platform-gateway: inject_message failed (no CLI ref)") - return - logger.debug("platform-gateway: injected user message from relay") + def _factory(config: Any) -> Any: + from hermes_platform_gateway.adapter import DomynPlatformAdapter + from hermes_platform_gateway.relay_client import DomynRelayClient - try: - from hermes_platform_gateway.client import GatewayConnection, build_ws_url - ws_url = build_ws_url(base_url) - headers = {"channel-id": channel_id, "space-id": space_id, "api-key": api_key} - gateway = GatewayConnection( - ws_url=ws_url, - headers=headers, - timeout=timeout, - on_agent_start=_on_agent_start, + def relay_factory(on_event: Callable[[Any], Any]) -> Any: + return DomynRelayClient(ws_url=ws_url, headers=headers, on_event=on_event) + + adapter = DomynPlatformAdapter( + config=config, + channel_id=channel_id, + relay_client_factory=relay_factory, ) - gateway.start() - except Exception as exc: - logger.warning("platform-gateway: could not start WebSocket connection - %s", exc) - return + adapter_slot[0] = adapter + return adapter - from hermes_platform_gateway.schema import convert_schema + def _check() -> bool: + return all(os.environ.get(v) for v in _REQUIRED) + ctx.register_platform( + name="domyn", + label="Domyn", + adapter_factory=_factory, + check_fn=_check, + required_env=list(_REQUIRED), + allowed_users_env="DOMYN_ALLOWED_USERS", + allow_all_env="DOMYN_ALLOW_ALL_USERS", + ) + + # --- Tool registration --- registered_names: set[str] = set() for tool_def in raw_tools: name = tool_def.get("name") if not name: - 
logger.warning( - "platform-gateway: skipping tool with missing 'name': %r", tool_def - ) continue - try: schema = convert_schema(tool_def) except Exception as exc: - logger.warning( - "platform-gateway: skipping tool '%s' - schema error: %s", name, exc - ) + logger.warning("platform-gateway: skipping tool '%s' - schema error: %s", name, exc) continue - ctx.register_tool( name=name, toolset="platform", schema=schema, - handler=_make_handler(gateway, name, _current_turn, _turn_lock), - is_async=False, + handler=_make_tool_handler(adapter_slot, name, timeout), + is_async=True, ) registered_names.add(name) - logger.debug("platform-gateway: registered tool '%s'", name) + logger.info( + "platform-gateway: registered domyn adapter with %d platform tool(s): %s", + len(registered_names), sorted(registered_names), + ) + + # --- Hooks --- + # pre_tool_call bridges the AIAgent β†’ tool-handler gap: hermes' tool + # registry only passes ``task_id`` to the handler (not ``parent_agent``), + # so we scan _running_agents for the matching agent and stash its + # chat_id (+ current reasoning) under task_id. post_tool_call drops + # the stash and emits a visibility TOOL_END for built-in tools. 
+ _platform_tool_names: set[str] = set(registered_names) + + def _on_pre_tool_call( + tool_name: str = "", + args: Any = None, + task_id: str = "", + **_: Any, + ) -> None: + adapter = adapter_slot[0] + if adapter is None or not task_id: + return + chat_id, thought = _chat_id_and_thought_for_task(task_id) + if not chat_id: + return - logger.info("platform-gateway: registered %d platform tool(s)", len(registered_names)) + is_platform = tool_name in _platform_tool_names + effective_thought = thought or _hardcoded_tool_thought(tool_name, args) + logger.debug( + "platform-gateway: pre_tool_call %s task_id=%s chat_id=%s is_platform=%s has_real_thought=%s", + tool_name, task_id, chat_id, is_platform, bool(thought), + ) + + # Always stash chat_id (+ thought) — post_tool_call reads this + # back regardless of branch. + adapter.record_task_chat( + task_id=task_id, chat_id=chat_id, thought=effective_thought, + ) + if is_platform: + # Real platform tool: the canonical TOOL_START goes out + # from adapter.call_tool with a pending future. Done here. + return + + # Built-in hermes tool: visibility-only TOOL_START. Generate a + # fresh call_id, stash it so post_tool_call can pair the TOOL_END. + call_id = f"visibility-{uuid.uuid4()}" + adapter.record_visibility_call(task_id=task_id, call_id=call_id) + _schedule_on_gateway_loop( + adapter.emit_visibility_tool_start( + chat_id=chat_id, + tool_name=tool_name, + args=args if isinstance(args, dict) else {}, + thought=effective_thought, + call_id=call_id, + ), + label=f"visibility TOOL_START {tool_name}", + ) + + def _on_post_tool_call( + tool_name: str = "", + task_id: str = "", + result: Any = None, + **_: Any, + ) -> None: + adapter = adapter_slot[0] + if adapter is None: + return + chat_id = adapter._chat_id_by_task.get(task_id) + visibility_call_id = adapter.pop_visibility_call(task_id) + adapter.forget_task(task_id=task_id) + + # Visibility TOOL_END only for built-in tools that emitted a + # matching visibility TOOL_START.
Platform tools have their own + # TOOL_END round-trip from the relay; don't double-emit. + if not visibility_call_id or tool_name in _platform_tool_names: + return + if not chat_id: + return + _schedule_on_gateway_loop( + adapter.emit_visibility_tool_end( + chat_id=chat_id, + tool_name=tool_name, + call_id=visibility_call_id, + observation=result, + ), + label=f"visibility TOOL_END {tool_name}", + ) + + ctx.register_hook("pre_tool_call", _on_pre_tool_call) + ctx.register_hook("post_tool_call", _on_post_tool_call) + # END is emitted by the adapter's on_processing_complete override — + # see ``DomynPlatformAdapter.on_processing_complete`` for the + # rationale (post_llm_call/on_session_end both fire too early). + + # --- Tool list refresh --- if refresh_interval > 0: - from hermes_platform_gateway.client import RefreshLoop - RefreshLoop( + _start_refresh_loop( ctx=ctx, - handler_factory=lambda name: _make_handler( - gateway, name, _current_turn, _turn_lock - ), + handler_factory=lambda nm: _make_tool_handler(adapter_slot, nm, timeout), base_url=base_url, space_id=space_id, channel_id=channel_id, @@ -127,69 +379,4 @@ def _on_agent_start(event: Any) -> None: interval=refresh_interval, initial_names=registered_names, configuration_id=configuration_id, - ).start() - logger.debug("platform-gateway: canvas polling every %.0fs", refresh_interval) - - # --- Bidirectional relay hook -------------------------------------------- - # - # We deliberately do NOT stream tokens as RESPONSE(is_partial=True) events: - # the platform's relay treats each RESPONSE as its own block and joins - # them with newlines in the UI (and a non-empty AGENT_END would get - # promoted into a *second* full message via the - # "[DelegateAgent] Promoting AGENT_END with content to final RESPONSE" - # path). Instead we deliver one final AGENT_END carrying the full - # assistant text — same shape `domyn expose`'s Runtime uses.
- - def _on_turn_complete( - assistant_response: str = "", - session_id: str | None = None, - **_: Any, - ) -> None: - from domyn_agents.core import BaseEvent, ExecutionEventType, Part - with _turn_lock: - turn = _current_turn[0] - _current_turn[0] = None - if turn is None: - return - event = BaseEvent( - event_type=ExecutionEventType.AGENT_END, - author=turn.author, - event_id=turn.event_id, - interaction_id=turn.interaction_id, - turn_id=turn.turn_id, - conversation_id=turn.conversation_id, - content=[Part(text=assistant_response)] if assistant_response else [], ) - gateway.send_event(event) - logger.debug("platform-gateway: sent AGENT_END for turn %s", turn.event_id) - - ctx.register_hook("post_llm_call", _on_turn_complete) - - -def _make_handler( - gateway: Any, - tool_name: str, - current_turn: list[Any], - turn_lock: threading.Lock, -) -> Any: - def handler(args: dict, **kwargs: Any) -> str: - with turn_lock: - turn = current_turn[0] - return gateway.call_tool(tool_name, args, turn=turn, **kwargs) - return handler - - -def _extract_user_text(event: Any) -> str: - """Extract plain user text from an AGENT_START relay event.""" - if event.action and event.action.parameters: - params = event.action.parameters - for key in ("input", "text", "message", "content"): - val = params.get(key) - if isinstance(val, str) and val: - return val - for part in event.content or []: - if getattr(part, "text", None): - return part.text - if event.action and event.action.parameters: - return json.dumps(event.action.parameters) - return "" diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py new file mode 100644 index 0000000..9c4fab7 --- /dev/null +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py @@ -0,0 +1,555 @@ +"""DomynPlatformAdapter — gateway-mode bridge to
the Domyn relay. + +Per turn: + AGENT_START (in) → STARTED (out) → handle_message dispatch + → (tool calls round-trip via the relay) + → AGENT_END (out, one per visible chat message) + → END (out, exactly once, from on_processing_complete) + +Per platform tool call: + pre_tool_call hook stashes (chat_id, thought) by task_id + tool handler calls adapter.call_tool(session_key, tool_name, args, thought) + adapter emits TOOL_START, awaits TOOL_END/TOOL_ERROR from the relay + _resolve_tool_call wakes the awaiting handler via call_soon_threadsafe +""" +from __future__ import annotations + +import asyncio +import json +import logging +import uuid +from datetime import datetime +from typing import Any, Callable, Dict, Optional + +from gateway.platforms.base import ( + BasePlatformAdapter, + MessageEvent, + MessageType, + SendResult, +) +from gateway.session import SessionSource +from gateway.config import Platform, PlatformConfig +from gateway.platform_registry import PlatformEntry, platform_registry + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Module-level helpers +# --------------------------------------------------------------------------- + +def _build_session_key(channel_id: str, conversation_id: str) -> str: + return f"domyn:{channel_id}:{conversation_id}" + + +def _extract_user_text(event: Any) -> str: + """Pull plain user text out of an AGENT_START relay event.""" + action = getattr(event, "action", None) + params = getattr(action, "parameters", None) if action else None + if params: + for key in ("input", "text", "message", "content"): + val = params.get(key) + if isinstance(val, str) and val: + return val + for part in getattr(event, "content", None) or []: + text = getattr(part, "text", None) + if text: + return text + if params: + return json.dumps(params) + return "" + + +def _serialise_observation(observation: Any) -> str: + """Return *observation* as a JSON string
(passthrough if already valid JSON).""" + if isinstance(observation, str): + try: + json.loads(observation) + return observation + except json.JSONDecodeError: + pass + return json.dumps(observation) + + +# Register "domyn" as a dynamic Platform value so ``Platform("domyn")`` +# resolves via the enum's _missing_() hook at adapter construction time. +# Module-level side-effect because we need it before any DomynPlatformAdapter +# instance is built (including the standalone test ones that don't go +# through ctx.register_platform). +def _ensure_domyn_registered() -> None: + if not platform_registry.is_registered("domyn"): + platform_registry.register( + PlatformEntry( + name="domyn", + label="Domyn", + adapter_factory=lambda cfg: None, + check_fn=lambda: True, + ) + ) + + +_ensure_domyn_registered() + + +# --------------------------------------------------------------------------- +# Adapter +# --------------------------------------------------------------------------- + +class DomynPlatformAdapter(BasePlatformAdapter): + """Bridges the Domyn relay WebSocket to hermes' gateway runner.""" + + # ===================================================================== + # Lifecycle (BasePlatformAdapter contract) + # ===================================================================== + + def __init__( + self, + config: PlatformConfig, + *, + channel_id: str, + relay_client_factory: Callable[[Callable[[Any], Any]], Any], + ) -> None: + super().__init__(config=config, platform=Platform("domyn")) + self._channel_id = channel_id + # Per-conversation state — keyed by adapter-internal session_key. + self._turn_by_session: Dict[str, Any] = {} + # Per-tool-call state — keyed by hermes' registry task_id. + self._chat_id_by_task: Dict[str, str] = {} + self._thought_by_task: Dict[str, str] = {} + # Per-call_id state — for visibility (built-in tools) vs real + # platform tool round-trips. Disjoint keyspaces.
+ self._visibility_call_id_by_task: Dict[str, str] = {} + self._pending_calls: Dict[str, Any] = {} # call_id -> (Future, loop) + self._client = relay_client_factory(self._on_event) + + async def connect(self) -> bool: + await self._client.connect() + self._mark_connected() + return True + + async def disconnect(self) -> None: + await self._client.disconnect() + self._fail_pending("disconnect") + self._mark_disconnected() + + async def send_typing(self, chat_id: str, metadata: Any = None) -> None: + return None + + async def get_chat_info(self, chat_id: str) -> Dict[str, Any]: + return {"name": chat_id, "type": "dm", "chat_id": chat_id} + + async def on_processing_complete( + self, event: "MessageEvent", outcome: Any + ) -> None: + """Emit the terminal END *after* the gateway's final send returns. + + ``BasePlatformAdapter._process_message_background`` calls this + hook after the final ``adapter.send`` returns (see + ``gateway/platforms/base.py:2964``), so it's the only place we + can emit END *after* the final AGENT_END rather than before. 
+ """ + chat_id = getattr(getattr(event, "source", None), "chat_id", None) + if not chat_id: + return + turn = self._turn_by_session.get(_build_session_key(self._channel_id, chat_id)) + if turn is None: + return + await self.emit_end(turn=turn) + + # ===================================================================== + # Inbound events from the relay + # ===================================================================== + + async def _on_event(self, event: Any) -> None: + from domyn_agents.core import ExecutionEventType + + et = getattr(event, "event_type", None) + logger.debug( + "domyn-adapter: inbound event type=%s conversation_id=%s call_id=%s", + getattr(et, "value", et), + getattr(event, "conversation_id", None), + getattr(getattr(event, "action", None), "call_id", None), + ) + if et == ExecutionEventType.AGENT_START: + await self._handle_agent_start(event) + return + if et in (ExecutionEventType.TOOL_END, ExecutionEventType.TOOL_ERROR): + self._resolve_tool_call(event) + + async def _handle_agent_start(self, event: Any) -> None: + conv_id = getattr(event, "conversation_id", None) + if not conv_id: + logger.warning("domyn-adapter: AGENT_START missing conversation_id, dropping") + return + text = _extract_user_text(event) + if not text: + logger.warning("domyn-adapter: AGENT_START with no extractable text, dropping") + return + + self._turn_by_session[_build_session_key(self._channel_id, conv_id)] = event + + # Bookend: STARTED tells the UI "received, working" — sent + # BEFORE handle_message dispatches into the gateway so the + # signal is visible even on long first turns.
+ await self.emit_started(turn=event) + + source = SessionSource( + platform=Platform("domyn"), + chat_id=conv_id, + chat_name=conv_id, + chat_type="dm", + user_id=getattr(event, "author", None), + user_name=getattr(event, "author", None), + ) + msg = MessageEvent( + text=text, + message_type=MessageType.TEXT, + source=source, + message_id=getattr(event, "event_id", None), + timestamp=datetime.now(), + ) + await self.handle_message(msg) + + # ===================================================================== + # Outbound emits + # ===================================================================== + + async def send( + self, + chat_id: str, + content: str, + reply_to: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + ) -> SendResult: + """Emit AGENT_END for one visible chat message. + + Called multiple times per turn (interim narrative, "⏳ Still + working…" notifiers, the final reply). END is NOT emitted here + — that's per-turn, fired from ``on_processing_complete``. + """ + from domyn_agents.core import BaseEvent, ExecutionEventType, Part + + session_key = _build_session_key(self._channel_id, chat_id) + turn = self._turn_by_session.get(session_key) + if turn is None: + preview = (content or "")[:100] + logger.warning( + "domyn-adapter: send for %s with no prior AGENT_START (preview=%r)", + session_key, preview, + ) + return SendResult(success=False, error="no prior AGENT_START") + + # event_id is per-frame unique; let BaseEvent auto-generate. + # Copying turn.event_id collides with TOOL_START (also fires + # against the same turn) and confuses platforms that key on it.
+ event = BaseEvent( + event_type=ExecutionEventType.AGENT_END, + author=turn.author, + interaction_id=turn.interaction_id, + turn_id=turn.turn_id, + conversation_id=turn.conversation_id, + content=[Part(text=content)] if content else [], + ) + try: + await self._client.send_event(event) + except Exception as exc: + logger.warning("domyn-adapter: send_event failed - %s", exc) + return SendResult(success=False, error=str(exc), retryable=True) + return SendResult(success=True, message_id=event.event_id) + + async def emit_started(self, *, turn: Any) -> None: + """Push STARTED on AGENT_START receipt (fire-and-forget).""" + from domyn_agents.core import BaseEvent, ExecutionEventType + + event = BaseEvent( + event_type=ExecutionEventType.STARTED, + author=turn.author, + interaction_id=turn.interaction_id, + turn_id=turn.turn_id, + conversation_id=turn.conversation_id, + ) + try: + await self._client.send_event(event) + except Exception as exc: + logger.warning("domyn-adapter: emit_started failed - %s", exc) + + async def emit_end(self, *, turn: Any) -> None: + """Push END once per turn after AGENT_END has been delivered.""" + from domyn_agents.core import BaseEvent, ExecutionEventType + + event = BaseEvent( + event_type=ExecutionEventType.END, + author=turn.author, + interaction_id=turn.interaction_id, + turn_id=turn.turn_id, + conversation_id=turn.conversation_id, + ) + try: + await self._client.send_event(event) + except Exception as exc: + logger.warning("domyn-adapter: emit_end failed - %s", exc) + + async def emit_visibility_tool_start( + self, + *, + chat_id: str, + tool_name: str, + args: Dict[str, Any], + thought: Optional[str], + call_id: str, + ) -> None: + """Push TOOL_START for a hermes built-in tool (visibility only). + + No pending future is registered and no TOOL_END round-trip is + expected from the platform — the matching TOOL_END is emitted by + ``emit_visibility_tool_end`` from the post_tool_call hook.
+ """ + from domyn_agents.core import BaseEvent, ExecutionEventType, ToolAction + + turn = self._turn_by_session.get(_build_session_key(self._channel_id, chat_id)) + if turn is None: + return + event = BaseEvent( + event_type=ExecutionEventType.TOOL_START, + author=turn.author, + interaction_id=turn.interaction_id, + turn_id=turn.turn_id, + conversation_id=turn.conversation_id, + action=ToolAction( + name=tool_name, + parameters=args or {}, + call_id=call_id, + thought=thought, + ), + ) + try: + await self._client.send_event(event) + except Exception as exc: + logger.warning( + "domyn-adapter: emit_visibility_tool_start %s failed - %s", + tool_name, exc, + ) + + async def emit_visibility_tool_end( + self, + *, + chat_id: str, + tool_name: str, + call_id: str, + observation: Any, + ) -> None: + """Push TOOL_END for a hermes built-in tool (companion to start).""" + from domyn_agents.core import BaseEvent, ExecutionEventType, ToolAction + + turn = self._turn_by_session.get(_build_session_key(self._channel_id, chat_id)) + if turn is None: + return + event = BaseEvent( + event_type=ExecutionEventType.TOOL_END, + author=turn.author, + interaction_id=turn.interaction_id, + turn_id=turn.turn_id, + conversation_id=turn.conversation_id, + action=ToolAction( + name=tool_name, + parameters={}, + call_id=call_id, + observation=observation, + ), + ) + try: + await self._client.send_event(event) + except Exception as exc: + logger.warning( + "domyn-adapter: emit_visibility_tool_end %s failed - %s", + tool_name, exc, + ) + + # ===================================================================== + # Tool-call routing (real platform tools — round-trip via the relay) + # ===================================================================== + + async def call_tool( + self, + *, + session_key: str, + tool_name: str, + args: Dict[str, Any], + thought: Optional[str] = None, + timeout: float = 120.0, + ) -> str: + """Send TOOL_START, await TOOL_END/TOOL_ERROR, return the observation as
JSON.""" + from domyn_agents.core import BaseEvent, ExecutionEventType, ToolAction + + turn = self._turn_by_session.get(session_key) + if turn is None: + return json.dumps({"error": "no active turn for session"}) + + call_id = str(uuid.uuid4()) + loop = asyncio.get_running_loop() + fut: asyncio.Future = loop.create_future() + # Stash (future, loop) — _resolve_tool_call runs on the relay + # client's loop, which may differ from this one, so we'll need + # call_soon_threadsafe to wake the awaiter. + self._pending_calls[call_id] = (fut, loop) + + start = BaseEvent( + event_type=ExecutionEventType.TOOL_START, + author=turn.author, + interaction_id=turn.interaction_id, + turn_id=turn.turn_id, + conversation_id=turn.conversation_id, + action=ToolAction( + name=tool_name, + parameters=args, + call_id=call_id, + thought=thought, + ), + ) + logger.debug( + "domyn-adapter: TOOL_START name=%s call_id=%s conversation_id=%s", + tool_name, call_id, turn.conversation_id, + ) + try: + await self._client.send_event(start) + except Exception as exc: + self._pending_calls.pop(call_id, None) + logger.warning( + "domyn-adapter: TOOL_START send failed for %s call_id=%s - %s", + tool_name, call_id, exc, + ) + return json.dumps({"error": f"send failed: {exc}"}) + + try: + observation = await asyncio.wait_for(fut, timeout=timeout) + logger.debug( + "domyn-adapter: TOOL_END received name=%s call_id=%s", + tool_name, call_id, + ) + return _serialise_observation(observation) + except asyncio.TimeoutError: + self._pending_calls.pop(call_id, None) + logger.warning( + "domyn-adapter: TOOL timeout name=%s call_id=%s after %.1fs", + tool_name, call_id, timeout, + ) + return json.dumps({"error": f"Tool '{tool_name}' timed out after {timeout}s"}) + except Exception as exc: + logger.warning( + "domyn-adapter: TOOL future raised name=%s call_id=%s - %s", + tool_name, call_id, exc, + ) + return json.dumps({"error": str(exc)}) + + def _resolve_tool_call(self, event: Any) -> None: + """Wake the awaiter
in call_tool when TOOL_END/TOOL_ERROR arrives.""" + from domyn_agents.core import ExecutionEventType + + et_value = getattr(event.event_type, "value", event.event_type) + call_id = getattr(getattr(event, "action", None), "call_id", None) + if not call_id: + logger.warning("domyn-adapter: %s with no call_id, dropping", et_value) + return + entry = self._pending_calls.pop(call_id, None) + if entry is None: + logger.warning( + "domyn-adapter: %s call_id=%s has no pending future (pending=%s)", + et_value, call_id, sorted(self._pending_calls.keys()), + ) + return + fut, fut_loop = entry + if fut.done(): + logger.warning( + "domyn-adapter: %s call_id=%s future already resolved", + et_value, call_id, + ) + return + if event.event_type == ExecutionEventType.TOOL_ERROR: + msg = ( + getattr(event, "error_message", None) + or f"platform tool error ({getattr(event, 'error_code', '')})" + ) + self._resolve_future(fut, fut_loop, exc=RuntimeError(msg)) + else: + observation = getattr(getattr(event, "action", None), "observation", None) + self._resolve_future(fut, fut_loop, result=observation) + + def _fail_pending(self, reason: str) -> None: + """Fail every in-flight tool call — called on disconnect.""" + for fut, fut_loop in self._pending_calls.values(): + if fut.done(): + continue + fut_loop.call_soon_threadsafe(fut.set_exception, RuntimeError(reason)) + self._pending_calls.clear() + + @staticmethod + def _resolve_future( + fut: asyncio.Future, + fut_loop: asyncio.AbstractEventLoop, + *, + result: Any = None, + exc: Optional[BaseException] = None, + ) -> None: + """Resolve a Future from a potentially different event loop. + + The future is bound to the loop where ``call_tool`` ran (a + worker loop spawned by hermes' ``_run_async``). The relay + receive loop, which calls us, runs in the gateway's loop. + ``call_soon_threadsafe`` is the canonical cross-loop bridge.
+ """ + def _apply() -> None: + if fut.done(): + return + if exc is not None: + fut.set_exception(exc) + else: + fut.set_result(result) + try: + fut_loop.call_soon_threadsafe(_apply) + except RuntimeError: + # Worker loop closed already — nothing waiting. + pass + + # ===================================================================== + # Per-task bookkeeping (used by pre_tool_call / post_tool_call hooks) + # ===================================================================== + + def session_key_for_chat(self, chat_id: str) -> str: + """Derive the adapter's internal session_key from a chat_id.""" + return _build_session_key(self._channel_id, chat_id) + + def record_task_chat( + self, *, task_id: str, chat_id: str, thought: Optional[str] = None + ) -> None: + """Stash (chat_id, thought) for a registry task_id. + + The platform tool handler only receives ``task_id`` from + ``registry.dispatch`` (not ``parent_agent``), so the + pre_tool_call hook stores the chat_id under task_id here and the + handler reads it back. The optional ``thought`` rides on + ``ToolAction.thought`` so the platform can render *why* the tool + was invoked.
+ """ + if task_id and chat_id: + self._chat_id_by_task[task_id] = chat_id + if thought: + self._thought_by_task[task_id] = thought + + def forget_task(self, *, task_id: str) -> None: + """Drop the per-task stashes after the tool finishes.""" + if task_id: + self._chat_id_by_task.pop(task_id, None) + self._thought_by_task.pop(task_id, None) + + def thought_for_task(self, task_id: str) -> Optional[str]: + """Return the thought stashed by pre_tool_call, if any.""" + return self._thought_by_task.get(task_id) + + def record_visibility_call(self, *, task_id: str, call_id: str) -> None: + """Pair a visibility TOOL_START's call_id with task_id for post_tool_call.""" + if task_id and call_id: + self._visibility_call_id_by_task[task_id] = call_id + + def pop_visibility_call(self, task_id: str) -> Optional[str]: + """Return-and-clear the visibility call_id for *task_id*.""" + return self._visibility_call_id_by_task.pop(task_id, None) if task_id else None diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py index 8b252b7..c4ad189 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py @@ -1,14 +1,9 @@ """Platform relay client: tool discovery and WebSocket connection management.""" from __future__ import annotations -import asyncio -import concurrent.futures -import json import logging -import random import threading import time -import uuid from typing import Any, Callable import httpx @@ -90,218 +85,6 @@ def fetch_tools( raise ValueError(f"Unexpected tool list response shape: {type(data)}") -class GatewayConnection: - """Manages a persistent WebSocket to the platform relay. - - Spawns a daemon thread on start() that owns a single asyncio event loop. 
- That loop maintains the WebSocket connection with full-jitter exponential - backoff reconnection and a receive loop that resolves pending - concurrent.futures.Future objects when TOOL_END / TOOL_ERROR arrives. - - Tool handlers call call_tool() synchronously — it blocks on future.result() - until the platform responds. - """ - - def __init__( - self, - ws_url: str, - headers: dict[str, str], - timeout: float = 120.0, - on_agent_start: Callable[["BaseEvent"], None] | None = None, - ) -> None: - self._ws_url = ws_url - self._headers = headers - self._timeout = timeout - self._on_agent_start = on_agent_start - self._pending: dict[str, concurrent.futures.Future] = {} - self._lock = threading.Lock() - self._loop: asyncio.AbstractEventLoop | None = None - self._ws: Any = None - self._ready = threading.Event() - - def start(self) -> None: - """Spawn the background WebSocket thread and wait up to 15s for first connect.""" - thread = threading.Thread(target=self._run, daemon=True) - thread.start() - if not self._ready.wait(timeout=15): - logger.warning("platform-gateway: timed out waiting for initial WebSocket connection") - - def _run(self) -> None: - self._loop = asyncio.new_event_loop() - asyncio.set_event_loop(self._loop) - self._loop.run_until_complete(self._connect_loop()) - - async def _connect_loop(self) -> None: - import websockets - - attempt = 0 - while True: - try: - async with websockets.connect( - self._ws_url, additional_headers=self._headers - ) as ws: - self._ws = ws - self._ready.set() - attempt = 0 - await self._receive_loop(ws) - code = getattr(ws, "close_code", None) - reason = ( - getattr(ws, "close_reason", None) - or getattr(ws, "close_message", None) - ) - logger.info( - "platform-gateway: WebSocket closed by server (code=%s, reason=%r)", - code, reason, - ) - except Exception as exc: - logger.warning("platform-gateway: WebSocket error - %s", exc) - self._fail_pending("WebSocket disconnected") - self._ws = None - - delay = min(30.0, 0.5 * (2 **
attempt)) * (0.5 + 0.5 * random.random()) - logger.debug("platform-gateway: reconnecting in %.1fs", delay) - await asyncio.sleep(delay) - attempt = min(attempt + 1, 6) - - async def _receive_loop(self, ws: Any) -> None: - from domyn_agents.core import ExecutionEventType, RelayMessage - - async for raw in ws: - try: - msg = RelayMessage.model_validate_json(raw) - event = msg.payload - except Exception: - continue - - if event.event_type == ExecutionEventType.AGENT_START: - if self._on_agent_start is not None: - try: - self._on_agent_start(event) - except Exception as exc: - logger.warning("platform-gateway: on_agent_start callback failed - %s", exc) - continue - - if event.event_type not in ( - ExecutionEventType.TOOL_END, - ExecutionEventType.TOOL_ERROR, - ): - continue - - call_id = getattr(event.action, "call_id", None) if event.action else None - if not call_id: - continue - - with self._lock: - future = self._pending.pop(call_id, None) - - if future is None or future.done(): - continue - - if event.event_type == ExecutionEventType.TOOL_ERROR: - future.set_exception( - RuntimeError( - event.error_message or f"platform tool error ({event.error_code})" - ) - ) - else: - observation = ( - getattr(event.action, "observation", None) if event.action else None - ) - future.set_result(observation) - - def send_event(self, event: "BaseEvent") -> None: - """Send a relay event back to the platform (fire-and-forget).""" - if self._ws is None or self._loop is None: - return - from domyn_agents.core import RelayMessage - msg = RelayMessage(payload=event).model_dump_json() - try: - asyncio.run_coroutine_threadsafe(self._ws.send(msg), self._loop) - except Exception as exc: - logger.debug("platform-gateway: send_event failed - %s", exc) - - def _fail_pending(self, reason: str) -> None: - with self._lock: - for future in self._pending.values(): - if not future.done(): - future.set_exception(RuntimeError(reason)) - self._pending.clear() - - def call_tool( - self, - tool_name: str, 
- args: dict[str, Any], - *, - turn: Any = None, - **kwargs: Any, - ) -> str: - """Send TOOL_START and block until TOOL_END/TOOL_ERROR or timeout. - - ``turn`` is the originating AGENT_START :class:`BaseEvent`. When provided - we copy ``author``/``interaction_id``/``turn_id``/``conversation_id`` onto - the TOOL_START so the platform can route the call to the right session — - this mirrors ``Runtime.call_platform_tool`` in ``domyn-agents``. - - Always returns a JSON string. Never raises. - """ - from domyn_agents.core import BaseEvent, ExecutionEventType, RelayMessage, ToolAction - - if self._ws is None or self._loop is None: - return json.dumps({"error": "platform-gateway: WebSocket not connected"}) - - call_id = str(uuid.uuid4()) - future: concurrent.futures.Future = concurrent.futures.Future() - with self._lock: - self._pending[call_id] = future - - event = RelayMessage( - payload=BaseEvent( - event_type=ExecutionEventType.TOOL_START, - author=getattr(turn, "author", None) or "hermes", - interaction_id=getattr(turn, "interaction_id", None), - turn_id=getattr(turn, "turn_id", None), - conversation_id=getattr(turn, "conversation_id", None), - action=ToolAction(name=tool_name, parameters=args, call_id=call_id), - ) - ) - - try: - send_fut = asyncio.run_coroutine_threadsafe( - self._ws.send(event.model_dump_json()), - self._loop, - ) - send_fut.result(timeout=10) - except Exception as exc: - with self._lock: - self._pending.pop(call_id, None) - return json.dumps({"error": f"platform-gateway: send failed - {exc}"}) - - try: - observation = future.result(timeout=self._timeout) - return _serialize_observation(observation) - except concurrent.futures.TimeoutError: - with self._lock: - self._pending.pop(call_id, None) - return json.dumps({"error": f"Tool '{tool_name}' timed out after {self._timeout}s"}) - except Exception as exc: - return json.dumps({"error": str(exc)}) - - -def _serialize_observation(observation: Any) -> str: - """Serialize a platform tool observation
to a JSON string. - - If observation is already a valid JSON string, return it as-is. - Otherwise wrap in {"result": observation}. - """ - if isinstance(observation, str): - try: - json.loads(observation) - return observation - except json.JSONDecodeError: - pass - return json.dumps({"result": observation}) - - def _deregister_tool(name: str) -> None: """Remove a tool from the hermes tool registry.""" try: @@ -393,7 +176,7 @@ def _refresh(self) -> None: toolset="platform", schema=schema, handler=self._handler_factory(name), - is_async=False, + is_async=True, ) logger.info( diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py new file mode 100644 index 0000000..9461c38 --- /dev/null +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py @@ -0,0 +1,133 @@ +"""Domyn relay WebSocket client. + +Pure transport: framing, connect, send_event, receive-loop dispatch. +No business logic — adapter.py owns event routing. +""" +from __future__ import annotations + +import asyncio +import logging +import random +from typing import Any, Awaitable, Callable + +logger = logging.getLogger(__name__) + + +def _backoff_delay(attempt: int, *, rng: Callable[[], float] = random.random) -> float: + """Full-jitter exponential backoff. + + delay = min(30, 0.5 * 2**attempt) * (0.5 + 0.5 * rng()) + """ + base = min(30.0, 0.5 * (2 ** attempt)) + return base * (0.5 + 0.5 * rng()) + + +class DomynRelayClient: + """Async WebSocket client for the Domyn relay. + + Owns the connect/reconnect loop and exposes ``send_event`` for outbound + frames. Inbound frames are passed to a caller-supplied async callback.
+ """ + + def __init__( + self, + ws_url: str, + headers: dict[str, str], + on_event: Callable[[Any], Awaitable[None]] | None = None, + ) -> None: + self._ws_url = ws_url + self._headers = headers + self._on_event = on_event + self._ws: Any = None + self._task: asyncio.Task | None = None + self._stop = asyncio.Event() + + async def send_event(self, event: Any) -> None: + from domyn_agents.core import RelayMessage + + if self._ws is None: + raise RuntimeError("DomynRelayClient: not connected") + msg = RelayMessage(payload=event).model_dump_json() + # Full outbound frame at WARNING so we can see exactly what hits the + # wire (event_type, correlation IDs, thought/text parts). Truncate to + # 1000 chars so long observations don't drown the log. + logger.warning("DomynRelayClient: outbound %s", msg[:1000]) + await self._ws.send(msg) + + async def _consume(self, ws: Any) -> None: + """Iterate frames, parse RelayMessage, dispatch to on_event.""" + from domyn_agents.core import RelayMessage + + async for raw in ws: + try: + msg = RelayMessage.model_validate_json(raw) + except Exception as exc: + # Include the raw frame (truncated) so timeouts caused by + # silently-dropped TOOL_END frames can be diagnosed. The + # platform's schema sometimes diverges from domyn-agents' + # ToolAction (e.g. missing/extra fields). + logger.warning( + "DomynRelayClient: dropping malformed frame: %s | raw=%s", + str(exc)[:200], + str(raw)[:500], + ) + continue + if self._on_event is None: + continue + try: + await self._on_event(msg.payload) + except Exception as exc: + logger.warning("DomynRelayClient: on_event raised: %s", exc) + + async def connect(self) -> None: + """Spawn the connect loop task. 
Returns immediately.""" + if self._task is not None and not self._task.done(): + return + self._stop.clear() + self._task = asyncio.create_task(self._connect_loop()) + + async def disconnect(self) -> None: + """Stop the connect loop and close the active socket.""" + self._stop.set() + ws, self._ws = self._ws, None + if ws is not None: + try: + await ws.close() + except Exception: + pass + if self._task is not None: + self._task.cancel() + try: + await self._task + except (asyncio.CancelledError, Exception): + pass + self._task = None + + async def _connect_loop(self) -> None: + import websockets + + attempt = 0 + while not self._stop.is_set(): + try: + async with websockets.connect( + self._ws_url, additional_headers=self._headers + ) as ws: + self._ws = ws + attempt = 0 + await self._consume(ws) + except Exception as exc: + logger.warning("DomynRelayClient: connection error - %s", exc) + finally: + self._ws = None + + if self._stop.is_set(): + break + + delay = _backoff_delay(attempt) + logger.debug("DomynRelayClient: reconnecting in %.1fs", delay) + try: + await asyncio.wait_for(self._stop.wait(), timeout=delay) + break # stop requested during sleep + except asyncio.TimeoutError: + pass + attempt = min(attempt + 1, 6) diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/plugin.yaml b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/plugin.yaml index b6a121a..09f1994 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/plugin.yaml +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/plugin.yaml @@ -1,3 +1,3 @@ name: hermes_platform_gateway -version: "1.0" -description: Domyn platform gateway β€” discovers canvas tools and bridges hermes ⇄ relay WebSocket +version: "2.0" +description: Domyn platform gateway β€” multi-conversation adapter; bridges hermes gateway ⇄ Domyn relay WebSocket diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/pyproject.toml 
b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/pyproject.toml index ad9bd00..8b55654 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/pyproject.toml +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "hermes-platform-gateway" -version = "0.2.0" +version = "0.3.0" description = "Dynamic platform tool gateway plugin for hermes-agent" requires-python = ">=3.11" dependencies = [ From 50e9c2d0350d5ada0161ca5f7fc55dd36a6d165a Mon Sep 17 00:00:00 2001 From: lbenzoni Date: Mon, 25 May 2026 16:25:37 +0200 Subject: [PATCH 2/9] fix: adjust to domyn-agent linting --- .../hermes_platform_gateway/__init__.py | 39 +++++--- .../hermes_platform_gateway/adapter.py | 94 +++++++++++-------- .../hermes_platform_gateway/client.py | 9 +- .../hermes_platform_gateway/relay_client.py | 21 ++--- .../hermes_platform_gateway/schema.py | 1 + 5 files changed, 99 insertions(+), 65 deletions(-) diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py index 1b1ca0e..e372bb6 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py @@ -4,6 +4,7 @@ adapter opens one WebSocket per worker to the Domyn relay and routes each ``conversation_id`` to its own hermes session. 
""" + from __future__ import annotations import asyncio @@ -11,14 +12,15 @@ import logging import os import uuid -from typing import Any, Callable +from collections.abc import Callable +from typing import Any -from hermes_platform_gateway.client import ( - fetch_tools, - build_ws_url, +from .client import ( RefreshLoop, + build_ws_url, + fetch_tools, ) -from hermes_platform_gateway.schema import convert_schema +from .schema import convert_schema logger = logging.getLogger(__name__) @@ -34,6 +36,7 @@ # Helpers — pure functions, no adapter / hermes state # --------------------------------------------------------------------------- + def _extract_last_reasoning(conversation_history: Any) -> str: """Return the current turn's reasoning trace, or ``""`` when absent. @@ -90,6 +93,7 @@ def _hardcoded_tool_thought(tool_name: str, args: Any) -> str: # Hermes runtime lookups — read-only access to gateway internals # --------------------------------------------------------------------------- + def _runner_ref_or_none() -> Any: """Return the live ``GatewayRunner`` instance, or None.""" try: @@ -127,6 +131,7 @@ def _chat_id_and_thought_for_task(task_id: str) -> tuple[str | None, str]: # affinity. ``_schedule_on_gateway_loop`` is the cross-loop bridge. # --------------------------------------------------------------------------- + def _schedule_on_gateway_loop(coro: Any, *, label: str) -> None: """Fire-and-forget schedule of *coro* on the gateway's event loop.""" runner = _runner_ref_or_none() @@ -150,6 +155,7 @@ def _schedule_on_gateway_loop(coro: Any, *, label: str) -> None: # when new canvas tools appear.
# --------------------------------------------------------------------------- + def _start_refresh_loop(**kwargs: Any) -> None: """Indirection so tests can stub the daemon thread.""" RefreshLoop(**kwargs).start() @@ -190,6 +196,7 @@ async def handler(args: dict, **kwargs: Any) -> str: # Plugin entry point # --------------------------------------------------------------------------- + def register(ctx: Any) -> None: """Plugin entry β€” called once by hermes' plugin loader at startup.""" missing = [v for v in _REQUIRED if not os.environ.get(v)] @@ -210,7 +217,10 @@ def register(ctx: Any) -> None: try: raw_tools = fetch_tools( - base_url, space_id, channel_id, api_key, + base_url, + space_id, + channel_id, + api_key, configuration_id=configuration_id, ) except Exception as exc: @@ -226,8 +236,8 @@ def register(ctx: Any) -> None: headers = {"channel-id": channel_id, "space-id": space_id, "api-key": api_key} def _factory(config: Any) -> Any: - from hermes_platform_gateway.adapter import DomynPlatformAdapter - from hermes_platform_gateway.relay_client import DomynRelayClient + from .adapter import DomynPlatformAdapter + from .relay_client import DomynRelayClient def relay_factory(on_event: Callable[[Any], Any]) -> Any: return DomynRelayClient(ws_url=ws_url, headers=headers, on_event=on_event) @@ -274,7 +284,8 @@ def _check() -> bool: registered_names.add(name) logger.info( "platform-gateway: registered domyn adapter with %d platform tool(s): %s", - len(registered_names), sorted(registered_names), + len(registered_names), + sorted(registered_names), ) # --- Hooks --- @@ -302,13 +313,19 @@ def _on_pre_tool_call( effective_thought = thought or _hardcoded_tool_thought(tool_name, args) logger.debug( "platform-gateway: pre_tool_call %s task_id=%s chat_id=%s is_platform=%s has_real_thought=%s", - tool_name, task_id, chat_id, is_platform, bool(thought), + tool_name, + task_id, + chat_id, + is_platform, + bool(thought), ) # Always stash chat_id (+ thought) β€” post_tool_call 
reads this # back regardless of branch. adapter.record_task_chat( - task_id=task_id, chat_id=chat_id, thought=effective_thought, + task_id=task_id, + chat_id=chat_id, + thought=effective_thought, ) if is_platform: diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py index 9c4fab7..57a2b89 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py @@ -12,15 +12,20 @@ adapter emits TOOL_START, awaits TOOL_END/TOOL_ERROR from the relay _resolve_tool_call wakes the awaiting handler via call_soon_threadsafe """ + from __future__ import annotations import asyncio +import contextlib import json import logging import uuid +from collections.abc import Callable from datetime import datetime -from typing import Any, Callable, Dict, Optional +from typing import Any +from gateway.config import Platform, PlatformConfig +from gateway.platform_registry import PlatformEntry, platform_registry from gateway.platforms.base import ( BasePlatformAdapter, MessageEvent, @@ -28,8 +33,6 @@ SendResult, ) from gateway.session import SessionSource -from gateway.config import Platform, PlatformConfig -from gateway.platform_registry import PlatformEntry, platform_registry logger = logging.getLogger(__name__) @@ -38,6 +41,7 @@ # Module-level helpers # --------------------------------------------------------------------------- + def _build_session_key(channel_id: str, conversation_id: str) -> str: return f"domyn:{channel_id}:{conversation_id}" @@ -95,6 +99,7 @@ def _ensure_domyn_registered() -> None: # Adapter # --------------------------------------------------------------------------- + class DomynPlatformAdapter(BasePlatformAdapter): """Bridges the Domyn relay WebSocket to 
hermes' gateway runner.""" @@ -112,14 +117,14 @@ def __init__( super().__init__(config=config, platform=Platform("domyn")) self._channel_id = channel_id # Per-conversation state — keyed by adapter-internal session_key. - self._turn_by_session: Dict[str, Any] = {} + self._turn_by_session: dict[str, Any] = {} # Per-tool-call state — keyed by hermes' registry task_id. - self._chat_id_by_task: Dict[str, str] = {} - self._thought_by_task: Dict[str, str] = {} + self._chat_id_by_task: dict[str, str] = {} + self._thought_by_task: dict[str, str] = {} # Per-call_id state — for visibility (built-in tools) vs real # platform tool round-trips. Disjoint keyspaces. - self._visibility_call_id_by_task: Dict[str, str] = {} - self._pending_calls: Dict[str, Any] = {} # call_id -> (Future, loop) + self._visibility_call_id_by_task: dict[str, str] = {} + self._pending_calls: dict[str, Any] = {} # call_id -> (Future, loop) self._client = relay_client_factory(self._on_event) async def connect(self) -> bool: @@ -135,12 +140,10 @@ async def disconnect(self) -> None: async def send_typing(self, chat_id: str, metadata: Any = None) -> None: return None - async def get_chat_info(self, chat_id: str) -> Dict[str, Any]: + async def get_chat_info(self, chat_id: str) -> dict[str, Any]: return {"name": chat_id, "type": "dm", "chat_id": chat_id} - async def on_processing_complete( - self, event: "MessageEvent", outcome: Any - ) -> None: + async def on_processing_complete(self, event: MessageEvent, outcome: Any) -> None: """Emit the terminal END *after* the gateway's final send returns. ``BasePlatformAdapter._process_message_background`` calls this @@ -218,8 +221,8 @@ async def send( self, chat_id: str, content: str, - reply_to: Optional[str] = None, - metadata: Optional[Dict[str, Any]] = None, + reply_to: str | None = None, + metadata: dict[str, Any] | None = None, ) -> SendResult: """Emit AGENT_END for one visible chat message.
@@ -235,7 +238,8 @@ async def send( preview = (content or "")[:100] logger.warning( "domyn-adapter: send for %s with no prior AGENT_START (preview=%r)", - session_key, preview, + session_key, + preview, ) return SendResult(success=False, error="no prior AGENT_START") @@ -294,8 +298,8 @@ async def emit_visibility_tool_start( *, chat_id: str, tool_name: str, - args: Dict[str, Any], - thought: Optional[str], + args: dict[str, Any], + thought: str | None, call_id: str, ) -> None: """Push TOOL_START for a hermes built-in tool (visibility only). @@ -327,7 +331,8 @@ async def emit_visibility_tool_start( except Exception as exc: logger.warning( "domyn-adapter: emit_visibility_tool_start %s failed - %s", - tool_name, exc, + tool_name, + exc, ) async def emit_visibility_tool_end( @@ -362,7 +367,8 @@ async def emit_visibility_tool_end( except Exception as exc: logger.warning( "domyn-adapter: emit_visibility_tool_end %s failed - %s", - tool_name, exc, + tool_name, + exc, ) # ===================================================================== @@ -374,8 +380,8 @@ async def call_tool( *, session_key: str, tool_name: str, - args: Dict[str, Any], - thought: Optional[str] = None, + args: dict[str, Any], + thought: str | None = None, timeout: float = 120.0, ) -> str: """Send TOOL_START, await TOOL_END/TOOL_ERROR, return the observation as JSON.""" @@ -408,7 +414,9 @@ async def call_tool( ) logger.debug( "domyn-adapter: TOOL_START name=%s call_id=%s conversation_id=%s", - tool_name, call_id, turn.conversation_id, + tool_name, + call_id, + turn.conversation_id, ) try: await self._client.send_event(start) @@ -416,7 +424,9 @@ async def call_tool( self._pending_calls.pop(call_id, None) logger.warning( "domyn-adapter: TOOL_START send failed for %s call_id=%s - %s", - tool_name, call_id, exc, + tool_name, + call_id, + exc, ) return json.dumps({"error": f"send failed: {exc}"}) @@ -424,20 +434,25 @@ async def call_tool( observation = await asyncio.wait_for(fut, timeout=timeout) 
logger.debug( "domyn-adapter: TOOL_END received name=%s call_id=%s", - tool_name, call_id, + tool_name, + call_id, ) return _serialise_observation(observation) - except asyncio.TimeoutError: + except TimeoutError: self._pending_calls.pop(call_id, None) logger.warning( "domyn-adapter: TOOL timeout name=%s call_id=%s after %.1fs", - tool_name, call_id, timeout, + tool_name, + call_id, + timeout, ) return json.dumps({"error": f"Tool '{tool_name}' timed out after {timeout}s"}) except Exception as exc: logger.warning( "domyn-adapter: TOOL future raised name=%s call_id=%s - %s", - tool_name, call_id, exc, + tool_name, + call_id, + exc, ) return json.dumps({"error": str(exc)}) @@ -454,14 +469,17 @@ def _resolve_tool_call(self, event: Any) -> None: if entry is None: logger.warning( "domyn-adapter: %s call_id=%s has no pending future (pending=%s)", - et_value, call_id, sorted(self._pending_calls.keys()), + et_value, + call_id, + sorted(self._pending_calls.keys()), ) return fut, fut_loop = entry if fut.done(): logger.warning( "domyn-adapter: %s call_id=%s future already resolved", - et_value, call_id, + et_value, + call_id, ) return if event.event_type == ExecutionEventType.TOOL_ERROR: @@ -488,7 +506,7 @@ def _resolve_future( fut_loop: asyncio.AbstractEventLoop, *, result: Any = None, - exc: Optional[BaseException] = None, + exc: BaseException | None = None, ) -> None: """Resolve a Future from a potentially different event loop. @@ -497,6 +515,7 @@ def _resolve_future( receive loop, which calls us, runs in the gateway's loop. ``call_soon_threadsafe`` is the canonical cross-loop bridge. """ + def _apply() -> None: if fut.done(): return @@ -504,11 +523,10 @@ def _apply() -> None: fut.set_exception(exc) else: fut.set_result(result) - try: + + # Worker loop closed already β€” nothing waiting. + with contextlib.suppress(RuntimeError): fut_loop.call_soon_threadsafe(_apply) - except RuntimeError: - # Worker loop closed already β€” nothing waiting. 
- pass # ===================================================================== # Per-task bookkeeping (used by pre_tool_call / post_tool_call hooks) @@ -518,9 +536,7 @@ def session_key_for_chat(self, chat_id: str) -> str: """Derive the adapter's internal session_key from a chat_id.""" return _build_session_key(self._channel_id, chat_id) - def record_task_chat( - self, *, task_id: str, chat_id: str, thought: Optional[str] = None - ) -> None: + def record_task_chat(self, *, task_id: str, chat_id: str, thought: str | None = None) -> None: """Stash (chat_id, thought) for a registry task_id. The platform tool handler only receives ``task_id`` from @@ -541,7 +557,7 @@ def forget_task(self, *, task_id: str) -> None: self._chat_id_by_task.pop(task_id, None) self._thought_by_task.pop(task_id, None) - def thought_for_task(self, task_id: str) -> Optional[str]: + def thought_for_task(self, task_id: str) -> str | None: """Return the thought stashed by pre_tool_call, if any.""" return self._thought_by_task.get(task_id) @@ -550,6 +566,6 @@ def record_visibility_call(self, *, task_id: str, call_id: str) -> None: if task_id and call_id: self._visibility_call_id_by_task[task_id] = call_id - def pop_visibility_call(self, task_id: str) -> Optional[str]: + def pop_visibility_call(self, task_id: str) -> str | None: """Return-and-clear the visibility call_id for *task_id*.""" return self._visibility_call_id_by_task.pop(task_id, None) if task_id else None diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py index c4ad189..61f138d 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py @@ -1,10 +1,12 @@ """Platform relay client: tool discovery and WebSocket connection 
management.""" + from __future__ import annotations import logging import threading import time -from typing import Any, Callable +from collections.abc import Callable +from typing import Any import httpx @@ -27,7 +29,7 @@ def build_ws_url(base_url: str) -> str: """ for prefix in ("https://", "http://"): if base_url.startswith(prefix): - base_url = base_url[len(prefix):] + base_url = base_url[len(prefix) :] break base_url = base_url.rstrip("/") scheme = "ws" if _is_localhost(base_url) else "wss" @@ -89,6 +91,7 @@ def _deregister_tool(name: str) -> None: """Remove a tool from the hermes tool registry.""" try: from tools.registry import registry + registry.deregister(name) logger.info("platform-gateway: deregistered tool '%s'", name) except ImportError: @@ -139,7 +142,7 @@ def _run(self) -> None: self._refresh() def _refresh(self) -> None: - from hermes_platform_gateway.schema import convert_schema + from .schema import convert_schema try: raw_tools = fetch_tools( diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py index 9461c38..0869035 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py @@ -3,12 +3,15 @@ Pure transport: framing, connect, send_event, receive-loop dispatch. No business logic — adapter.py owns event routing.
""" + from __future__ import annotations import asyncio +import contextlib import logging import random -from typing import Any, Awaitable, Callable +from collections.abc import Awaitable, Callable +from typing import Any logger = logging.getLogger(__name__) @@ -18,7 +21,7 @@ def _backoff_delay(attempt: int, *, rng: Callable[[], float] = random.random) -> delay = min(30, 0.5 * 2**attempt) * (0.5 + 0.5 * rng()) """ - base = min(30.0, 0.5 * (2 ** attempt)) + base = min(30.0, 0.5 * (2**attempt)) return base * (0.5 + 0.5 * rng()) @@ -91,16 +94,12 @@ async def disconnect(self) -> None: self._stop.set() ws, self._ws = self._ws, None if ws is not None: - try: + with contextlib.suppress(Exception): await ws.close() - except Exception: - pass if self._task is not None: self._task.cancel() - try: + with contextlib.suppress(asyncio.CancelledError, Exception): await self._task - except (asyncio.CancelledError, Exception): - pass self._task = None async def _connect_loop(self) -> None: @@ -109,9 +108,7 @@ async def _connect_loop(self) -> None: attempt = 0 while not self._stop.is_set(): try: - async with websockets.connect( - self._ws_url, additional_headers=self._headers - ) as ws: + async with websockets.connect(self._ws_url, additional_headers=self._headers) as ws: self._ws = ws attempt = 0 await self._consume(ws) @@ -128,6 +125,6 @@ async def _connect_loop(self) -> None: try: await asyncio.wait_for(self._stop.wait(), timeout=delay) break # stop requested during sleep - except asyncio.TimeoutError: + except TimeoutError: pass attempt = min(attempt + 1, 6) diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/schema.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/schema.py index 18e51fa..59ae82b 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/schema.py +++ 
b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/schema.py @@ -1,4 +1,5 @@ """Convert platform tool definitions to hermes JSON Schema format.""" + from __future__ import annotations _TYPE_MAP: dict[str, str] = { From ba42948f75529a6c98ae41bdb62eba13e1129b80 Mon Sep 17 00:00:00 2001 From: lbenzoni Date: Tue, 26 May 2026 12:03:07 +0200 Subject: [PATCH 3/9] feat: updated adapter to handle both response and agent end --- services/hermes_platform_gateway/Dockerfile | 11 +++- .../hermes-config.yaml | 21 +++---- .../hermes_platform_gateway/locales/en.yaml | 55 +++++++++++++++++++ .../hermes_platform_gateway/adapter.py | 29 ++++++++-- 4 files changed, 96 insertions(+), 20 deletions(-) create mode 100644 services/hermes_platform_gateway/locales/en.yaml diff --git a/services/hermes_platform_gateway/Dockerfile b/services/hermes_platform_gateway/Dockerfile index 98dba79..3b68cd0 100644 --- a/services/hermes_platform_gateway/Dockerfile +++ b/services/hermes_platform_gateway/Dockerfile @@ -42,7 +42,14 @@ RUN pip install --no-cache-dir /opt/hermes-platform-gateway && \ cp /opt/hermes-platform-gateway/plugin.yaml \ ${HERMES_HOME}/plugins/hermes_platform_gateway/ -# 5) hermes config template + entrypoint that materialises it. +# 5) i18n catalog β€” upstream hermes' pip install does NOT package its +# locales/ directory, so every t() call falls back to the raw key +# (you'd see "gateway.approve.session_singular" rendered verbatim in +# chat replies to /approve). We drop our own minimal en.yaml next to +# the installed agent/ package so agent.i18n._locales_dir() finds it. +COPY locales/en.yaml /usr/local/lib/python3.11/site-packages/locales/en.yaml + +# 6) hermes config template + entrypoint that materialises it. # gateway mode reads ~/.hermes/config.yaml via read_raw_config() which # does NOT expand ${VAR} references β€” so we resolve them at container # start with entrypoint.sh and write the result into $HERMES_HOME. 
@@ -50,7 +57,7 @@ COPY hermes-config.yaml /opt/hermes-config.template.yaml COPY entrypoint.sh /usr/local/bin/entrypoint.sh RUN chmod +x /usr/local/bin/entrypoint.sh -# 6) Run hermes in gateway mode. The gateway is the headless, multi-session +# 7) Run hermes in gateway mode. The gateway is the headless, multi-session # runner β€” it doesn't open a TUI and doesn't need a TTY. AGENT_START # events for any conversation_id arriving on the subscribed channel are # routed by the gateway to a per-conversation hermes session. diff --git a/services/hermes_platform_gateway/hermes-config.yaml b/services/hermes_platform_gateway/hermes-config.yaml index 4e0bb46..ab47eb7 100644 --- a/services/hermes_platform_gateway/hermes-config.yaml +++ b/services/hermes_platform_gateway/hermes-config.yaml @@ -26,6 +26,9 @@ agent: # messages into the chat. Default 180s; set 0 to disable. Without this # users see "Still working..." every 3 minutes during long turns. gateway_notify_interval: 0 + reasoning_effort: "medium" # empty = medium (default). Options: none, minimal, low, medium, high, xhigh (max) + + display: # How hermes handles a new message that arrives during an active turn. @@ -34,15 +37,9 @@ display: # the new message behind the current one; nothing visible until the # current turn completes. "steer" injects as additional context. busy_input_mode: queue - # Suppress hermes' mid-turn narrative chat messages ("Ti aiuto a - # creare uno script…", "Let me check the docs first…", etc). - # Default true β†’ hermes calls `interim_assistant_callback` for every - # bit of LLM text that lands alongside a tool call, each becoming its - # own outbound AGENT_END frame. Apps that treat AGENT_END as the - # turn-terminator break when an interim frame arrives mid-turn. - # With false hermes stays silent until the final reply, so each turn - # produces exactly one AGENT_END (the deliverable) followed by END. 
- # Trade-off: long turns appear silent in the chat until the final - # answer lands β€” TOOL_START/TOOL_END visibility frames still go out, - # which is what keeps the UI alive in the meantime. - interim_assistant_messages: false + # Mid-turn narrative ("Ti aiuto a creare uno script…") is enabled. + # The adapter routes these to LLM_END events (not AGENT_END), so + # apps that treat AGENT_END as the turn-terminator stay correct β€” + # only the final response and slash-command replies emit AGENT_END. + # See ``DomynPlatformAdapter.send`` for the routing logic. + interim_assistant_messages: true diff --git a/services/hermes_platform_gateway/locales/en.yaml b/services/hermes_platform_gateway/locales/en.yaml new file mode 100644 index 0000000..b5d838e --- /dev/null +++ b/services/hermes_platform_gateway/locales/en.yaml @@ -0,0 +1,55 @@ +# Minimal i18n catalog for the Domyn worker. +# +# Hermes' ``pip install`` from git doesn't package the ``locales/`` +# directory shipped in the repo, so the in-container i18n catalog is +# missing and every ``t()`` call falls back to the raw key (you'd see +# things like ``gateway.approve.session_singular`` rendered verbatim in +# chat). We bake this file in via the Dockerfile to backfill the keys +# hermes' gateway emits during normal slash-command operation. +# +# Scope is deliberately narrow β€” we ship only the keys that surface as +# user-facing text in this deployment (approval / deny flows, draining, +# expired-approval notices, the bundled ``approval.*`` prompt strings). +# Other ``t()`` calls hermes makes will still fall back to the raw key +# until upstream fixes its packaging or we expand this file. 
+ +approval: + dangerous_header: "⚠️ DANGEROUS COMMAND: {description}" + choose_long: " [o]nce | [s]ession | [a]lways | [d]eny" + choose_short: " [o]nce | [s]ession | [d]eny" + prompt_long: " Choice [o/s/a/D]: " + prompt_short: " Choice [o/s/D]: " + timeout: " ⏱ Timeout - denying command" + allowed_once: " βœ“ Allowed once" + allowed_session: " βœ“ Allowed for this session" + allowed_always: " βœ“ Added to permanent allowlist" + denied: " βœ— Denied" + cancelled: " βœ— Cancelled" + blocklist_message: "This command is on the unconditional blocklist and cannot be approved." + +gateway: + approval_expired: "⚠️ Approval expired (agent is no longer waiting). Ask the agent to try again." + draining: "⏳ Draining {count} active agent(s) before restart..." + goal_cleared: "βœ“ Goal cleared." + no_active_goal: "No active goal." + config_read_failed: "⚠️ Could not read config.yaml: {error}" + config_save_failed: "⚠️ Could not save config: {error}" + + # /approve replies β€” key shape is gateway.approve.{choice}_{plural} + # where choice ∈ {once, session, always}, plural ∈ {singular, plural}. + # See gateway/run.py around line 13455. + approve: + no_pending: "No pending command to approve." + once_singular: "βœ… Command approved. The agent is resuming..." + once_plural: "βœ… {count} commands approved. The agent is resuming..." + session_singular: "βœ… Command approved (pattern approved for this session). The agent is resuming..." + session_plural: "βœ… {count} commands approved (pattern approved for this session). The agent is resuming..." + always_singular: "βœ… Command approved (pattern approved permanently). The agent is resuming..." + always_plural: "βœ… {count} commands approved (pattern approved permanently). The agent is resuming..." + + # /deny replies β€” symmetric shape to /approve. + deny: + no_pending: "No pending command to deny." + stale: "❌ Command denied (approval was stale)." + denied_singular: "❌ Command denied. The agent is resuming with a BLOCKED result." 
+ denied_plural: "❌ {count} commands denied. The agent is resuming with BLOCKED results." diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py index 57a2b89..6bdc7c3 100644 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py +++ b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py @@ -224,16 +224,27 @@ async def send( reply_to: str | None = None, metadata: dict[str, Any] | None = None, ) -> SendResult: - """Emit AGENT_END for one visible chat message. - - Called multiple times per turn (interim narrative, "⏳ Still - working…" notifiers, the final reply). END is NOT emitted here - β€” that's per-turn, fired from ``on_processing_complete``. + """Emit either RESPONSE (interim narrative) or AGENT_END (final). + + Routing is by ``reply_to``: + - ``reply_to is None``: hermes' ``_interim_assistant_callback`` + calls ``adapter.send`` directly (mid-turn narrative like "I'll + do X, then Y"). We emit ``RESPONSE`` with ``is_partial=True`` + β€” semantically "incremental assistant text", not a thought, + and no need for a paired LLM_START. + - ``reply_to is not None``: the base class' ``_send_with_retry`` + anchors final responses (and slash-command replies) to the + triggering message_id. We emit ``AGENT_END``. + + END is NOT emitted here β€” that's per-turn, fired from + ``on_processing_complete``. 
""" from domyn_agents.core import BaseEvent, ExecutionEventType, Part session_key = _build_session_key(self._channel_id, chat_id) turn = self._turn_by_session.get(session_key) + logger.warning("EVENT CONTENT: %s", content) + logger.warning("EVENT METADATA: %s", metadata) if turn is None: preview = (content or "")[:100] logger.warning( @@ -243,16 +254,22 @@ async def send( ) return SendResult(success=False, error="no prior AGENT_START") + is_final = reply_to is not None + event_type = ( + ExecutionEventType.AGENT_END if is_final + else ExecutionEventType.RESPONSE + ) # event_id is per-frame unique; let BaseEvent auto-generate. # Copying turn.event_id collides with TOOL_START (also fires # against the same turn) and confuses platforms that key on it. event = BaseEvent( - event_type=ExecutionEventType.AGENT_END, + event_type=event_type, author=turn.author, interaction_id=turn.interaction_id, turn_id=turn.turn_id, conversation_id=turn.conversation_id, content=[Part(text=content)] if content else [], + is_partial=not is_final, ) try: await self._client.send_event(event) From 406bd95d8e28dd2a7d52370251cf601f6d922688 Mon Sep 17 00:00:00 2001 From: Christian Serra Date: Wed, 27 May 2026 12:16:09 +0200 Subject: [PATCH 4/9] add: remove plugin and instal it with domyn install-plugin --- .gitignore | 1 + services/hermes_platform_gateway/Dockerfile | 8 +- .../hermes-platform-gateway => }/plugin.yaml | 0 .../plugins/hermes-platform-gateway/README.md | 317 ---------- .../hermes_platform_gateway/__init__.py | 399 ------------ .../hermes_platform_gateway/adapter.py | 588 ------------------ .../hermes_platform_gateway/client.py | 190 ------ .../hermes_platform_gateway/relay_client.py | 130 ---- .../hermes_platform_gateway/schema.py | 57 -- .../hermes-platform-gateway/pyproject.toml | 22 - 10 files changed, 8 insertions(+), 1704 deletions(-) rename services/hermes_platform_gateway/{plugins/hermes-platform-gateway => }/plugin.yaml (100%) delete mode 100644 
services/hermes_platform_gateway/plugins/hermes-platform-gateway/README.md delete mode 100644 services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py delete mode 100644 services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py delete mode 100644 services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py delete mode 100644 services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py delete mode 100644 services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/schema.py delete mode 100644 services/hermes_platform_gateway/plugins/hermes-platform-gateway/pyproject.toml diff --git a/.gitignore b/.gitignore index ef4e87e..939606a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__/ *.pyo *.pyd .Pythonservices/custom_ui_guardrail/data/ +*.whl diff --git a/services/hermes_platform_gateway/Dockerfile b/services/hermes_platform_gateway/Dockerfile index 3b68cd0..314b130 100644 --- a/services/hermes_platform_gateway/Dockerfile +++ b/services/hermes_platform_gateway/Dockerfile @@ -34,7 +34,13 @@ RUN pip install --no-cache-dir wheels/domyn_agents-*.whl # `from hermes_platform_gateway.client import …` imports resolve. # - The manifest + __init__.py are also dropped under $HERMES_HOME/plugins # so hermes' discovery sees a `plugin.yaml` and calls `register(ctx)`. -COPY plugins/hermes-platform-gateway/ /opt/hermes-platform-gateway/ +# +# The plugin project is scaffolded from the vendored source inside +# domyn-agents via `domyn install-plugin` β€” no in-repo plugin tree needed. +# The hermes-specific manifest (plugin.yaml) is dropped in afterwards +# since `domyn install-plugin` doesn't generate one. 
+RUN domyn install-plugin /opt/hermes-platform-gateway --agent-type hermes +COPY plugin.yaml /opt/hermes-platform-gateway/plugin.yaml RUN pip install --no-cache-dir /opt/hermes-platform-gateway && \ mkdir -p ${HERMES_HOME}/plugins/hermes_platform_gateway && \ cp /opt/hermes-platform-gateway/hermes_platform_gateway/*.py \ diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/plugin.yaml b/services/hermes_platform_gateway/plugin.yaml similarity index 100% rename from services/hermes_platform_gateway/plugins/hermes-platform-gateway/plugin.yaml rename to services/hermes_platform_gateway/plugin.yaml diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/README.md b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/README.md deleted file mode 100644 index 1c87c38..0000000 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/README.md +++ /dev/null @@ -1,317 +0,0 @@ -# hermes-platform-gateway - -A hermes-agent pip plugin that dynamically registers canvas-connected tools at startup. No per-canvas YAML files or shell commands required β€” tools are discovered from the platform and forwarded over the relay WebSocket using the same `TOOL_START`/`TOOL_END` protocol as `domyn expose`. - ---- - -## How it works - -1. **Tool discovery** β€” on startup, the plugin POSTs `/api/agents-service/tool/list_delegate_tools_for_channel` to fetch the canvas tools for the given `space_id` + `channel_id` (and optional `configuration_id`). -2. **Schema conversion** β€” platform parameter lists are translated to hermes JSON Schema objects and registered with `ctx.register_tool`. -3. **Platform adapter registration** β€” `register(ctx)` calls `ctx.register_platform("domyn", "Domyn", factory, check_fn)`. Hermes' gateway runner instantiates the adapter when the gateway starts. -4. **Single WebSocket, multiple conversations** β€” the adapter opens one `wss://{DOMYN_BASE_URL}/relay/v1/ws` connection. 
Inbound `AGENT_START` events are demultiplexed by `conversation_id` and translated to hermes `MessageEvent`s with `session_key = f"domyn:{channel_id}:{conversation_id}"`. -5. **Per-conversation sessions** — hermes' `GatewayRunner` maintains one `AIAgent` per `session_key`, cached LRU, with per-session SQLite-backed history. Different conversations run concurrently in separate asyncio tasks. -6. **Outbound responses** — when an `AIAgent` finishes its turn, the gateway calls `adapter.send(chat_id=conversation_id, text)`. The adapter looks up the originating `AGENT_START`, copies its correlation IDs, and emits one `AGENT_END` frame. -7. **Tool calls** — the tool handler closure reads `parent_agent.session_id`, looks up the `session_key` via the adapter's `_session_id_to_key` map (populated by an `on_session_start` hook), then sends a `TOOL_START` with that conversation's correlation IDs. `TOOL_END`/`TOOL_ERROR` resolve a per-`call_id` future. -8. **Reconnection** — the adapter reconnects with full-jitter exponential backoff. In-flight tool calls fail with an error JSON; in-flight hermes turns continue locally (but their response is lost if the WS is still down at send time — accepted v1 limitation). -9. **Canvas changes** — `RefreshLoop` polls the tool list every `PLATFORM_TOOL_REFRESH_INTERVAL` seconds, registers new tools, deregisters removed ones.
- ---- ## Prerequisites - - Python 3.11+ -- hermes-agent installed -- `domyn-agents` installed (editable install from the local repo — see Installation) - ---- ## Installation - -The plugin manifest (`plugin.yaml` + `__init__.py`) lives in `~/.hermes/plugins/hermes_platform_gateway/`, but `register()` imports the actual implementation (`fetch_tools`, `RefreshLoop`, `build_ws_url`, …) from the pip-installed `hermes_platform_gateway` package — so you must install **into the hermes-agent venv**, not whichever Python happens to be on `$PATH`: - -```bash -HERMES_VENV=~/.hermes/hermes-agent/venv - -# Plugin + dev deps (editable so local changes take effect immediately) -VIRTUAL_ENV=$HERMES_VENV uv pip install --python $HERMES_VENV/bin/python \ - -e /path/to/hermes-platform-gateway - -# domyn-agents — required for event models (RelayMessage, BaseEvent, etc.) -VIRTUAL_ENV=$HERMES_VENV uv pip install --python $HERMES_VENV/bin/python \ - -e /path/to/domyn-agents -``` - -Then mirror the manifest + `__init__.py` into `~/.hermes/plugins/hermes_platform_gateway/` (the plugin loader scans that directory for `plugin.yaml`). - -**Enable the plugin** in `~/.hermes/config.yaml`: - -```yaml -plugins: - enabled: - - platform-gateway -``` - -The plugin is opt-in. hermes will silently ignore it if this line is absent.
- ---- ## Configuration - -All configuration is via environment variables injected before hermes starts (typically by a sandbox supervisor): - -| Variable | Required | Default | Purpose | -|---|---|---|---| -| `DOMYN_API_KEY` | Yes | — | Auth for HTTP tool discovery and WebSocket handshake | -| `DOMYN_BASE_URL` | Yes | — | Platform host, no scheme — bare hostname like `conv2.crystal.io` (the plugin prepends `api.` for HTTP, mirrors `domyn expose`) | -| `DOMYN_SPACE_ID` | Yes | — | Scopes tool discovery to a specific canvas | -| `DOMYN_CHANNEL_ID` | Yes | — | WebSocket relay channel + body field on the discovery POST | -| `DOMYN_CONFIGURATION_ID` | No | — | Pin discovery to a specific configuration (omit for active) | -| `PLATFORM_TOOL_TIMEOUT` | No | `120` | Per-call timeout in seconds | -| `PLATFORM_TOOL_REFRESH_INTERVAL` | No | `60` | Canvas poll interval in seconds; set to `0` to disable | - -If any required variable is missing, the plugin logs a warning and hermes starts with zero platform tools (fail-open). - ---- - -## Quickstart — local stub - -`stub_platform.py` simulates the platform relay on `localhost:9999`. It serves the HTTP tool list and handles WebSocket tool calls. - -### 1. Install dependencies - -```bash -cd /path/to/hermes-platform-gateway -uv pip install -e ".[dev]" -``` - -### 2. Start the stub - -```bash -uv run python stub_platform.py -``` - -Expected output: -``` -[stub] Listening on http://localhost:9999 -[stub] WebSocket at ws://localhost:9999/relay/v1/ws -``` - -The stub exposes one tool: `echo` — it returns `"echo: <message>"` for any `message` argument. - -### 3.
Verify the tool list endpoint - -```bash -curl -s -X POST http://localhost:9999/api/agents-service/tool/list_from_config \ - -H "Content-Type: application/json" \ - -d '{"space_id": "s1"}' | python -m json.tool -``` - -Expected: -```json -[ - { - "name": "echo", - "description": "Echo the input message back", - "parameters": [ - { - "name": "message", - "type": "str", - "is_required": true, - "description": "The message to echo" - } - ] - } -] -``` - -### 4. Run hermes with platform tools - -Open a second terminal: - -```bash -DOMYN_API_KEY=test \ -DOMYN_BASE_URL=localhost:9999 \ -DOMYN_SPACE_ID=s1 \ -DOMYN_CHANNEL_ID=c1 \ -hermes -``` - -At startup you should see a log line like: -``` -platform-gateway: registered 1 platform tool(s) -``` - -### 5. Invoke the platform tool - -Ask hermes to use it: - -``` -> Use the echo tool with message "hello" -``` - -In the stub terminal you will see: -``` -[stub] TOOL_START tool=echo params={'message': 'hello'} call_id= -[stub] TOOL_END observation='echo: hello' -``` - -hermes receives the result and replies with it. - ---- - -## Connecting to the real platform - -Set the env vars to point at your actual platform: - -```bash -DOMYN_API_KEY= \ -DOMYN_BASE_URL=api.yourdomain.com \ -DOMYN_SPACE_ID= \ -DOMYN_CHANNEL_ID= \ -hermes -``` - -`DOMYN_BASE_URL` is a bare hostname (with optional port). The plugin uses `wss://` for remote hosts and `ws://` for localhost. Tool discovery uses `https://` / `http://` by the same rule. 
- ---- ## Development - -```bash -# Install dev dependencies -uv pip install -e ".[dev]" - -# Run tests -uv run --active pytest tests/ -v - -# Run a single file -uv run --active pytest tests/test_relay_client.py -v -``` - -Test files: - -| File | What it covers | -|---|---| -| `tests/test_schema.py` | `convert_schema()` — type mapping, required/optional/default, unknown types | -| `tests/test_client.py` | `fetch_tools()` HTTP requests, `build_ws_url()` scheme selection | -| `tests/test_relay_client.py` | `DomynRelayClient` — framing, receive loop, full-jitter reconnect backoff | -| `tests/test_adapter.py` | `DomynPlatformAdapter` — inbound AGENT_START, send/AGENT_END, tool-call routing, session_id↔key map | -| `tests/test_register.py` | `register(ctx)` — env var checks, schema wiring, handler delegation, correct WS URL and headers | -| `tests/test_integration.py` | End-to-end against a real in-process stub — tool discovery, TOOL_START/END round-trip, inbound AGENT_START, outbound send_event, auth headers | - ---- - -## Protocol reference - -### Tool discovery (HTTP) - -``` -POST https://api.{DOMYN_BASE_URL}/api/agents-service/tool/list_delegate_tools_for_channel -Headers: api-key: {DOMYN_API_KEY} - Content-Type: application/json -Body: { - "space_id": "{DOMYN_SPACE_ID}", - "channel_id": "{DOMYN_CHANNEL_ID}", - "configuration_id": "{DOMYN_CONFIGURATION_ID}" // null when unset - } -``` - -Response: a JSON array of tool definitions, or `{"tools": [...]}`.
- -### Tool call (WebSocket) - -**Outbound (hermes → platform):** -```json -{ - "meta": {}, - "payload": { - "event_type": "tool_start", - "author": "hermes", - "action": { - "type": "ToolAction", - "name": "send_email", - "parameters": {"to": "user@example.com"}, - "call_id": "" - } - } -} -``` - -**Inbound (platform → hermes):** -```json -{ - "meta": {}, - "payload": { - "event_type": "tool_end", - "author": "platform", - "action": { - "type": "ToolAction", - "name": "send_email", - "call_id": "", - "observation": "Email sent successfully" - } - } -} -``` - -The `call_id` on `TOOL_END` / `TOOL_ERROR` resolves the matching in-flight `concurrent.futures.Future`. On `TOOL_ERROR`, the future is rejected and the handler returns `{"error": ""}`. - -### Bidirectional relay (platform → hermes) - -**Inbound user turn (`AGENT_START`):** -```json -{ - "meta": {}, - "payload": { - "event_type": "agent_start", - "author": "platform", - "interaction_id": "", - "turn_id": "", - "action": { - "type": "AgentAction", - "name": "invoke", - "parameters": {"input": "What is the weather?"} - } - } -} -``` - -**Outbound streaming token (`RESPONSE`):** -```json -{ - "meta": {}, - "payload": { - "event_type": "response", - "author": "platform", - "interaction_id": "", - "turn_id": "", - "is_partial": true, - "content": [{"type": "Part", "text": "The weather"}] - } -} -``` - -**Outbound turn complete (`AGENT_END`):** -```json -{ - "meta": {}, - "payload": { - "event_type": "agent_end", - "author": "platform", - "interaction_id": "", - "turn_id": "", - "content": [{"type": "Part", "text": "The weather is sunny."}] - } -} -``` - -`interaction_id` and `turn_id` are copied from the originating `AGENT_START` so the platform can correlate streaming fragments with the triggering request.
- -### WebSocket auth headers - -``` -channel-id: {DOMYN_CHANNEL_ID} -space-id: {DOMYN_SPACE_ID} -api-key: {DOMYN_API_KEY} -``` diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py deleted file mode 100644 index e372bb6..0000000 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/__init__.py +++ /dev/null @@ -1,399 +0,0 @@ -"""hermes-platform-gateway plugin β€” gateway-mode multi-conversation adapter. - -Registers a Domyn platform adapter via ``ctx.register_platform``. The -adapter opens one WebSocket per worker to the Domyn relay and routes -each ``conversation_id`` to its own hermes session. -""" - -from __future__ import annotations - -import asyncio -import json -import logging -import os -import uuid -from collections.abc import Callable -from typing import Any - -from .client import ( - RefreshLoop, - build_ws_url, - fetch_tools, -) -from .schema import convert_schema - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Constants -# --------------------------------------------------------------------------- - -_REQUIRED = ("DOMYN_API_KEY", "DOMYN_BASE_URL", "DOMYN_SPACE_ID", "DOMYN_CHANNEL_ID") - - -# --------------------------------------------------------------------------- -# Helpers β€” pure functions, no adapter / hermes state -# --------------------------------------------------------------------------- - - -def _extract_last_reasoning(conversation_history: Any) -> str: - """Return the current turn's reasoning trace, or ``""`` when absent. - - Walks ``conversation_history`` backwards and picks the last assistant - message whose ``reasoning`` field is populated, stopping at the user - message that started the turn. Mirrors hermes' own extraction at - ``run_agent.py:14066-14072``. 
- """ - if not conversation_history: - return "" - for msg in reversed(list(conversation_history)): - if not isinstance(msg, dict): - continue - if msg.get("role") == "user": - break - if msg.get("role") != "assistant": - continue - reasoning = msg.get("reasoning") or msg.get("reasoning_content") - if not reasoning: - continue - if isinstance(reasoning, str): - return reasoning - if isinstance(reasoning, list): - parts: list[str] = [] - for item in reasoning: - if isinstance(item, str): - parts.append(item) - elif isinstance(item, dict): - text = item.get("text") or item.get("thinking") or "" - if text: - parts.append(str(text)) - if parts: - return "\n".join(parts) - return "" - - -# TODO(thought-process): drop the hardcoded placeholder once the active -# provider surfaces ``reasoning_content`` on assistant messages. -def _hardcoded_tool_thought(tool_name: str, args: Any) -> str: - """Placeholder ``ToolAction.thought`` text when no real reasoning is available.""" - try: - args_preview = ", ".join(f"{k}={v!r}" for k, v in (args or {}).items())[:200] - except Exception: - args_preview = "" - suffix = f" with {args_preview}" if args_preview else "" - return ( - f"[hardcoded placeholder] Hermes is invoking `{tool_name}`{suffix}. " - "Real reasoning will appear here once the LLM provider exposes " - "reasoning_content." - ) - - -# --------------------------------------------------------------------------- -# Hermes runtime lookups β€” read-only access to gateway internals -# --------------------------------------------------------------------------- - - -def _runner_ref_or_none() -> Any: - """Return the live ``GatewayRunner`` instance, or None.""" - try: - from gateway.run import _gateway_runner_ref - except Exception: - return None - return _gateway_runner_ref() if _gateway_runner_ref else None - - -def _chat_id_and_thought_for_task(task_id: str) -> tuple[str | None, str]: - """Return ``(chat_id, current_thought)`` for the AIAgent driving *task_id*. 
- - Scans the gateway's ``_running_agents`` for the agent whose - ``_current_task_id`` matches, then pulls both its ``_chat_id`` and the - most-recent reasoning trace within the current turn so the platform - can render *why* hermes is invoking each tool, not just *that* it is. - Returns ``(None, "")`` when the lookup fails. - """ - runner = _runner_ref_or_none() - if runner is None: - return None, "" - running = getattr(runner, "_running_agents", None) or {} - for agent in running.values(): - if getattr(agent, "_current_task_id", None) != task_id: - continue - chat_id = getattr(agent, "_chat_id", None) - thought = _extract_last_reasoning(getattr(agent, "messages", None)) - return chat_id, thought - return None, "" - - -# --------------------------------------------------------------------------- -# Async dispatch β€” hooks fire on hermes' worker thread; outbound sends -# must reach the relay client's loop or websockets raises about loop -# affinity. ``_schedule_on_gateway_loop`` is the cross-loop bridge. -# --------------------------------------------------------------------------- - - -def _schedule_on_gateway_loop(coro: Any, *, label: str) -> None: - """Fire-and-forget schedule of *coro* on the gateway's event loop.""" - runner = _runner_ref_or_none() - gateway_loop = getattr(runner, "_gateway_loop", None) if runner else None - if gateway_loop is not None and not gateway_loop.is_closed(): - try: - asyncio.run_coroutine_threadsafe(coro, gateway_loop) - return - except Exception as exc: - logger.warning("platform-gateway: schedule %s failed - %s", label, exc) - # Same-loop fallback for tests / synchronous contexts. 
- try: - asyncio.get_event_loop().create_task(coro) - except Exception as exc: - logger.warning("platform-gateway: fallback %s dispatch failed - %s", label, exc) - - -# --------------------------------------------------------------------------- -# Refresh loop and tool handler factory β€” module-level so tests can stub -# the daemon thread and so ``RefreshLoop._refresh`` can rebuild handlers -# when new canvas tools appear. -# --------------------------------------------------------------------------- - - -def _start_refresh_loop(**kwargs: Any) -> None: - """Indirection so tests can stub the daemon thread.""" - RefreshLoop(**kwargs).start() - - -def _make_tool_handler( - adapter_slot: list[Any], tool_name: str, timeout: float -) -> Callable[..., Any]: - """Build the async handler that bridges hermes' tool registry to the adapter. - - The registry only passes ``task_id`` to platform tool handlers (not - ``parent_agent``), so we look up the chat_id via the per-task stash - that ``pre_tool_call`` populated. 
- """ - - async def handler(args: dict, **kwargs: Any) -> str: - adapter = adapter_slot[0] - if adapter is None: - return json.dumps({"error": "platform-gateway: adapter not ready"}) - task_id = kwargs.get("task_id") or "" - chat_id = adapter._chat_id_by_task.get(task_id) - if not chat_id: - return json.dumps({"error": "platform-gateway: no chat_id for task_id"}) - session_key = adapter.session_key_for_chat(chat_id) - thought = adapter.thought_for_task(task_id) - return await adapter.call_tool( - session_key=session_key, - tool_name=tool_name, - args=args, - thought=thought, - timeout=timeout, - ) - - return handler - - -# --------------------------------------------------------------------------- -# Plugin entry point -# --------------------------------------------------------------------------- - - -def register(ctx: Any) -> None: - """Plugin entry β€” called once by hermes' plugin loader at startup.""" - missing = [v for v in _REQUIRED if not os.environ.get(v)] - if missing: - logger.warning( - "platform-gateway: skipping registration β€” missing env vars: %s", - ", ".join(missing), - ) - return - - api_key = os.environ["DOMYN_API_KEY"] - base_url = os.environ["DOMYN_BASE_URL"].rstrip("/") - space_id = os.environ["DOMYN_SPACE_ID"] - channel_id = os.environ["DOMYN_CHANNEL_ID"] - configuration_id = os.environ.get("DOMYN_CONFIGURATION_ID") or None - timeout = float(os.environ.get("PLATFORM_TOOL_TIMEOUT", "120")) - refresh_interval = float(os.environ.get("PLATFORM_TOOL_REFRESH_INTERVAL", "60")) - - try: - raw_tools = fetch_tools( - base_url, - space_id, - channel_id, - api_key, - configuration_id=configuration_id, - ) - except Exception as exc: - logger.warning("platform-gateway: could not fetch tools - %s", exc) - raw_tools = [] - - # Adapter is built lazily by the factory so the gateway controls - # its lifecycle. We close over a one-slot list so the tool - # handlers (registered now) can find the live adapter once the - # factory has been called. 
- adapter_slot: list[Any] = [None] - ws_url = build_ws_url(base_url) - headers = {"channel-id": channel_id, "space-id": space_id, "api-key": api_key} - - def _factory(config: Any) -> Any: - from .adapter import DomynPlatformAdapter - from .relay_client import DomynRelayClient - - def relay_factory(on_event: Callable[[Any], Any]) -> Any: - return DomynRelayClient(ws_url=ws_url, headers=headers, on_event=on_event) - - adapter = DomynPlatformAdapter( - config=config, - channel_id=channel_id, - relay_client_factory=relay_factory, - ) - adapter_slot[0] = adapter - return adapter - - def _check() -> bool: - return all(os.environ.get(v) for v in _REQUIRED) - - ctx.register_platform( - name="domyn", - label="Domyn", - adapter_factory=_factory, - check_fn=_check, - required_env=list(_REQUIRED), - allowed_users_env="DOMYN_ALLOWED_USERS", - allow_all_env="DOMYN_ALLOW_ALL_USERS", - ) - - # --- Tool registration --- - registered_names: set[str] = set() - for tool_def in raw_tools: - name = tool_def.get("name") - if not name: - continue - try: - schema = convert_schema(tool_def) - except Exception as exc: - logger.warning("platform-gateway: skipping tool '%s' - schema error: %s", name, exc) - continue - ctx.register_tool( - name=name, - toolset="platform", - schema=schema, - handler=_make_tool_handler(adapter_slot, name, timeout), - is_async=True, - ) - registered_names.add(name) - logger.info( - "platform-gateway: registered domyn adapter with %d platform tool(s): %s", - len(registered_names), - sorted(registered_names), - ) - - # --- Hooks --- - # pre_tool_call bridges the AIAgent β†’ tool-handler gap: hermes' tool - # registry only passes ``task_id`` to the handler (not ``parent_agent``), - # so we scan _running_agents for the matching agent and stash its - # chat_id (+ current reasoning) under task_id. post_tool_call drops - # the stash and emits a visibility TOOL_END for built-in tools. 
- _platform_tool_names: set[str] = set(registered_names) - - def _on_pre_tool_call( - tool_name: str = "", - args: Any = None, - task_id: str = "", - **_: Any, - ) -> None: - adapter = adapter_slot[0] - if adapter is None or not task_id: - return - chat_id, thought = _chat_id_and_thought_for_task(task_id) - if not chat_id: - return - - is_platform = tool_name in _platform_tool_names - effective_thought = thought or _hardcoded_tool_thought(tool_name, args) - logger.debug( - "platform-gateway: pre_tool_call %s task_id=%s chat_id=%s is_platform=%s has_real_thought=%s", - tool_name, - task_id, - chat_id, - is_platform, - bool(thought), - ) - - # Always stash chat_id (+ thought) β€” post_tool_call reads this - # back regardless of branch. - adapter.record_task_chat( - task_id=task_id, - chat_id=chat_id, - thought=effective_thought, - ) - - if is_platform: - # Real platform tool: the canonical TOOL_START goes out - # from adapter.call_tool with a pending future. Done here. - return - - # Built-in hermes tool: visibility-only TOOL_START. Generate a - # fresh call_id, stash it so post_tool_call can pair the TOOL_END. - call_id = f"visibility-{uuid.uuid4()}" - adapter.record_visibility_call(task_id=task_id, call_id=call_id) - _schedule_on_gateway_loop( - adapter.emit_visibility_tool_start( - chat_id=chat_id, - tool_name=tool_name, - args=args if isinstance(args, dict) else {}, - thought=effective_thought, - call_id=call_id, - ), - label=f"visibility TOOL_START {tool_name}", - ) - - def _on_post_tool_call( - tool_name: str = "", - task_id: str = "", - result: Any = None, - **_: Any, - ) -> None: - adapter = adapter_slot[0] - if adapter is None: - return - chat_id = adapter._chat_id_by_task.get(task_id) - visibility_call_id = adapter.pop_visibility_call(task_id) - adapter.forget_task(task_id=task_id) - - # Visibility TOOL_END only for built-in tools that emitted a - # matching visibility TOOL_START. 
Platform tools have their own - # TOOL_END round-trip from the relay; don't double-emit. - if not visibility_call_id or tool_name in _platform_tool_names: - return - if not chat_id: - return - _schedule_on_gateway_loop( - adapter.emit_visibility_tool_end( - chat_id=chat_id, - tool_name=tool_name, - call_id=visibility_call_id, - observation=result, - ), - label=f"visibility TOOL_END {tool_name}", - ) - - ctx.register_hook("pre_tool_call", _on_pre_tool_call) - ctx.register_hook("post_tool_call", _on_post_tool_call) - # END is emitted by the adapter's on_processing_complete override β€” - # see ``DomynPlatformAdapter.on_processing_complete`` for the - # rationale (post_llm_call/on_session_end both fire too early). - - # --- Tool list refresh --- - if refresh_interval > 0: - _start_refresh_loop( - ctx=ctx, - handler_factory=lambda nm: _make_tool_handler(adapter_slot, nm, timeout), - base_url=base_url, - space_id=space_id, - channel_id=channel_id, - api_key=api_key, - interval=refresh_interval, - initial_names=registered_names, - configuration_id=configuration_id, - ) diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py deleted file mode 100644 index 6bdc7c3..0000000 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/adapter.py +++ /dev/null @@ -1,588 +0,0 @@ -"""DomynPlatformAdapter β€” gateway-mode bridge to the Domyn relay. 
- -Per turn: - AGENT_START (in) β†’ STARTED (out) β†’ handle_message dispatch - β†’ (tool calls round-trip via the relay) - β†’ AGENT_END (out, one per visible chat message) - β†’ END (out, exactly once, from on_processing_complete) - -Per platform tool call: - pre_tool_call hook stashes (chat_id, thought) by task_id - tool handler calls adapter.call_tool(session_key, tool_name, args, thought) - adapter emits TOOL_START, awaits TOOL_END/TOOL_ERROR from the relay - _resolve_tool_call wakes the awaiting handler via call_soon_threadsafe -""" - -from __future__ import annotations - -import asyncio -import contextlib -import json -import logging -import uuid -from collections.abc import Callable -from datetime import datetime -from typing import Any - -from gateway.config import Platform, PlatformConfig -from gateway.platform_registry import PlatformEntry, platform_registry -from gateway.platforms.base import ( - BasePlatformAdapter, - MessageEvent, - MessageType, - SendResult, -) -from gateway.session import SessionSource - -logger = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Module-level helpers -# --------------------------------------------------------------------------- - - -def _build_session_key(channel_id: str, conversation_id: str) -> str: - return f"domyn:{channel_id}:{conversation_id}" - - -def _extract_user_text(event: Any) -> str: - """Pull plain user text out of an AGENT_START relay event.""" - action = getattr(event, "action", None) - params = getattr(action, "parameters", None) if action else None - if params: - for key in ("input", "text", "message", "content"): - val = params.get(key) - if isinstance(val, str) and val: - return val - for part in getattr(event, "content", None) or []: - text = getattr(part, "text", None) - if text: - return text - if params: - return json.dumps(params) - return "" - - -def _serialise_observation(observation: Any) -> str: - """Return *observation* as 
a JSON string (passthrough if already valid JSON).""" - if isinstance(observation, str): - try: - json.loads(observation) - return observation - except json.JSONDecodeError: - pass - return json.dumps(observation) - - -# Register "domyn" as a dynamic Platform value so ``Platform("domyn")`` -# resolves via the enum's _missing_() hook at adapter construction time. -# Module-level side-effect because we need it before any DomynPlatformAdapter -# instance is built (including the standalone test ones that don't go -# through ctx.register_platform). -def _ensure_domyn_registered() -> None: - if not platform_registry.is_registered("domyn"): - platform_registry.register( - PlatformEntry( - name="domyn", - label="Domyn", - adapter_factory=lambda cfg: None, - check_fn=lambda: True, - ) - ) - - -_ensure_domyn_registered() - - -# --------------------------------------------------------------------------- -# Adapter -# --------------------------------------------------------------------------- - - -class DomynPlatformAdapter(BasePlatformAdapter): - """Bridges the Domyn relay WebSocket to hermes' gateway runner.""" - - # ===================================================================== - # Lifecycle (BasePlatformAdapter contract) - # ===================================================================== - - def __init__( - self, - config: PlatformConfig, - *, - channel_id: str, - relay_client_factory: Callable[[Callable[[Any], Any]], Any], - ) -> None: - super().__init__(config=config, platform=Platform("domyn")) - self._channel_id = channel_id - # Per-conversation state β€” keyed by adapter-internal session_key. - self._turn_by_session: dict[str, Any] = {} - # Per-tool-call state β€” keyed by hermes' registry task_id. - self._chat_id_by_task: dict[str, str] = {} - self._thought_by_task: dict[str, str] = {} - # Per-call_id state β€” for visibility (built-in tools) vs real - # platform tool round-trips. Disjoint keyspaces. 
- self._visibility_call_id_by_task: dict[str, str] = {} - self._pending_calls: dict[str, Any] = {} # call_id -> (Future, loop) - self._client = relay_client_factory(self._on_event) - - async def connect(self) -> bool: - await self._client.connect() - self._mark_connected() - return True - - async def disconnect(self) -> None: - await self._client.disconnect() - self._fail_pending("disconnect") - self._mark_disconnected() - - async def send_typing(self, chat_id: str, metadata: Any = None) -> None: - return None - - async def get_chat_info(self, chat_id: str) -> dict[str, Any]: - return {"name": chat_id, "type": "dm", "chat_id": chat_id} - - async def on_processing_complete(self, event: MessageEvent, outcome: Any) -> None: - """Emit the terminal END *after* the gateway's final send returns. - - ``BasePlatformAdapter._process_message_background`` calls this - hook after the final ``adapter.send`` returns (see - ``gateway/platforms/base.py:2964``), so it's the only place we - can emit END *after* the final AGENT_END rather than before. 
- """ - chat_id = getattr(getattr(event, "source", None), "chat_id", None) - if not chat_id: - return - turn = self._turn_by_session.get(_build_session_key(self._channel_id, chat_id)) - if turn is None: - return - await self.emit_end(turn=turn) - - # ===================================================================== - # Inbound events from the relay - # ===================================================================== - - async def _on_event(self, event: Any) -> None: - from domyn_agents.core import ExecutionEventType - - et = getattr(event, "event_type", None) - logger.debug( - "domyn-adapter: inbound event type=%s conversation_id=%s call_id=%s", - getattr(et, "value", et), - getattr(event, "conversation_id", None), - getattr(getattr(event, "action", None), "call_id", None), - ) - if et == ExecutionEventType.AGENT_START: - await self._handle_agent_start(event) - return - if et in (ExecutionEventType.TOOL_END, ExecutionEventType.TOOL_ERROR): - self._resolve_tool_call(event) - - async def _handle_agent_start(self, event: Any) -> None: - conv_id = getattr(event, "conversation_id", None) - if not conv_id: - logger.warning("domyn-adapter: AGENT_START missing conversation_id, dropping") - return - text = _extract_user_text(event) - if not text: - logger.warning("domyn-adapter: AGENT_START with no extractable text, dropping") - return - - self._turn_by_session[_build_session_key(self._channel_id, conv_id)] = event - - # Bookend: STARTED tells the UI "received, working" β€” sent - # BEFORE handle_message dispatches into the gateway so the - # signal is visible even on long first turns. 
- await self.emit_started(turn=event) - - source = SessionSource( - platform=Platform("domyn"), - chat_id=conv_id, - chat_name=conv_id, - chat_type="dm", - user_id=getattr(event, "author", None), - user_name=getattr(event, "author", None), - ) - msg = MessageEvent( - text=text, - message_type=MessageType.TEXT, - source=source, - message_id=getattr(event, "event_id", None), - timestamp=datetime.now(), - ) - await self.handle_message(msg) - - # ===================================================================== - # Outbound emits - # ===================================================================== - - async def send( - self, - chat_id: str, - content: str, - reply_to: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> SendResult: - """Emit either RESPONSE (interim narrative) or AGENT_END (final). - - Routing is by ``reply_to``: - - ``reply_to is None``: hermes' ``_interim_assistant_callback`` - calls ``adapter.send`` directly (mid-turn narrative like "I'll - do X, then Y"). We emit ``RESPONSE`` with ``is_partial=True`` - β€” semantically "incremental assistant text", not a thought, - and no need for a paired LLM_START. - - ``reply_to is not None``: the base class' ``_send_with_retry`` - anchors final responses (and slash-command replies) to the - triggering message_id. We emit ``AGENT_END``. - - END is NOT emitted here β€” that's per-turn, fired from - ``on_processing_complete``. 
- """ - from domyn_agents.core import BaseEvent, ExecutionEventType, Part - - session_key = _build_session_key(self._channel_id, chat_id) - turn = self._turn_by_session.get(session_key) - logger.warning("EVENT CONTENT: %s", content) - logger.warning("EVENT METADATA: %s", metadata) - if turn is None: - preview = (content or "")[:100] - logger.warning( - "domyn-adapter: send for %s with no prior AGENT_START (preview=%r)", - session_key, - preview, - ) - return SendResult(success=False, error="no prior AGENT_START") - - is_final = reply_to is not None - event_type = ( - ExecutionEventType.AGENT_END if is_final - else ExecutionEventType.RESPONSE - ) - # event_id is per-frame unique; let BaseEvent auto-generate. - # Copying turn.event_id collides with TOOL_START (also fires - # against the same turn) and confuses platforms that key on it. - event = BaseEvent( - event_type=event_type, - author=turn.author, - interaction_id=turn.interaction_id, - turn_id=turn.turn_id, - conversation_id=turn.conversation_id, - content=[Part(text=content)] if content else [], - is_partial=not is_final, - ) - try: - await self._client.send_event(event) - except Exception as exc: - logger.warning("domyn-adapter: send_event failed - %s", exc) - return SendResult(success=False, error=str(exc), retryable=True) - return SendResult(success=True, message_id=event.event_id) - - async def emit_started(self, *, turn: Any) -> None: - """Push STARTED on AGENT_START receipt (fire-and-forget).""" - from domyn_agents.core import BaseEvent, ExecutionEventType - - event = BaseEvent( - event_type=ExecutionEventType.STARTED, - author=turn.author, - interaction_id=turn.interaction_id, - turn_id=turn.turn_id, - conversation_id=turn.conversation_id, - ) - try: - await self._client.send_event(event) - except Exception as exc: - logger.warning("domyn-adapter: emit_started failed - %s", exc) - - async def emit_end(self, *, turn: Any) -> None: - """Push END once per turn after AGENT_END has been delivered.""" - from 
domyn_agents.core import BaseEvent, ExecutionEventType - - event = BaseEvent( - event_type=ExecutionEventType.END, - author=turn.author, - interaction_id=turn.interaction_id, - turn_id=turn.turn_id, - conversation_id=turn.conversation_id, - ) - try: - await self._client.send_event(event) - except Exception as exc: - logger.warning("domyn-adapter: emit_end failed - %s", exc) - - async def emit_visibility_tool_start( - self, - *, - chat_id: str, - tool_name: str, - args: dict[str, Any], - thought: str | None, - call_id: str, - ) -> None: - """Push TOOL_START for a hermes built-in tool (visibility only). - - No pending future is registered and no TOOL_END round-trip is - expected from the platform β€” the matching TOOL_END is emitted by - ``emit_visibility_tool_end`` from the post_tool_call hook. - """ - from domyn_agents.core import BaseEvent, ExecutionEventType, ToolAction - - turn = self._turn_by_session.get(_build_session_key(self._channel_id, chat_id)) - if turn is None: - return - event = BaseEvent( - event_type=ExecutionEventType.TOOL_START, - author=turn.author, - interaction_id=turn.interaction_id, - turn_id=turn.turn_id, - conversation_id=turn.conversation_id, - action=ToolAction( - name=tool_name, - parameters=args or {}, - call_id=call_id, - thought=thought, - ), - ) - try: - await self._client.send_event(event) - except Exception as exc: - logger.warning( - "domyn-adapter: emit_visibility_tool_start %s failed - %s", - tool_name, - exc, - ) - - async def emit_visibility_tool_end( - self, - *, - chat_id: str, - tool_name: str, - call_id: str, - observation: Any, - ) -> None: - """Push TOOL_END for a hermes built-in tool (companion to start).""" - from domyn_agents.core import BaseEvent, ExecutionEventType, ToolAction - - turn = self._turn_by_session.get(_build_session_key(self._channel_id, chat_id)) - if turn is None: - return - event = BaseEvent( - event_type=ExecutionEventType.TOOL_END, - author=turn.author, - interaction_id=turn.interaction_id, - 
turn_id=turn.turn_id, - conversation_id=turn.conversation_id, - action=ToolAction( - name=tool_name, - parameters={}, - call_id=call_id, - observation=observation, - ), - ) - try: - await self._client.send_event(event) - except Exception as exc: - logger.warning( - "domyn-adapter: emit_visibility_tool_end %s failed - %s", - tool_name, - exc, - ) - - # ===================================================================== - # Tool-call routing (real platform tools β€” round-trip via the relay) - # ===================================================================== - - async def call_tool( - self, - *, - session_key: str, - tool_name: str, - args: dict[str, Any], - thought: str | None = None, - timeout: float = 120.0, - ) -> str: - """Send TOOL_START, await TOOL_END/TOOL_ERROR, return the observation as JSON.""" - from domyn_agents.core import BaseEvent, ExecutionEventType, ToolAction - - turn = self._turn_by_session.get(session_key) - if turn is None: - return json.dumps({"error": "no active turn for session"}) - - call_id = str(uuid.uuid4()) - loop = asyncio.get_running_loop() - fut: asyncio.Future = loop.create_future() - # Stash (future, loop) β€” _resolve_tool_call runs on the relay - # client's loop, which may differ from this one, so we'll need - # call_soon_threadsafe to wake the awaiter. 
- self._pending_calls[call_id] = (fut, loop) - - start = BaseEvent( - event_type=ExecutionEventType.TOOL_START, - author=turn.author, - interaction_id=turn.interaction_id, - turn_id=turn.turn_id, - conversation_id=turn.conversation_id, - action=ToolAction( - name=tool_name, - parameters=args, - call_id=call_id, - thought=thought, - ), - ) - logger.debug( - "domyn-adapter: TOOL_START name=%s call_id=%s conversation_id=%s", - tool_name, - call_id, - turn.conversation_id, - ) - try: - await self._client.send_event(start) - except Exception as exc: - self._pending_calls.pop(call_id, None) - logger.warning( - "domyn-adapter: TOOL_START send failed for %s call_id=%s - %s", - tool_name, - call_id, - exc, - ) - return json.dumps({"error": f"send failed: {exc}"}) - - try: - observation = await asyncio.wait_for(fut, timeout=timeout) - logger.debug( - "domyn-adapter: TOOL_END received name=%s call_id=%s", - tool_name, - call_id, - ) - return _serialise_observation(observation) - except TimeoutError: - self._pending_calls.pop(call_id, None) - logger.warning( - "domyn-adapter: TOOL timeout name=%s call_id=%s after %.1fs", - tool_name, - call_id, - timeout, - ) - return json.dumps({"error": f"Tool '{tool_name}' timed out after {timeout}s"}) - except Exception as exc: - logger.warning( - "domyn-adapter: TOOL future raised name=%s call_id=%s - %s", - tool_name, - call_id, - exc, - ) - return json.dumps({"error": str(exc)}) - - def _resolve_tool_call(self, event: Any) -> None: - """Wake the awaiter in call_tool when TOOL_END/TOOL_ERROR arrives.""" - from domyn_agents.core import ExecutionEventType - - et_value = getattr(event.event_type, "value", event.event_type) - call_id = getattr(getattr(event, "action", None), "call_id", None) - if not call_id: - logger.warning("domyn-adapter: %s with no call_id, dropping", et_value) - return - entry = self._pending_calls.pop(call_id, None) - if entry is None: - logger.warning( - "domyn-adapter: %s call_id=%s has no pending future 
(pending=%s)", - et_value, - call_id, - sorted(self._pending_calls.keys()), - ) - return - fut, fut_loop = entry - if fut.done(): - logger.warning( - "domyn-adapter: %s call_id=%s future already resolved", - et_value, - call_id, - ) - return - if event.event_type == ExecutionEventType.TOOL_ERROR: - msg = ( - getattr(event, "error_message", None) - or f"platform tool error ({getattr(event, 'error_code', '')})" - ) - self._resolve_future(fut, fut_loop, exc=RuntimeError(msg)) - else: - observation = getattr(getattr(event, "action", None), "observation", None) - self._resolve_future(fut, fut_loop, result=observation) - - def _fail_pending(self, reason: str) -> None: - """Fail every in-flight tool call β€” called on disconnect.""" - for fut, fut_loop in self._pending_calls.values(): - if fut.done(): - continue - fut_loop.call_soon_threadsafe(fut.set_exception, RuntimeError(reason)) - self._pending_calls.clear() - - @staticmethod - def _resolve_future( - fut: asyncio.Future, - fut_loop: asyncio.AbstractEventLoop, - *, - result: Any = None, - exc: BaseException | None = None, - ) -> None: - """Resolve a Future from a potentially different event loop. - - The future is bound to the loop where ``call_tool`` ran (a - worker loop spawned by hermes' ``_run_async``). The relay - receive loop, which calls us, runs in the gateway's loop. - ``call_soon_threadsafe`` is the canonical cross-loop bridge. - """ - - def _apply() -> None: - if fut.done(): - return - if exc is not None: - fut.set_exception(exc) - else: - fut.set_result(result) - - # Worker loop closed already β€” nothing waiting. 
- with contextlib.suppress(RuntimeError): - fut_loop.call_soon_threadsafe(_apply) - - # ===================================================================== - # Per-task bookkeeping (used by pre_tool_call / post_tool_call hooks) - # ===================================================================== - - def session_key_for_chat(self, chat_id: str) -> str: - """Derive the adapter's internal session_key from a chat_id.""" - return _build_session_key(self._channel_id, chat_id) - - def record_task_chat(self, *, task_id: str, chat_id: str, thought: str | None = None) -> None: - """Stash (chat_id, thought) for a registry task_id. - - The platform tool handler only receives ``task_id`` from - ``registry.dispatch`` (not ``parent_agent``), so the - pre_tool_call hook stores the chat_id under task_id here and the - handler reads it back. The optional ``thought`` rides on - ``ToolAction.thought`` so the platform can render *why* the tool - was invoked. - """ - if task_id and chat_id: - self._chat_id_by_task[task_id] = chat_id - if thought: - self._thought_by_task[task_id] = thought - - def forget_task(self, *, task_id: str) -> None: - """Drop the per-task stashes after the tool finishes.""" - if task_id: - self._chat_id_by_task.pop(task_id, None) - self._thought_by_task.pop(task_id, None) - - def thought_for_task(self, task_id: str) -> str | None: - """Return the thought stashed by pre_tool_call, if any.""" - return self._thought_by_task.get(task_id) - - def record_visibility_call(self, *, task_id: str, call_id: str) -> None: - """Pair a visibility TOOL_START's call_id with task_id for post_tool_call.""" - if task_id and call_id: - self._visibility_call_id_by_task[task_id] = call_id - - def pop_visibility_call(self, task_id: str) -> str | None: - """Return-and-clear the visibility call_id for *task_id*.""" - return self._visibility_call_id_by_task.pop(task_id, None) if task_id else None diff --git 
a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py deleted file mode 100644 index 61f138d..0000000 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/client.py +++ /dev/null @@ -1,190 +0,0 @@ -"""Platform relay client: tool discovery and WebSocket connection management.""" - -from __future__ import annotations - -import logging -import threading -import time -from collections.abc import Callable -from typing import Any - -import httpx - -logger = logging.getLogger(__name__) - -_LOCALHOST = ("localhost", "127.0.0.1", "::1") - - -def _is_localhost(base_url: str) -> bool: - host = base_url.split("/")[0].split(":")[0] - return host in _LOCALHOST or base_url.startswith("localhost:") - - -def build_ws_url(base_url: str) -> str: - """Return ws:// for localhost, wss:// otherwise. - - Mirrors ``domyn expose._build_ws_url`` β€” strips any ``http://``/``https://`` - prefix (the env var sometimes carries one) and uses the raw hostname (no - ``api.`` prefix), unlike the HTTP API URL. - """ - for prefix in ("https://", "http://"): - if base_url.startswith(prefix): - base_url = base_url[len(prefix) :] - break - base_url = base_url.rstrip("/") - scheme = "ws" if _is_localhost(base_url) else "wss" - return f"{scheme}://{base_url}/relay/v1/ws" - - -def build_api_base_url(base_url: str) -> str: - """Translate ```` into ``https://api.``. - - Mirrors ``domyn expose._build_api_base`` / ``domyn_platform._resolve_platform_args`` - β€” every platform HTTP call goes through the ``api.`` subdomain. Localhost - is special-cased so the in-process stub keeps working unchanged. 
- """ - if _is_localhost(base_url): - return f"http://{base_url.rstrip('/')}" - transformed = base_url.replace("://", "://api.") - if not transformed.startswith(("http://", "https://")): - transformed = f"https://api.{transformed}" - return transformed.rstrip("/") - - -def fetch_tools( - base_url: str, - space_id: str, - channel_id: str, - api_key: str, - configuration_id: str | None = None, -) -> list[dict[str, Any]]: - """Fetch the canvas tool list from the platform (synchronous HTTP POST). - - Matches ``domyn_agents.integrations.langgraph.domyn_platform._fetch_tool_definitions``: - endpoint is ``list_delegate_tools_for_channel`` and the body carries - ``space_id``, ``channel_id`` and optional ``configuration_id``. - """ - api_base = build_api_base_url(base_url) - url = f"{api_base}/api/agents-service/tool/list_delegate_tools_for_channel" - resp = httpx.post( - url, - headers={"api-key": api_key, "Content-Type": "application/json"}, - json={ - "space_id": space_id, - "channel_id": channel_id, - "configuration_id": configuration_id, - }, - timeout=10.0, - ) - resp.raise_for_status() - data = resp.json() - if isinstance(data, list): - return data - if isinstance(data, dict): - for key in ("tools", "data", "results"): - if key in data and isinstance(data[key], list): - return data[key] - raise ValueError(f"Unexpected tool list response shape: {type(data)}") - - -def _deregister_tool(name: str) -> None: - """Remove a tool from the hermes tool registry.""" - try: - from tools.registry import registry - - registry.deregister(name) - logger.info("platform-gateway: deregistered tool '%s'", name) - except ImportError: - logger.debug("platform-gateway: tools.registry unavailable, cannot deregister '%s'", name) - - -class RefreshLoop: - """Periodically re-fetches the canvas tool list and syncs the hermes registry. - - Runs in a daemon thread. 
On each interval it diffs the live tool list against - the currently-registered set: new tools are registered, removed tools are - deregistered. Unchanged tools are left alone. - - Pass ``_deregister`` in tests to avoid the hermes registry import. - """ - - def __init__( - self, - ctx: Any, - handler_factory: Callable[[str], Callable], - base_url: str, - space_id: str, - channel_id: str, - api_key: str, - interval: float, - initial_names: set[str], - configuration_id: str | None = None, - _deregister: Callable[[str], None] | None = None, - ) -> None: - self._ctx = ctx - self._handler_factory = handler_factory - self._base_url = base_url - self._space_id = space_id - self._channel_id = channel_id - self._configuration_id = configuration_id - self._api_key = api_key - self._interval = interval - self._registered: set[str] = set(initial_names) - self._deregister_fn = _deregister if _deregister is not None else _deregister_tool - - def start(self) -> None: - thread = threading.Thread(target=self._run, daemon=True) - thread.start() - - def _run(self) -> None: - while True: - time.sleep(self._interval) - self._refresh() - - def _refresh(self) -> None: - from .schema import convert_schema - - try: - raw_tools = fetch_tools( - self._base_url, - self._space_id, - self._channel_id, - self._api_key, - configuration_id=self._configuration_id, - ) - except Exception as exc: - logger.warning("platform-gateway: refresh fetch failed - %s", exc) - return - - new_defs = {t["name"]: t for t in raw_tools if t.get("name")} - new_names = set(new_defs.keys()) - - added = new_names - self._registered - removed = self._registered - new_names - - if not added and not removed: - return - - for name in removed: - self._deregister_fn(name) - - for name in added: - try: - schema = convert_schema(new_defs[name]) - except Exception as exc: - logger.warning("platform-gateway: skipping new tool '%s': %s", name, exc) - continue - self._ctx.register_tool( - name=name, - toolset="platform", - 
schema=schema, - handler=self._handler_factory(name), - is_async=True, - ) - - logger.info( - "platform-gateway: canvas refresh - +%d added, -%d removed", - len(added), - len(removed), - ) - self._registered = new_names diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py deleted file mode 100644 index 0869035..0000000 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/relay_client.py +++ /dev/null @@ -1,130 +0,0 @@ -"""Domyn relay WebSocket client. - -Pure transport: framing, connect, send_event, receive-loop dispatch. -No business logic β€” adapter.py owns event routing. -""" - -from __future__ import annotations - -import asyncio -import contextlib -import logging -import random -from collections.abc import Awaitable, Callable -from typing import Any - -logger = logging.getLogger(__name__) - - -def _backoff_delay(attempt: int, *, rng: Callable[[], float] = random.random) -> float: - """Full-jitter exponential backoff. - - delay = min(30, 0.5 * 2**attempt) * (0.5 + 0.5 * rng()) - """ - base = min(30.0, 0.5 * (2**attempt)) - return base * (0.5 + 0.5 * rng()) - - -class DomynRelayClient: - """Async WebSocket client for the Domyn relay. - - Owns the connect/reconnect loop and exposes ``send_event`` for outbound - frames. Inbound frames are passed to a caller-supplied async callback. 
- """ - - def __init__( - self, - ws_url: str, - headers: dict[str, str], - on_event: Callable[[Any], Awaitable[None]] | None = None, - ) -> None: - self._ws_url = ws_url - self._headers = headers - self._on_event = on_event - self._ws: Any = None - self._task: asyncio.Task | None = None - self._stop = asyncio.Event() - - async def send_event(self, event: Any) -> None: - from domyn_agents.core import RelayMessage - - if self._ws is None: - raise RuntimeError("DomynRelayClient: not connected") - msg = RelayMessage(payload=event).model_dump_json() - # Full outbound frame at WARNING so we can see exactly what hits the - # wire (event_type, correlation IDs, thought/text parts). Truncate to - # 1000 chars so long observations don't drown the log. - logger.warning("DomynRelayClient: outbound %s", msg[:1000]) - await self._ws.send(msg) - - async def _consume(self, ws: Any) -> None: - """Iterate frames, parse RelayMessage, dispatch to on_event.""" - from domyn_agents.core import RelayMessage - - async for raw in ws: - try: - msg = RelayMessage.model_validate_json(raw) - except Exception as exc: - # Include the raw frame (truncated) so timeouts caused by - # silently-dropped TOOL_END frames can be diagnosed. The - # platform's schema sometimes diverges from domyn-agents' - # ToolAction (e.g. missing/extra fields). - logger.warning( - "DomynRelayClient: dropping malformed frame: %s | raw=%s", - str(exc)[:200], - str(raw)[:500], - ) - continue - if self._on_event is None: - continue - try: - await self._on_event(msg.payload) - except Exception as exc: - logger.warning("DomynRelayClient: on_event raised: %s", exc) - - async def connect(self) -> None: - """Spawn the connect loop task. 
Returns immediately.""" - if self._task is not None and not self._task.done(): - return - self._stop.clear() - self._task = asyncio.create_task(self._connect_loop()) - - async def disconnect(self) -> None: - """Stop the connect loop and close the active socket.""" - self._stop.set() - ws, self._ws = self._ws, None - if ws is not None: - with contextlib.suppress(Exception): - await ws.close() - if self._task is not None: - self._task.cancel() - with contextlib.suppress(asyncio.CancelledError, Exception): - await self._task - self._task = None - - async def _connect_loop(self) -> None: - import websockets - - attempt = 0 - while not self._stop.is_set(): - try: - async with websockets.connect(self._ws_url, additional_headers=self._headers) as ws: - self._ws = ws - attempt = 0 - await self._consume(ws) - except Exception as exc: - logger.warning("DomynRelayClient: connection error - %s", exc) - finally: - self._ws = None - - if self._stop.is_set(): - break - - delay = _backoff_delay(attempt) - logger.debug("DomynRelayClient: reconnecting in %.1fs", delay) - try: - await asyncio.wait_for(self._stop.wait(), timeout=delay) - break # stop requested during sleep - except TimeoutError: - pass - attempt = min(attempt + 1, 6) diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/schema.py b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/schema.py deleted file mode 100644 index 59ae82b..0000000 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/hermes_platform_gateway/schema.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Convert platform tool definitions to hermes JSON Schema format.""" - -from __future__ import annotations - -_TYPE_MAP: dict[str, str] = { - "str": "string", - "int": "integer", - "float": "number", - "bool": "boolean", - "list": "array", - "dict": "object", -} - - -def convert_schema(tool_def: dict) -> dict: - """Convert a platform tool definition to hermes JSON 
Schema format. - - A field is required unless is_required is explicitly False or a default is present. - """ - name = tool_def["name"] - description = tool_def.get("description", "") - param_list: list[dict] = tool_def.get("parameters") or [] - - properties: dict[str, dict] = {} - required: list[str] = [] - - for p in param_list: - p_name = p.get("name") - if not p_name: - continue - - json_type = _TYPE_MAP.get(p.get("type", "str"), "string") - prop: dict = {"type": json_type} - - p_desc = p.get("description", "") - if p_desc: - prop["description"] = p_desc - - if "default" in p: - prop["default"] = p["default"] - elif p.get("is_required", True) is not False: - required.append(p_name) - - properties[p_name] = prop - - schema: dict = { - "name": name, - "description": description, - "parameters": { - "type": "object", - "properties": properties, - }, - } - if required: - schema["parameters"]["required"] = required - - return schema diff --git a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/pyproject.toml b/services/hermes_platform_gateway/plugins/hermes-platform-gateway/pyproject.toml deleted file mode 100644 index 8b55654..0000000 --- a/services/hermes_platform_gateway/plugins/hermes-platform-gateway/pyproject.toml +++ /dev/null @@ -1,22 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "hermes-platform-gateway" -version = "0.3.0" -description = "Dynamic platform tool gateway plugin for hermes-agent" -requires-python = ">=3.11" -dependencies = [ - "httpx>=0.27", - "websockets>=12", -] - -[project.optional-dependencies] -dev = ["pytest>=7", "pytest-asyncio>=0.23", "respx>=0.20", "httpx>=0.27", "aiohttp>=3.9"] - -[project.entry-points."hermes_agent.plugins"] -platform-gateway = "hermes_platform_gateway" - -[tool.pytest.ini_options] -asyncio_mode = "strict" From d6ff34cf26d23b175baa5f3a85c5169a655335a4 Mon Sep 17 00:00:00 2001 From: Christian Serra Date: Wed, 27 May 2026 12:55:32 +0200 Subject: 
[PATCH 5/9] fix: make build --- services/hermes_platform_gateway/Dockerfile | 4 +++- services/hermes_platform_gateway/Makefile | 12 ++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/services/hermes_platform_gateway/Dockerfile b/services/hermes_platform_gateway/Dockerfile index 314b130..4319a9b 100644 --- a/services/hermes_platform_gateway/Dockerfile +++ b/services/hermes_platform_gateway/Dockerfile @@ -26,8 +26,10 @@ RUN pip install --no-cache-dir \ # 3) domyn-agents β€” required by the gateway plugin for relay event models. # Bundled as a wheel under ./wheels so the build doesn't need the private # igenius PyPI index. +# `langchain-core` is pulled in because the `domyn` CLI eagerly imports the +# langgraph integration (needed by step 4's `domyn install-plugin` call). COPY wheels/ wheels/ -RUN pip install --no-cache-dir wheels/domyn_agents-*.whl +RUN pip install --no-cache-dir wheels/domyn_agents-*.whl langchain-core # 4) hermes-platform-gateway plugin. Two installs are needed: # - The pip package goes into site-packages so the plugin's diff --git a/services/hermes_platform_gateway/Makefile b/services/hermes_platform_gateway/Makefile index 31013de..78728c4 100644 --- a/services/hermes_platform_gateway/Makefile +++ b/services/hermes_platform_gateway/Makefile @@ -1,7 +1,8 @@ -.PHONY: help build up down logs shell clean +.PHONY: help build build-wheel up down logs shell clean help: @echo "Targets:" + @echo " build-wheel Build the wheel file for domyn-agents" @echo " build Build the docker image" @echo " up docker compose up -d" @echo " down docker compose down" @@ -9,7 +10,14 @@ help: @echo " shell Exec a bash shell into the running container" @echo " clean docker compose down -v --rmi local" -build: + +DOMYN_AGENT_PATH ?=../../../domyn-agents +build-wheel: + rm $(DOMYN_AGENT_PATH)/dist/*.whl + $(MAKE) -C $(DOMYN_AGENT_PATH) build-wheel + cp $(DOMYN_AGENT_PATH)/dist/*.whl ./wheels/ + +build: build-wheel docker compose build up: From 
628c7a12b43036158bd7a016128543103142df56 Mon Sep 17 00:00:00 2001 From: Christian Serra Date: Wed, 27 May 2026 13:05:00 +0200 Subject: [PATCH 6/9] fix: remove plugin.yaml it will be installed by domyn agents cli --- services/hermes_platform_gateway/Dockerfile | 2 +- services/hermes_platform_gateway/plugin.yaml | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) delete mode 100644 services/hermes_platform_gateway/plugin.yaml diff --git a/services/hermes_platform_gateway/Dockerfile b/services/hermes_platform_gateway/Dockerfile index 4319a9b..21d019e 100644 --- a/services/hermes_platform_gateway/Dockerfile +++ b/services/hermes_platform_gateway/Dockerfile @@ -42,7 +42,7 @@ RUN pip install --no-cache-dir wheels/domyn_agents-*.whl langchain-core # The hermes-specific manifest (plugin.yaml) is dropped in afterwards # since `domyn install-plugin` doesn't generate one. RUN domyn install-plugin /opt/hermes-platform-gateway --agent-type hermes -COPY plugin.yaml /opt/hermes-platform-gateway/plugin.yaml + RUN pip install --no-cache-dir /opt/hermes-platform-gateway && \ mkdir -p ${HERMES_HOME}/plugins/hermes_platform_gateway && \ cp /opt/hermes-platform-gateway/hermes_platform_gateway/*.py \ diff --git a/services/hermes_platform_gateway/plugin.yaml b/services/hermes_platform_gateway/plugin.yaml deleted file mode 100644 index 09f1994..0000000 --- a/services/hermes_platform_gateway/plugin.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: hermes_platform_gateway -version: "2.0" -description: Domyn platform gateway β€” multi-conversation adapter; bridges hermes gateway ⇄ Domyn relay WebSocket From 2e9b24767b80a47a2d4cf90a9326336606dcdbaa Mon Sep 17 00:00:00 2001 From: Christian Serra Date: Wed, 27 May 2026 13:15:00 +0200 Subject: [PATCH 7/9] fix: stale volume --- services/hermes_platform_gateway/entrypoint.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/services/hermes_platform_gateway/entrypoint.sh b/services/hermes_platform_gateway/entrypoint.sh index 
ed98c21..e647705 100644 --- a/services/hermes_platform_gateway/entrypoint.sh +++ b/services/hermes_platform_gateway/entrypoint.sh @@ -20,6 +20,18 @@ TARGET="$HERMES_HOME/config.yaml" mkdir -p "$HERMES_HOME" +# Re-sync the platform-gateway plugin from the image into $HERMES_HOME on +# every boot. $HERMES_HOME is a docker-compose named volume that persists +# across rebuilds, so plugin files copied there at image-build time get +# shadowed by whatever the volume captured on its first run. Without this +# step, `docker compose build` updates the image but the running plugin +# stays stale until you `down -v`. +PLUGIN_SRC=/opt/hermes-platform-gateway/hermes_platform_gateway +PLUGIN_MANIFEST=/opt/hermes-platform-gateway/plugin.yaml +PLUGIN_DEST="$HERMES_HOME/plugins/hermes_platform_gateway" +mkdir -p "$PLUGIN_DEST" +cp -f "$PLUGIN_SRC"/*.py "$PLUGIN_MANIFEST" "$PLUGIN_DEST/" + python3 - <<'PY' import os import re From a1bf9658be0a6bca9b40fb470555ce66fffc9f83 Mon Sep 17 00:00:00 2001 From: Christian Serra Date: Wed, 27 May 2026 14:23:15 +0200 Subject: [PATCH 8/9] fix: domyn install-plugin --- services/hermes_platform_gateway/Dockerfile | 25 ++++++------------- .../hermes_platform_gateway/entrypoint.sh | 19 ++++++-------- .../hermes-config.yaml | 2 +- 3 files changed, 16 insertions(+), 30 deletions(-) diff --git a/services/hermes_platform_gateway/Dockerfile b/services/hermes_platform_gateway/Dockerfile index 21d019e..54357c6 100644 --- a/services/hermes_platform_gateway/Dockerfile +++ b/services/hermes_platform_gateway/Dockerfile @@ -31,24 +31,13 @@ RUN pip install --no-cache-dir \ COPY wheels/ wheels/ RUN pip install --no-cache-dir wheels/domyn_agents-*.whl langchain-core -# 4) hermes-platform-gateway plugin. Two installs are needed: -# - The pip package goes into site-packages so the plugin's -# `from hermes_platform_gateway.client import …` imports resolve. 
-# - The manifest + __init__.py are also dropped under $HERMES_HOME/plugins -# so hermes' discovery sees a `plugin.yaml` and calls `register(ctx)`. -# -# The plugin project is scaffolded from the vendored source inside -# domyn-agents via `domyn install-plugin` β€” no in-repo plugin tree needed. -# The hermes-specific manifest (plugin.yaml) is dropped in afterwards -# since `domyn install-plugin` doesn't generate one. -RUN domyn install-plugin /opt/hermes-platform-gateway --agent-type hermes - -RUN pip install --no-cache-dir /opt/hermes-platform-gateway && \ - mkdir -p ${HERMES_HOME}/plugins/hermes_platform_gateway && \ - cp /opt/hermes-platform-gateway/hermes_platform_gateway/*.py \ - ${HERMES_HOME}/plugins/hermes_platform_gateway/ && \ - cp /opt/hermes-platform-gateway/plugin.yaml \ - ${HERMES_HOME}/plugins/hermes_platform_gateway/ +# 4) hermes-platform-gateway plugin. +# `domyn install-plugin` drops the vendored plugin source plus its +# `plugin.yaml` manifest directly into ${HERMES_HOME}/plugins/hermes_platform_gateway/, +# which is what hermes' on-disk plugin scan picks up at startup. No pip +# install or manual cp is needed β€” the plugin uses relative imports and +# `domyn-agents` is already on sys.path from step 3. +RUN domyn install-plugin --agent-type hermes # 5) i18n catalog β€” upstream hermes' pip install does NOT package its # locales/ directory, so every t() call falls back to the raw key diff --git a/services/hermes_platform_gateway/entrypoint.sh b/services/hermes_platform_gateway/entrypoint.sh index e647705..6cdd9e8 100644 --- a/services/hermes_platform_gateway/entrypoint.sh +++ b/services/hermes_platform_gateway/entrypoint.sh @@ -20,17 +20,14 @@ TARGET="$HERMES_HOME/config.yaml" mkdir -p "$HERMES_HOME" -# Re-sync the platform-gateway plugin from the image into $HERMES_HOME on -# every boot. 
$HERMES_HOME is a docker-compose named volume that persists -# across rebuilds, so plugin files copied there at image-build time get -# shadowed by whatever the volume captured on its first run. Without this -# step, `docker compose build` updates the image but the running plugin -# stays stale until you `down -v`. -PLUGIN_SRC=/opt/hermes-platform-gateway/hermes_platform_gateway -PLUGIN_MANIFEST=/opt/hermes-platform-gateway/plugin.yaml -PLUGIN_DEST="$HERMES_HOME/plugins/hermes_platform_gateway" -mkdir -p "$PLUGIN_DEST" -cp -f "$PLUGIN_SRC"/*.py "$PLUGIN_MANIFEST" "$PLUGIN_DEST/" +# Re-sync the platform-gateway plugin from the vendored source on every boot. +# $HERMES_HOME is a docker-compose named volume that persists across rebuilds, +# so anything written into it at image-build time gets shadowed by the volume's +# captured contents on subsequent runs. Re-running `domyn install-plugin --force` +# pulls the current vendored source out of the domyn-agents wheel and +# overwrites the plugin directory inside the volume, keeping it in sync with +# the image without requiring a `docker compose down -v`. +domyn install-plugin --agent-type hermes --force --yes python3 - <<'PY' import os diff --git a/services/hermes_platform_gateway/hermes-config.yaml b/services/hermes_platform_gateway/hermes-config.yaml index ab47eb7..3b9a8da 100644 --- a/services/hermes_platform_gateway/hermes-config.yaml +++ b/services/hermes_platform_gateway/hermes-config.yaml @@ -9,7 +9,7 @@ model: plugins: enabled: - - hermes_platform_gateway + - hermes-platform-gateway agent: # The Domyn canvas is the source of truth for tools on this worker. 
Drop From 42292d1ba322d23dc954ba6b84ee97e79316d52b Mon Sep 17 00:00:00 2001 From: Christian Serra Date: Wed, 27 May 2026 17:47:18 +0200 Subject: [PATCH 9/9] fix: framework --- services/hermes_platform_gateway/Dockerfile | 2 +- services/hermes_platform_gateway/entrypoint.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/hermes_platform_gateway/Dockerfile b/services/hermes_platform_gateway/Dockerfile index 54357c6..4842952 100644 --- a/services/hermes_platform_gateway/Dockerfile +++ b/services/hermes_platform_gateway/Dockerfile @@ -37,7 +37,7 @@ RUN pip install --no-cache-dir wheels/domyn_agents-*.whl langchain-core # which is what hermes' on-disk plugin scan picks up at startup. No pip # install or manual cp is needed — the plugin uses relative imports and # `domyn-agents` is already on sys.path from step 3. -RUN domyn install-plugin --agent-type hermes +RUN domyn install-plugin --framework hermes # 5) i18n catalog — upstream hermes' pip install does NOT package its # locales/ directory, so every t() call falls back to the raw key diff --git a/services/hermes_platform_gateway/entrypoint.sh b/services/hermes_platform_gateway/entrypoint.sh index 6cdd9e8..687df0e 100644 --- a/services/hermes_platform_gateway/entrypoint.sh +++ b/services/hermes_platform_gateway/entrypoint.sh @@ -27,7 +27,7 @@ mkdir -p "$HERMES_HOME" # pulls the current vendored source out of the domyn-agents wheel and # overwrites the plugin directory inside the volume, keeping it in sync with # the image without requiring a `docker compose down -v`. -domyn install-plugin --agent-type hermes --force --yes +domyn install-plugin --framework hermes --force python3 - <<'PY' import os